diff --git a/.cirrus.yml b/.cirrus.yml index f53c519447d..02c43a074a1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,61 +1,6 @@ env: CIRRUS_CLONE_DEPTH: 1 -freebsd_12_task: - freebsd_instance: - image_family: freebsd-12-2 - cpu: 8 - memory: 8G - install_script: - - ASSUME_ALWAYS_YES=yes pkg bootstrap -f ; - - pkg install -y bash curl cyrus-sasl git glib gmake gnutls gsed - nettle perl5 pixman pkgconf png usbredir ninja - script: - - mkdir build - - cd build - # TODO: Enable gnutls again once FreeBSD's libtasn1 got fixed - # See: https://gitlab.com/gnutls/libtasn1/-/merge_requests/71 - - ../configure --enable-werror --disable-gnutls - || { cat config.log meson-logs/meson-log.txt; exit 1; } - - gmake -j$(sysctl -n hw.ncpu) - - gmake -j$(sysctl -n hw.ncpu) check V=1 - -macos_task: - osx_instance: - image: catalina-base - install_script: - - brew install pkg-config python gnu-sed glib pixman make sdl2 bash ninja - script: - - mkdir build - - cd build - - ../configure --python=/usr/local/bin/python3 --enable-werror - --extra-cflags='-Wno-error=deprecated-declarations' - || { cat config.log meson-logs/meson-log.txt; exit 1; } - - gmake -j$(sysctl -n hw.ncpu) - - gmake check-unit V=1 - - gmake check-block V=1 - - gmake check-qapi-schema V=1 - - gmake check-softfloat V=1 - - gmake check-qtest-x86_64 V=1 - -macos_xcode_task: - osx_instance: - # this is an alias for the latest Xcode - image: catalina-xcode - install_script: - - brew install pkg-config gnu-sed glib pixman make sdl2 bash ninja - script: - - mkdir build - - cd build - - ../configure --extra-cflags='-Wno-error=deprecated-declarations' --enable-modules - --enable-werror --cc=clang || { cat config.log meson-logs/meson-log.txt; exit 1; } - - gmake -j$(sysctl -n hw.ncpu) - - gmake check-unit V=1 - - gmake check-block V=1 - - gmake check-qapi-schema V=1 - - gmake check-softfloat V=1 - - gmake check-qtest-x86_64 V=1 - windows_msys2_task: timeout_in: 90m windows_container: @@ -67,7 +12,7 @@ windows_msys2_task: CIRRUS_SHELL: powershell MSYS: winsymlinks:nativestrict MSYSTEM: MINGW64 - MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2021-01-05/msys2-base-x86_64-20210105.sfx.exe + MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2021-04-19/msys2-base-x86_64-20210419.sfx.exe MSYS2_FINGERPRINT: 0 MSYS2_PACKAGES: " diffutils git grep make pkg-config sed @@ -130,7 +75,7 @@ windows_msys2_task: taskkill /F /FI "MODULES eq msys-2.0.dll" tasklist C:\tools\msys64\usr\bin\bash.exe -lc "mv -f /etc/pacman.conf.pacnew /etc/pacman.conf || true" - C:\tools\msys64\usr\bin\bash.exe -lc "pacman --noconfirm -Suu --overwrite=*" + C:\tools\msys64\usr\bin\bash.exe -lc "pacman --noconfirm -Syuu --overwrite=*" Write-Output "Core install time taken: $((Get-Date).Subtract($start_time))" $start_time = Get-Date diff --git a/.github/lockdown.yml b/.github/lockdown.yml deleted file mode 100644 index 07fc2f31eef..00000000000 --- a/.github/lockdown.yml +++ /dev/null @@ -1,34 +0,0 @@ -# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown - -# Close issues and pull requests -close: true - -# Lock issues and pull requests -lock: true - -issues: - comment: | - Thank you for your interest in the QEMU project. - - This repository is a read-only mirror of the project's repostories hosted - at https://gitlab.com/qemu-project/qemu.git. - The project does not process issues filed on GitHub. - - The project issues are tracked on Launchpad: - https://bugs.launchpad.net/qemu - - QEMU welcomes bug report contributions. 
You can file new ones on: - https://bugs.launchpad.net/qemu/+filebug - -pulls: - comment: | - Thank you for your interest in the QEMU project. - - This repository is a read-only mirror of the project's repostories hosted - on https://gitlab.com/qemu-project/qemu.git. - The project does not process merge requests filed on GitHub. - - QEMU welcomes contributions of code (either fixing bugs or adding new - functionality). However, we get a lot of patches, and so we have some - guidelines about contributing on the project website: - https://www.qemu.org/contribute/ diff --git a/.github/workflows/lockdown.yml b/.github/workflows/lockdown.yml new file mode 100644 index 00000000000..ad8b8f7e30f --- /dev/null +++ b/.github/workflows/lockdown.yml @@ -0,0 +1,30 @@ +# Configuration for Repo Lockdown - https://github.com/dessant/repo-lockdown + +name: 'Repo Lockdown' + +on: + pull_request_target: + types: opened + +permissions: + pull-requests: write + +jobs: + action: + runs-on: ubuntu-latest + steps: + - uses: dessant/repo-lockdown@v2 + with: + pull-comment: | + Thank you for your interest in the QEMU project. + + This repository is a read-only mirror of the project's repostories hosted + on https://gitlab.com/qemu-project/qemu.git. + The project does not process merge requests filed on GitHub. + + QEMU welcomes contributions of code (either fixing bugs or adding new + functionality). However, we get a lot of patches, and so we have some + guidelines about contributing on the project website: + https://www.qemu.org/contribute/ + lock-pull: true + close-pull: true diff --git a/.gitignore b/.gitignore index 75a4be07240..eb2553026c5 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ GTAGS *~ *.ast_raw *.depend_raw +*.swp +*.patch diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml new file mode 100644 index 00000000000..2c7980a4f6a --- /dev/null +++ b/.gitlab-ci.d/buildtest-template.yml @@ -0,0 +1,81 @@ +.native_build_job_template: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - if test -n "$LD_JOBS"; + then + scripts/git-submodule.sh update meson ; + fi + - mkdir build + - cd build + - if test -n "$TARGETS"; + then + ../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=git} $CONFIGURE_ARGS --target-list="$TARGETS" ; + else + ../configure --enable-werror --disable-docs ${LD_JOBS:+--meson=git} $CONFIGURE_ARGS ; + fi || { cat config.log meson-logs/meson-log.txt && exit 1; } + - if test -n "$LD_JOBS"; + then + ../meson/meson.py configure . -Dbackend_max_links="$LD_JOBS" ; + fi || exit 1; + - make -j"$JOBS" + - if test -n "$MAKE_CHECK_ARGS"; + then + make -j"$JOBS" $MAKE_CHECK_ARGS ; + fi + +.native_test_job_template: + stage: test + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + script: + - scripts/git-submodule.sh update + $(sed -n '/GIT_SUBMODULES=/ s/.*=// p' build/config-host.mak) + - cd build + - find . 
-type f -exec touch {} + + # Avoid recompiling by hiding ninja with NINJA=":" + - make NINJA=":" $MAKE_CHECK_ARGS + +.avocado_test_job_template: + extends: .native_test_job_template + cache: + key: "${CI_JOB_NAME}-cache" + paths: + - ${CI_PROJECT_DIR}/avocado-cache + policy: pull-push + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - mkdir -p ~/.config/avocado + - echo "[datadir.paths]" > ~/.config/avocado/avocado.conf + - echo "cache_dirs = ['${CI_PROJECT_DIR}/avocado-cache']" + >> ~/.config/avocado/avocado.conf + - echo -e '[job.output.testlogs]\nstatuses = ["FAIL", "INTERRUPT"]' + >> ~/.config/avocado/avocado.conf + - if [ -d ${CI_PROJECT_DIR}/avocado-cache ]; then + du -chs ${CI_PROJECT_DIR}/avocado-cache ; + fi + - export AVOCADO_ALLOW_UNTRUSTED_CODE=1 + after_script: + - cd build + - du -chs ${CI_PROJECT_DIR}/avocado-cache + rules: + # Only run these jobs if running on the mainstream namespace, + # or if the user set the QEMU_CI_AVOCADO_TESTING variable (either + # in its namespace setting or via git-push option, see documentation + # in /.gitlab-ci.yml of this repository). + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: on_success + - if: '$QEMU_CI_AVOCADO_TESTING' + when: on_success + # Otherwise, set to manual (the jobs are created but not run). + - when: manual + allow_failure: true diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml new file mode 100644 index 00000000000..71d0f407add --- /dev/null +++ b/.gitlab-ci.d/buildtest.yml @@ -0,0 +1,649 @@ +include: + - local: '/.gitlab-ci.d/buildtest-template.yml' + +build-system-alpine: + extends: .native_build_job_template + needs: + - job: amd64-alpine-container + variables: + IMAGE: alpine + TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu + microblazeel-softmmu mips64el-softmmu + MAKE_CHECK_ARGS: check-build + CONFIGURE_ARGS: --enable-docs --enable-trace-backends=log,simple,syslog + artifacts: + expire_in: 2 days + paths: + - .git-submodule-status + - build + +check-system-alpine: + extends: .native_test_job_template + needs: + - job: build-system-alpine + artifacts: true + variables: + IMAGE: alpine + MAKE_CHECK_ARGS: check + +avocado-system-alpine: + extends: .avocado_test_job_template + needs: + - job: build-system-alpine + artifacts: true + variables: + IMAGE: alpine + MAKE_CHECK_ARGS: check-avocado + +build-system-ubuntu: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --enable-docs --enable-fdt=system --enable-slirp=system + TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu + microblazeel-softmmu mips64el-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-ubuntu: + extends: .native_test_job_template + needs: + - job: build-system-ubuntu + artifacts: true + variables: + IMAGE: ubuntu2004 + MAKE_CHECK_ARGS: check + +avocado-system-ubuntu: + extends: .avocado_test_job_template + needs: + - job: build-system-ubuntu + artifacts: true + variables: + IMAGE: ubuntu2004 + MAKE_CHECK_ARGS: check-avocado + +build-system-debian: + extends: .native_build_job_template + needs: + job: amd64-debian-container + variables: + IMAGE: debian-amd64 + TARGETS: arm-softmmu avr-softmmu i386-softmmu mipsel-softmmu + riscv64-softmmu 
sh4eb-softmmu sparc-softmmu xtensaeb-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-debian: + extends: .native_test_job_template + needs: + - job: build-system-debian + artifacts: true + variables: + IMAGE: debian-amd64 + MAKE_CHECK_ARGS: check + +avocado-system-debian: + extends: .avocado_test_job_template + needs: + - job: build-system-debian + artifacts: true + variables: + IMAGE: debian-amd64 + MAKE_CHECK_ARGS: check-avocado + +build-system-fedora: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs + --enable-fdt=system --enable-slirp=system --enable-capstone=system + TARGETS: tricore-softmmu microblaze-softmmu mips-softmmu + xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-fedora: + extends: .native_test_job_template + needs: + - job: build-system-fedora + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-system-fedora: + extends: .avocado_test_job_template + needs: + - job: build-system-fedora + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +build-system-centos: + extends: .native_build_job_template + needs: + job: amd64-centos8-container + variables: + IMAGE: centos8 + CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system + --enable-modules --enable-trace-backends=dtrace + TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu + x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-centos: + extends: .native_test_job_template + needs: + - job: build-system-centos + artifacts: true + variables: + IMAGE: centos8 + MAKE_CHECK_ARGS: check + +avocado-system-centos: + extends: .avocado_test_job_template + needs: + - job: build-system-centos + artifacts: true + variables: + IMAGE: centos8 + MAKE_CHECK_ARGS: check-avocado + +build-system-opensuse: + extends: .native_build_job_template + needs: + job: amd64-opensuse-leap-container + variables: + IMAGE: opensuse-leap + CONFIGURE_ARGS: --enable-fdt=system + TARGETS: s390x-softmmu x86_64-softmmu aarch64-softmmu + MAKE_CHECK_ARGS: check-build + artifacts: + expire_in: 2 days + paths: + - build + +check-system-opensuse: + extends: .native_test_job_template + needs: + - job: build-system-opensuse + artifacts: true + variables: + IMAGE: opensuse-leap + MAKE_CHECK_ARGS: check + +avocado-system-opensuse: + extends: .avocado_test_job_template + needs: + - job: build-system-opensuse + artifacts: true + variables: + IMAGE: opensuse-leap + MAKE_CHECK_ARGS: check-avocado + + +# This jobs explicitly disable TCG (--disable-tcg), KVM is detected by +# the configure script. The container doesn't contain Xen headers so +# Xen accelerator is not detected / selected. As result it build the +# i386-softmmu and x86_64-softmmu with KVM being the single accelerator +# available. +# Also use a different coroutine implementation (which is only really of +# interest to KVM users, i.e. 
with TCG disabled) +build-tcg-disabled: + extends: .native_build_job_template + needs: + job: amd64-centos8-container + variables: + IMAGE: centos8 + script: + - mkdir build + - cd build + - ../configure --disable-tcg --audio-drv-list="" --with-coroutine=ucontext + || { cat config.log meson-logs/meson-log.txt && exit 1; } + - make -j"$JOBS" + - make check-unit + - make check-qapi-schema + - cd tests/qemu-iotests/ + - ./check -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 048 + 052 063 077 086 101 104 106 113 148 150 151 152 157 159 160 163 + 170 171 183 184 192 194 208 221 226 227 236 253 277 image-fleecing + - ./check -qcow2 028 051 056 057 058 065 068 082 085 091 095 096 102 122 + 124 132 139 142 144 145 151 152 155 157 165 194 196 200 202 + 208 209 216 218 227 234 246 247 248 250 254 255 257 258 + 260 261 262 263 264 270 272 273 277 279 image-fleecing + +build-user: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools --disable-system + MAKE_CHECK_ARGS: check-tcg + +build-user-static: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools --disable-system --static + MAKE_CHECK_ARGS: check-tcg + +# Because the hexagon cross-compiler takes so long to build we don't rely +# on the CI system to build it and hence this job has an optional dependency +# declared. The image is manually uploaded. +build-user-hexagon: + extends: .native_build_job_template + needs: + job: hexagon-cross-container + optional: true + variables: + IMAGE: debian-hexagon-cross + TARGETS: hexagon-linux-user + CONFIGURE_ARGS: --disable-tools --disable-docs --enable-debug-tcg + MAKE_CHECK_ARGS: check-tcg + +# Only build the softmmu targets we have check-tcg tests for +build-some-softmmu: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools --enable-debug + TARGETS: xtensa-softmmu arm-softmmu aarch64-softmmu alpha-softmmu + MAKE_CHECK_ARGS: check-tcg + +# We build tricore in a very minimal tricore only container +build-tricore-softmmu: + extends: .native_build_job_template + needs: + job: tricore-debian-cross-container + variables: + IMAGE: debian-tricore-cross + CONFIGURE_ARGS: --disable-tools --disable-fdt --enable-debug + TARGETS: tricore-softmmu + MAKE_CHECK_ARGS: check-tcg + +clang-system: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ + --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined + TARGETS: alpha-softmmu arm-softmmu m68k-softmmu mips64-softmmu + ppc-softmmu s390x-softmmu + MAKE_CHECK_ARGS: check-qtest check-tcg + +clang-user: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --disable-system + --target-list-exclude=microblazeel-linux-user,aarch64_be-linux-user,i386-linux-user,m68k-linux-user,mipsn32el-linux-user,xtensaeb-linux-user + --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined + MAKE_CHECK_ARGS: check-unit check-tcg + +# Set LD_JOBS=1 because this requires LTO and ld consumes a large amount of memory. 
+# On gitlab runners, default value sometimes end up calling 2 lds concurrently and +# triggers an Out-Of-Memory error +# +# Since slirp callbacks are used in QEMU Timers, slirp needs to be compiled together +# with QEMU and linked as a static library to avoid false positives in CFI checks. +# This can be accomplished by using -enable-slirp=git, which avoids the use of +# a system-wide version of the library +# +# Split in three sets of build/check/avocado to limit the execution time of each +# job +build-cfi-aarch64: + extends: .native_build_job_template + needs: + - job: amd64-fedora-container + variables: + LD_JOBS: 1 + AR: llvm-ar + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug + --enable-safe-stack --enable-slirp=git + TARGETS: aarch64-softmmu + MAKE_CHECK_ARGS: check-build + timeout: 70m + artifacts: + expire_in: 2 days + paths: + - build + rules: + # FIXME: This job is often failing, likely due to out-of-memory problems in + # the constrained containers of the shared runners. Thus this is marked as + # manual until the situation has been solved. + - when: manual + allow_failure: true + +check-cfi-aarch64: + extends: .native_test_job_template + needs: + - job: build-cfi-aarch64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-cfi-aarch64: + extends: .avocado_test_job_template + needs: + - job: build-cfi-aarch64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +build-cfi-ppc64-s390x: + extends: .native_build_job_template + needs: + - job: amd64-fedora-container + variables: + LD_JOBS: 1 + AR: llvm-ar + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug + --enable-safe-stack --enable-slirp=git + TARGETS: ppc64-softmmu s390x-softmmu + MAKE_CHECK_ARGS: check-build + timeout: 70m + artifacts: + expire_in: 2 days + paths: + - build + rules: + # FIXME: This job is often failing, likely due to out-of-memory problems in + # the constrained containers of the shared runners. Thus this is marked as + # manual until the situation has been solved. 
+ - when: manual + allow_failure: true + +check-cfi-ppc64-s390x: + extends: .native_test_job_template + needs: + - job: build-cfi-ppc64-s390x + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-cfi-ppc64-s390x: + extends: .avocado_test_job_template + needs: + - job: build-cfi-ppc64-s390x + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +build-cfi-x86_64: + extends: .native_build_job_template + needs: + - job: amd64-fedora-container + variables: + LD_JOBS: 1 + AR: llvm-ar + IMAGE: fedora + CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug + --enable-safe-stack --enable-slirp=git + TARGETS: x86_64-softmmu + MAKE_CHECK_ARGS: check-build + timeout: 70m + artifacts: + expire_in: 2 days + paths: + - build + +check-cfi-x86_64: + extends: .native_test_job_template + needs: + - job: build-cfi-x86_64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check + +avocado-cfi-x86_64: + extends: .avocado_test_job_template + needs: + - job: build-cfi-x86_64 + artifacts: true + variables: + IMAGE: fedora + MAKE_CHECK_ARGS: check-avocado + +tsan-build: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --enable-tsan --cc=clang-10 --cxx=clang++-10 + --enable-trace-backends=ust --enable-fdt=system --enable-slirp=system + TARGETS: x86_64-softmmu ppc64-softmmu riscv64-softmmu x86_64-linux-user + MAKE_CHECK_ARGS: bench V=1 + +# These targets are on the way out +build-deprecated: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + CONFIGURE_ARGS: --disable-tools + MAKE_CHECK_ARGS: build-tcg + TARGETS: ppc64abi32-linux-user + artifacts: + expire_in: 2 days + paths: + - build + +# We split the check-tcg step as test failures are expected but we still +# want to catch the build breaking. +check-deprecated: + extends: .native_test_job_template + needs: + - job: build-deprecated + artifacts: true + variables: + IMAGE: debian-all-test-cross + MAKE_CHECK_ARGS: check-tcg + allow_failure: true + +# gprof/gcov are GCC features +build-gprof-gcov: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --enable-gprof --enable-gcov + TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu + artifacts: + expire_in: 1 days + paths: + - build + +check-gprof-gcov: + extends: .native_test_job_template + needs: + - job: build-gprof-gcov + artifacts: true + variables: + IMAGE: ubuntu2004 + MAKE_CHECK_ARGS: check + after_script: + - ${CI_PROJECT_DIR}/scripts/ci/coverage-summary.sh + +build-oss-fuzz: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + script: + - mkdir build-oss-fuzz + - CC="clang" CXX="clang++" CFLAGS="-fsanitize=address" + ./scripts/oss-fuzz/build.sh + - export ASAN_OPTIONS="fast_unwind_on_malloc=0" + - for fuzzer in $(find ./build-oss-fuzz/DEST_DIR/ -executable -type f + | grep -v slirp); do + grep "LLVMFuzzerTestOneInput" ${fuzzer} > /dev/null 2>&1 || continue ; + echo Testing ${fuzzer} ... 
; + "${fuzzer}" -runs=1 -seed=1 || exit 1 ; + done + # Unrelated to fuzzer: run some tests with -fsanitize=address + - cd build-oss-fuzz && make check-qtest-i386 check-unit + +build-tci: + extends: .native_build_job_template + needs: + job: amd64-debian-user-cross-container + variables: + IMAGE: debian-all-test-cross + script: + - TARGETS="aarch64 alpha arm hppa m68k microblaze ppc64 s390x x86_64" + - mkdir build + - cd build + - ../configure --enable-tcg-interpreter + --target-list="$(for tg in $TARGETS; do echo -n ${tg}'-softmmu '; done)" || { cat config.log meson-logs/meson-log.txt && exit 1; } + - make -j"$JOBS" + - make tests/qtest/boot-serial-test tests/qtest/cdrom-test tests/qtest/pxe-test + - for tg in $TARGETS ; do + export QTEST_QEMU_BINARY="./qemu-system-${tg}" ; + ./tests/qtest/boot-serial-test || exit 1 ; + ./tests/qtest/cdrom-test || exit 1 ; + done + - QTEST_QEMU_BINARY="./qemu-system-x86_64" ./tests/qtest/pxe-test + - QTEST_QEMU_BINARY="./qemu-system-s390x" ./tests/qtest/pxe-test -m slow + - make check-tcg + +# Alternate coroutines implementations are only really of interest to KVM users +# However we can't test against KVM on Gitlab-CI so we can only run unit tests +build-coroutine-sigaltstack: + extends: .native_build_job_template + needs: + job: amd64-ubuntu2004-container + variables: + IMAGE: ubuntu2004 + CONFIGURE_ARGS: --with-coroutine=sigaltstack --disable-tcg + --enable-trace-backends=ftrace + MAKE_CHECK_ARGS: check-unit + +# Check our reduced build configurations +build-without-default-devices: + extends: .native_build_job_template + needs: + job: amd64-centos8-container + variables: + IMAGE: centos8 + CONFIGURE_ARGS: --without-default-devices --disable-user + +build-without-default-features: + extends: .native_build_job_template + needs: + job: amd64-fedora-container + variables: + IMAGE: fedora + CONFIGURE_ARGS: + --without-default-features + --disable-capstone + --disable-pie + --disable-qom-cast-debug + --disable-slirp + --disable-strip + TARGETS: avr-softmmu i386-softmmu mips64-softmmu s390x-softmmu sh4-softmmu + sparc64-softmmu hexagon-linux-user i386-linux-user s390x-linux-user + MAKE_CHECK_ARGS: check-unit check-qtest SPEED=slow + +build-libvhost-user: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/fedora:latest + needs: + job: amd64-fedora-container + script: + - mkdir subprojects/libvhost-user/build + - cd subprojects/libvhost-user/build + - meson + - ninja + +# No targets are built here, just tools, docs, and unit tests. This +# also feeds into the eventual documentation deployment steps later +build-tools-and-docs-debian: + extends: .native_build_job_template + needs: + job: amd64-debian-container + variables: + IMAGE: debian-amd64 + MAKE_CHECK_ARGS: check-unit check-softfloat ctags TAGS cscope + CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools + artifacts: + expire_in: 2 days + paths: + - build + +# Prepare for GitLab pages deployment. Anything copied into the +# "public" directory will be deployed to $USER.gitlab.io/$PROJECT +# +# GitLab publishes from any branch that triggers a CI pipeline +# +# For the main repo we don't want to publish from 'staging' +# since that content may not be pushed, nor do we wish to +# publish from 'stable-NNN' branches as that content is outdated. 
+# Thus we restrict to just the default branch +# +# For contributor forks we want to publish from any repo so +# that users can see the results of their commits, regardless +# of what topic branch they're currently using +pages: + image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:latest + stage: test + needs: + - job: build-tools-and-docs-debian + script: + - mkdir -p public + # HTML-ised source tree + - make gtags + - htags -anT --tree-view=filetree -m qemu_init + -t "Welcome to the QEMU sourcecode" + - mv HTML public/src + # Project documentation + - make -C build install DESTDIR=$(pwd)/temp-install + - mv temp-install/usr/local/share/doc/qemu/* public/ + artifacts: + paths: + - public + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH' + when: on_success + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: never + - if: '$CI_PROJECT_NAMESPACE != "qemu-project"' + when: on_success diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml new file mode 100644 index 00000000000..d273a9e7130 --- /dev/null +++ b/.gitlab-ci.d/cirrus.yml @@ -0,0 +1,91 @@ +# Jobs that we delegate to Cirrus CI because they require an operating +# system other than Linux. These jobs will only run if the required +# setup has been performed on the GitLab account. +# +# The Cirrus CI configuration is generated by replacing target-specific +# variables in a generic template: some of these variables are provided +# when the GitLab CI job is defined, others are taken from a shell +# snippet generated using lcitool. +# +# Note that the $PATH environment variable has to be treated with +# special care, because we can't just override it at the GitLab CI job +# definition level or we risk breaking it completely. +.cirrus_build_job: + stage: build + image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master + needs: [] + timeout: 80m + allow_failure: true + script: + - source .gitlab-ci.d/cirrus/$NAME.vars + - sed -e "s|[@]CI_REPOSITORY_URL@|$CI_REPOSITORY_URL|g" + -e "s|[@]CI_COMMIT_REF_NAME@|$CI_COMMIT_REF_NAME|g" + -e "s|[@]CI_COMMIT_SHA@|$CI_COMMIT_SHA|g" + -e "s|[@]CIRRUS_VM_INSTANCE_TYPE@|$CIRRUS_VM_INSTANCE_TYPE|g" + -e "s|[@]CIRRUS_VM_IMAGE_SELECTOR@|$CIRRUS_VM_IMAGE_SELECTOR|g" + -e "s|[@]CIRRUS_VM_IMAGE_NAME@|$CIRRUS_VM_IMAGE_NAME|g" + -e "s|[@]CIRRUS_VM_CPUS@|$CIRRUS_VM_CPUS|g" + -e "s|[@]CIRRUS_VM_RAM@|$CIRRUS_VM_RAM|g" + -e "s|[@]UPDATE_COMMAND@|$UPDATE_COMMAND|g" + -e "s|[@]INSTALL_COMMAND@|$INSTALL_COMMAND|g" + -e "s|[@]PATH@|$PATH_EXTRA${PATH_EXTRA:+:}\$PATH|g" + -e "s|[@]PKG_CONFIG_PATH@|$PKG_CONFIG_PATH|g" + -e "s|[@]PKGS@|$PKGS|g" + -e "s|[@]MAKE@|$MAKE|g" + -e "s|[@]PYTHON@|$PYTHON|g" + -e "s|[@]PIP3@|$PIP3|g" + -e "s|[@]PYPI_PKGS@|$PYPI_PKGS|g" + -e "s|[@]CONFIGURE_ARGS@|$CONFIGURE_ARGS|g" + -e "s|[@]TEST_TARGETS@|$TEST_TARGETS|g" + <.gitlab-ci.d/cirrus/build.yml >.gitlab-ci.d/cirrus/$NAME.yml + - cat .gitlab-ci.d/cirrus/$NAME.yml + - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml + rules: + # Allow on 'staging' branch and 'stable-X.Y-staging' branches only + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/' + when: never + - if: "$CIRRUS_GITHUB_REPO && $CIRRUS_API_TOKEN" + +x64-freebsd-12-build: + extends: .cirrus_build_job + variables: + NAME: freebsd-12 + CIRRUS_VM_INSTANCE_TYPE: freebsd_instance + CIRRUS_VM_IMAGE_SELECTOR: image_family + CIRRUS_VM_IMAGE_NAME: freebsd-12-2 + CIRRUS_VM_CPUS: 8 + CIRRUS_VM_RAM: 8G + UPDATE_COMMAND: pkg update + INSTALL_COMMAND: pkg install -y + # TODO: 
Enable gnutls again once FreeBSD's libtasn1 got fixed + # See: https://gitlab.com/gnutls/libtasn1/-/merge_requests/71 + CONFIGURE_ARGS: --disable-gnutls + TEST_TARGETS: check + +x64-freebsd-13-build: + extends: .cirrus_build_job + variables: + NAME: freebsd-13 + CIRRUS_VM_INSTANCE_TYPE: freebsd_instance + CIRRUS_VM_IMAGE_SELECTOR: image_family + CIRRUS_VM_IMAGE_NAME: freebsd-13-0 + CIRRUS_VM_CPUS: 8 + CIRRUS_VM_RAM: 8G + UPDATE_COMMAND: pkg update + INSTALL_COMMAND: pkg install -y + TEST_TARGETS: check + +x64-macos-11-base-build: + extends: .cirrus_build_job + variables: + NAME: macos-11 + CIRRUS_VM_INSTANCE_TYPE: osx_instance + CIRRUS_VM_IMAGE_SELECTOR: image + CIRRUS_VM_IMAGE_NAME: big-sur-base + CIRRUS_VM_CPUS: 12 + CIRRUS_VM_RAM: 24G + UPDATE_COMMAND: brew update + INSTALL_COMMAND: brew install + PATH_EXTRA: /usr/local/opt/ccache/libexec:/usr/local/opt/gettext/bin + PKG_CONFIG_PATH: /usr/local/opt/curl/lib/pkgconfig:/usr/local/opt/ncurses/lib/pkgconfig:/usr/local/opt/readline/lib/pkgconfig + TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat check-qtest-x86_64 diff --git a/.gitlab-ci.d/cirrus/README.rst b/.gitlab-ci.d/cirrus/README.rst new file mode 100644 index 00000000000..657b0706d78 --- /dev/null +++ b/.gitlab-ci.d/cirrus/README.rst @@ -0,0 +1,54 @@ +Cirrus CI integration +===================== + +GitLab CI shared runners only provide a docker environment running on Linux. +While it is possible to provide private runners for non-Linux platforms this +is not something most contributors/maintainers will wish to do. + +To work around this limitation, we take advantage of `Cirrus CI`_'s free +offering: more specifically, we use the `cirrus-run`_ script to trigger Cirrus +CI jobs from GitLab CI jobs so that Cirrus CI job output is integrated into +the main GitLab CI pipeline dashboard. + +There is, however, some one-time setup required. If you want FreeBSD and macOS +builds to happen when you push to your GitLab repository, you need to + +* set up a GitHub repository for the project, eg. ``yourusername/qemu``. + This repository needs to exist for cirrus-run to work, but it doesn't need to + be kept up to date, so you can create it and then forget about it; + +* enable the `Cirrus CI GitHub app`_ for your GitHub account; + +* sign up for Cirrus CI. It's enough to log into the website using your GitHub + account; + +* grab an API token from the `Cirrus CI settings`_ page; + +* it may be necessary to push an empty ``.cirrus.yml`` file to your github fork + for Cirrus CI to properly recognize the project. You can check whether + Cirrus CI knows about your project by navigating to: + + ``https://cirrus-ci.com/yourusername/qemu`` + +* in the *CI/CD / Variables* section of the settings page for your GitLab + repository, create two new variables: + + * ``CIRRUS_GITHUB_REPO``, containing the name of the GitHub repository + created earlier, eg. ``yourusername/qemu``; + + * ``CIRRUS_API_TOKEN``, containing the Cirrus CI API token generated earlier. + This variable **must** be marked as *Masked*, because anyone with knowledge + of it can impersonate you as far as Cirrus CI is concerned. + + Neither of these variables should be marked as *Protected*, because in + general you'll want to be able to trigger Cirrus CI builds from non-protected + branches. + +Once this one-time setup is complete, you can just keep pushing to your GitLab +repository as usual and you'll automatically get the additional CI coverage. + + +.. 
_Cirrus CI GitHub app: https://github.com/marketplace/cirrus-ci +.. _Cirrus CI settings: https://cirrus-ci.com/settings/profile/ +.. _Cirrus CI: https://cirrus-ci.com/ +.. _cirrus-run: https://github.com/sio/cirrus-run/ diff --git a/.gitlab-ci.d/cirrus/build.yml b/.gitlab-ci.d/cirrus/build.yml new file mode 100644 index 00000000000..c555f5d36e6 --- /dev/null +++ b/.gitlab-ci.d/cirrus/build.yml @@ -0,0 +1,36 @@ +@CIRRUS_VM_INSTANCE_TYPE@: + @CIRRUS_VM_IMAGE_SELECTOR@: @CIRRUS_VM_IMAGE_NAME@ + cpu: @CIRRUS_VM_CPUS@ + memory: @CIRRUS_VM_RAM@ + +env: + CIRRUS_CLONE_DEPTH: 1 + CI_REPOSITORY_URL: "@CI_REPOSITORY_URL@" + CI_COMMIT_REF_NAME: "@CI_COMMIT_REF_NAME@" + CI_COMMIT_SHA: "@CI_COMMIT_SHA@" + PATH: "@PATH@" + PKG_CONFIG_PATH: "@PKG_CONFIG_PATH@" + PYTHON: "@PYTHON@" + MAKE: "@MAKE@" + CONFIGURE_ARGS: "@CONFIGURE_ARGS@" + TEST_TARGETS: "@TEST_TARGETS@" + +build_task: + install_script: + - @UPDATE_COMMAND@ + - @INSTALL_COMMAND@ @PKGS@ + - if test -n "@PYPI_PKGS@" ; then @PIP3@ install @PYPI_PKGS@ ; fi + clone_script: + - git clone --depth 100 "$CI_REPOSITORY_URL" . + - git fetch origin "$CI_COMMIT_REF_NAME" + - git reset --hard "$CI_COMMIT_SHA" + build_script: + - mkdir build + - cd build + - ../configure --enable-werror $CONFIGURE_ARGS + || { cat config.log meson-logs/meson-log.txt; exit 1; } + - $MAKE -j$(sysctl -n hw.ncpu) + - for TARGET in $TEST_TARGETS ; + do + $MAKE -j$(sysctl -n hw.ncpu) $TARGET V=1 ; + done diff --git a/.gitlab-ci.d/cirrus/freebsd-12.vars b/.gitlab-ci.d/cirrus/freebsd-12.vars new file mode 100644 index 00000000000..2099b213547 --- /dev/null +++ b/.gitlab-ci.d/cirrus/freebsd-12.vars @@ -0,0 +1,13 @@ +# THIS FILE WAS AUTO-GENERATED +# +# $ lcitool variables freebsd-12 qemu +# +# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1 + +PACKAGING_COMMAND='pkg' +CCACHE='/usr/local/bin/ccache' +MAKE='/usr/local/bin/gmake' +NINJA='/usr/local/bin/ninja' +PYTHON='/usr/local/bin/python3' +PIP3='/usr/local/bin/pip-3.8' +PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' diff --git a/.gitlab-ci.d/cirrus/freebsd-13.vars b/.gitlab-ci.d/cirrus/freebsd-13.vars new file mode 100644 index 00000000000..323fe806d5e --- /dev/null +++ b/.gitlab-ci.d/cirrus/freebsd-13.vars @@ -0,0 +1,13 @@ +# THIS FILE WAS AUTO-GENERATED +# +# $ lcitool variables freebsd-13 qemu +# +# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1 + +PACKAGING_COMMAND='pkg' +CCACHE='/usr/local/bin/ccache' +MAKE='/usr/local/bin/gmake' +NINJA='/usr/local/bin/ninja' +PYTHON='/usr/local/bin/python3' +PIP3='/usr/local/bin/pip-3.8' +PKGS='alsa-lib bash bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage ctags curl cyrus-sasl dbus diffutils gettext git glib gmake gnutls gsed gtk3 libepoxy libffi libgcrypt libjpeg-turbo libnfs libspice-server libssh libtasn1 libxml2 llvm lttng-ust lzo2 meson ncurses nettle ninja opencv p5-Test-Harness perl5 pixman pkgconf png py38-numpy py38-pillow py38-pip py38-sphinx py38-sphinx_rtd_theme py38-virtualenv py38-yaml python3 rpm2cpio sdl2 sdl2_image 
snappy spice-protocol tesseract texinfo usbredir virglrenderer vte3 zstd' diff --git a/.gitlab-ci.d/cirrus/macos-11.vars b/.gitlab-ci.d/cirrus/macos-11.vars new file mode 100644 index 00000000000..cbec8a44a35 --- /dev/null +++ b/.gitlab-ci.d/cirrus/macos-11.vars @@ -0,0 +1,15 @@ +# THIS FILE WAS AUTO-GENERATED +# +# $ lcitool variables macos-11 qemu +# +# https://gitlab.com/libvirt/libvirt-ci/-/commit/c7e275ab27ac0dcd09da290817b9adeea1fd1eb1 + +PACKAGING_COMMAND='brew' +CCACHE='/usr/local/bin/ccache' +MAKE='/usr/local/bin/gmake' +NINJA='/usr/local/bin/ninja' +PYTHON='/usr/local/bin/python3' +PIP3='/usr/local/bin/pip3' +PKGS='bash bc bzip2 capstone ccache cpanminus ctags curl dbus diffutils gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb libxml2 llvm lzo make meson ncurses nettle ninja perl pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy sparse spice-protocol tesseract texinfo usbredir vde vte3 zlib zstd' +PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme virtualenv' +CPAN_PKGS='Test::Harness' diff --git a/.gitlab-ci.d/container-core.yml b/.gitlab-ci.d/container-core.yml new file mode 100644 index 00000000000..e8dd1f476a2 --- /dev/null +++ b/.gitlab-ci.d/container-core.yml @@ -0,0 +1,17 @@ +include: + - local: '/.gitlab-ci.d/container-template.yml' + +amd64-centos8-container: + extends: .container_job_template + variables: + NAME: centos8 + +amd64-fedora-container: + extends: .container_job_template + variables: + NAME: fedora + +amd64-debian10-container: + extends: .container_job_template + variables: + NAME: debian10 diff --git a/.gitlab-ci.d/container-cross.yml b/.gitlab-ci.d/container-cross.yml new file mode 100644 index 00000000000..a3b5b905520 --- /dev/null +++ b/.gitlab-ci.d/container-cross.yml @@ -0,0 +1,193 @@ +alpha-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-alpha-cross + +amd64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-amd64-cross + +amd64-debian-user-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-all-test-cross + +arm64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-arm64-cross + +arm64-test-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian11-container'] + variables: + NAME: debian-arm64-test-cross + +armel-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-armel-cross + +armhf-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-armhf-cross + +# We never want to build hexagon in the CI system and by default we +# always want to refer to the master registry where it lives. 
+hexagon-cross-container: + image: docker:stable + stage: containers + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: never + - when: always + variables: + NAME: debian-hexagon-cross + GIT_DEPTH: 1 + services: + - docker:dind + before_script: + - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" + - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/qemu/$NAME:latest" + - docker info + - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" + script: + - echo "TAG:$TAG" + - echo "COMMON_TAG:$COMMON_TAG" + - docker pull $COMMON_TAG + - docker tag $COMMON_TAG $TAG + - docker push "$TAG" + after_script: + - docker logout + +hppa-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-hppa-cross + +m68k-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-m68k-cross + +mips64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mips64-cross + +mips64el-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mips64el-cross + +mips-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mips-cross + +mipsel-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-mipsel-cross + +powerpc-test-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian11-container'] + variables: + NAME: debian-powerpc-test-cross + +ppc64el-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-ppc64el-cross + +riscv64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + # as we are currently based on 'sid/unstable' we may break so... 
+ allow_failure: true + variables: + NAME: debian-riscv64-cross + +s390x-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-s390x-cross + +sh4-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-sh4-cross + +sparc64-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-sparc64-cross + +tricore-debian-cross-container: + extends: .container_job_template + stage: containers-layer2 + needs: ['amd64-debian10-container'] + variables: + NAME: debian-tricore-cross + +xtensa-debian-cross-container: + extends: .container_job_template + variables: + NAME: debian-xtensa-cross + +cris-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-cris-cross + +i386-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-i386-cross + +win32-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-win32-cross + +win64-fedora-cross-container: + extends: .container_job_template + variables: + NAME: fedora-win64-cross diff --git a/.gitlab-ci.d/container-template.yml b/.gitlab-ci.d/container-template.yml new file mode 100644 index 00000000000..1baecd94606 --- /dev/null +++ b/.gitlab-ci.d/container-template.yml @@ -0,0 +1,21 @@ +.container_job_template: + image: docker:stable + stage: containers + services: + - docker:dind + before_script: + - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" + - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/$NAME:latest" + - apk add python3 + - docker info + - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" + script: + - echo "TAG:$TAG" + - echo "COMMON_TAG:$COMMON_TAG" + - ./tests/docker/docker.py --engine docker build + -t "qemu/$NAME" -f "tests/docker/dockerfiles/$NAME.docker" + -r $CI_REGISTRY/qemu-project/qemu + - docker tag "qemu/$NAME" "$TAG" + - docker push "$TAG" + after_script: + - docker logout diff --git a/.gitlab-ci.d/containers.yml b/.gitlab-ci.d/containers.yml index 33e4046e233..cd06d3f5f49 100644 --- a/.gitlab-ci.d/containers.yml +++ b/.gitlab-ci.d/containers.yml @@ -1,251 +1,45 @@ -.container_job_template: &container_job_definition - image: docker:stable - stage: containers - services: - - docker:dind - before_script: - - export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest" - - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/$NAME:latest" - - apk add python3 - - docker info - - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" - script: - - echo "TAG:$TAG" - - echo "COMMON_TAG:$COMMON_TAG" - - docker pull "$TAG" || docker pull "$COMMON_TAG" || true - - ./tests/docker/docker.py --engine docker build - -t "qemu/$NAME" -f "tests/docker/dockerfiles/$NAME.docker" - -r $CI_REGISTRY_IMAGE - - docker tag "qemu/$NAME" "$TAG" - - docker push "$TAG" - after_script: - - docker logout +include: + - local: '/.gitlab-ci.d/container-core.yml' + - local: '/.gitlab-ci.d/container-cross.yml' amd64-alpine-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: alpine -amd64-centos7-container: - <<: *container_job_definition - variables: - NAME: centos7 - -amd64-centos8-container: - <<: *container_job_definition - variables: - NAME: centos8 - -amd64-debian10-container: - <<: *container_job_definition - 
variables: - NAME: debian10 - amd64-debian11-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: debian11 -alpha-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-alpha-cross - -amd64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-amd64-cross - -amd64-debian-user-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-all-test-cross - amd64-debian-container: - <<: *container_job_definition + extends: .container_job_template stage: containers-layer2 needs: ['amd64-debian10-container'] variables: NAME: debian-amd64 -arm64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-arm64-cross - -arm64-test-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian11-container'] - variables: - NAME: debian-arm64-test-cross - -armel-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-armel-cross - -armhf-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-armhf-cross - -hppa-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-hppa-cross - -m68k-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-m68k-cross - -mips64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mips64-cross - -mips64el-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mips64el-cross - -mips-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mips-cross - -mipsel-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-mipsel-cross - -powerpc-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-powerpc-cross - -ppc64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-ppc64-cross - -ppc64el-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-ppc64el-cross - -riscv64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-riscv64-cross - -s390x-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-s390x-cross - -sh4-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - 
variables: - NAME: debian-sh4-cross - -sparc64-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-sparc64-cross - -tricore-debian-cross-container: - <<: *container_job_definition - stage: containers-layer2 - needs: ['amd64-debian10-container'] - variables: - NAME: debian-tricore-cross - -xtensa-debian-cross-container: - <<: *container_job_definition - variables: - NAME: debian-xtensa-cross - -cris-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-cris-cross - -amd64-fedora-container: - <<: *container_job_definition - variables: - NAME: fedora - -i386-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-i386-cross - -win32-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-win32-cross - -win64-fedora-cross-container: - <<: *container_job_definition - variables: - NAME: fedora-win64-cross - amd64-ubuntu1804-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: ubuntu1804 amd64-ubuntu2004-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: ubuntu2004 amd64-ubuntu-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: ubuntu amd64-opensuse-leap-container: - <<: *container_job_definition + extends: .container_job_template variables: NAME: opensuse-leap + +python-container: + extends: .container_job_template + variables: + NAME: python diff --git a/.gitlab-ci.d/crossbuild-template.yml b/.gitlab-ci.d/crossbuild-template.yml new file mode 100644 index 00000000000..10d22dcf6c1 --- /dev/null +++ b/.gitlab-ci.d/crossbuild-template.yml @@ -0,0 +1,47 @@ +.cross_system_build_job: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + timeout: 80m + script: + - mkdir build + - cd build + - PKG_CONFIG_PATH=$PKG_CONFIG_PATH + ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS + --disable-user --target-list-exclude="arm-softmmu cris-softmmu + i386-softmmu microblaze-softmmu mips-softmmu mipsel-softmmu + mips64-softmmu ppc-softmmu riscv32-softmmu sh4-softmmu + sparc-softmmu xtensa-softmmu $CROSS_SKIP_TARGETS" + - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS + - if grep -q "EXESUF=.exe" config-host.mak; + then make installer; + version="$(git describe --match v[0-9]*)"; + mv -v qemu-setup*.exe qemu-setup-${version}.exe; + fi + +# Job to cross-build specific accelerators. +# +# Set the $ACCEL variable to select the specific accelerator (default to +# KVM), and set extra options (such disabling other accelerators) via the +# $EXTRA_CONFIGURE_OPTS variable. 
+.cross_accel_build_job: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + timeout: 30m + script: + - mkdir build + - cd build + - PKG_CONFIG_PATH=$PKG_CONFIG_PATH + ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS + --disable-tools --enable-${ACCEL:-kvm} $EXTRA_CONFIGURE_OPTS + - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS + +.cross_user_build_job: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest + script: + - mkdir build + - cd build + - PKG_CONFIG_PATH=$PKG_CONFIG_PATH + ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS + --disable-system + - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS diff --git a/.gitlab-ci.d/crossbuilds.yml b/.gitlab-ci.d/crossbuilds.yml index 2d95784ed51..17d6cb3e458 100644 --- a/.gitlab-ci.d/crossbuilds.yml +++ b/.gitlab-ci.d/crossbuilds.yml @@ -1,44 +1,5 @@ -.cross_system_build_job: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - timeout: 80m - script: - - mkdir build - - cd build - - PKG_CONFIG_PATH=$PKG_CONFIG_PATH - ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS - --disable-user --target-list-exclude="arm-softmmu cris-softmmu - i386-softmmu microblaze-softmmu mips-softmmu mipsel-softmmu - mips64-softmmu ppc-softmmu sh4-softmmu xtensa-softmmu" - - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS - -# Job to cross-build specific accelerators. -# -# Set the $ACCEL variable to select the specific accelerator (default to -# KVM), and set extra options (such disabling other accelerators) via the -# $ACCEL_CONFIGURE_OPTS variable. -.cross_accel_build_job: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - timeout: 30m - script: - - mkdir build - - cd build - - PKG_CONFIG_PATH=$PKG_CONFIG_PATH - ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS - --disable-tools --enable-${ACCEL:-kvm} $ACCEL_CONFIGURE_OPTS - - make -j$(expr $(nproc) + 1) all check-build - -.cross_user_build_job: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - script: - - mkdir build - - cd build - - PKG_CONFIG_PATH=$PKG_CONFIG_PATH - ../configure --enable-werror --disable-docs $QEMU_CONFIGURE_OPTS - --disable-system - - make -j$(expr $(nproc) + 1) all check-build $MAKE_CHECK_ARGS +include: + - local: '/.gitlab-ci.d/crossbuild-template.yml' cross-armel-system: extends: .cross_system_build_job @@ -98,6 +59,15 @@ cross-i386-user: IMAGE: fedora-i386-cross MAKE_CHECK_ARGS: check +cross-i386-tci: + extends: .cross_accel_build_job + timeout: 60m + variables: + IMAGE: fedora-i386-cross + ACCEL: tcg-interpreter + EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user + MAKE_CHECK_ARGS: check check-tcg + cross-mips-system: extends: .cross_system_build_job needs: @@ -154,6 +124,25 @@ cross-ppc64el-user: variables: IMAGE: debian-ppc64el-cross +# The riscv64 cross-builds currently use a 'sid' container to get +# compilers and libraries. Until something more stable is found we +# allow_failure so as not to block CI. 
+cross-riscv64-system: + extends: .cross_system_build_job + allow_failure: true + needs: + job: riscv64-debian-cross-container + variables: + IMAGE: debian-riscv64-cross + +cross-riscv64-user: + extends: .cross_user_build_job + allow_failure: true + needs: + job: riscv64-debian-cross-container + variables: + IMAGE: debian-riscv64-cross + cross-s390x-system: extends: .cross_system_build_job needs: @@ -174,7 +163,15 @@ cross-s390x-kvm-only: job: s390x-debian-cross-container variables: IMAGE: debian-s390x-cross - ACCEL_CONFIGURE_OPTS: --disable-tcg + EXTRA_CONFIGURE_OPTS: --disable-tcg + +cross-mips64el-kvm-only: + extends: .cross_accel_build_job + needs: + job: mips64el-debian-cross-container + variables: + IMAGE: debian-mips64el-cross + EXTRA_CONFIGURE_OPTS: --disable-tcg --target-list=mips64el-softmmu cross-win32-system: extends: .cross_system_build_job @@ -182,6 +179,11 @@ cross-win32-system: job: win32-fedora-cross-container variables: IMAGE: fedora-win32-cross + CROSS_SKIP_TARGETS: alpha-softmmu avr-softmmu hppa-softmmu m68k-softmmu + microblazeel-softmmu mips64el-softmmu nios2-softmmu + artifacts: + paths: + - build/qemu-setup*.exe cross-win64-system: extends: .cross_system_build_job @@ -189,6 +191,11 @@ cross-win64-system: job: win64-fedora-cross-container variables: IMAGE: fedora-win64-cross + CROSS_SKIP_TARGETS: or1k-softmmu rx-softmmu sh4eb-softmmu sparc64-softmmu + tricore-softmmu xtensaeb-softmmu + artifacts: + paths: + - build/qemu-setup*.exe cross-amd64-xen-only: extends: .cross_accel_build_job @@ -197,7 +204,7 @@ cross-amd64-xen-only: variables: IMAGE: debian-amd64-cross ACCEL: xen - ACCEL_CONFIGURE_OPTS: --disable-tcg --disable-kvm + EXTRA_CONFIGURE_OPTS: --disable-tcg --disable-kvm cross-arm64-xen-only: extends: .cross_accel_build_job @@ -206,4 +213,4 @@ cross-arm64-xen-only: variables: IMAGE: debian-arm64-cross ACCEL: xen - ACCEL_CONFIGURE_OPTS: --disable-tcg --disable-kvm + EXTRA_CONFIGURE_OPTS: --disable-tcg --disable-kvm diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml new file mode 100644 index 00000000000..056c374619b --- /dev/null +++ b/.gitlab-ci.d/custom-runners.yml @@ -0,0 +1,19 @@ +# The CI jobs defined here require GitLab runners installed and +# registered on machines that match their operating system names, +# versions and architectures. This is in contrast to the other CI +# jobs that are intended to run on GitLab's "shared" runners. + +# Different than the default approach on "shared" runners, based on +# containers, the custom runners have no such *requirement*, as those +# jobs should be capable of running on operating systems with no +# compatible container implementation, or no support from +# gitlab-runner. To avoid problems that gitlab-runner can cause while +# reusing the GIT repository, let's enable the clone strategy, which +# guarantees a fresh repository on each job run. 
+variables: + GIT_STRATEGY: clone + +include: + - local: '/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml' + - local: '/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml' + - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml' diff --git a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml new file mode 100644 index 00000000000..49aa703f55c --- /dev/null +++ b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml @@ -0,0 +1,28 @@ +centos-stream-8-x86_64: + allow_failure: true + needs: [] + stage: build + tags: + - centos_stream_8 + - x86_64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE" + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - mkdir build + - cd build + - ../scripts/ci/org.centos/stream/8/x86_64/configure + - make -j"$JOBS" + - make NINJA=":" check + - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado diff --git a/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml new file mode 100644 index 00000000000..f39d874a1e1 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml @@ -0,0 +1,118 @@ +# All ubuntu-18.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-18.04-s390x-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-18.04-s390x-all: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-alldbg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-clang: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir 
build + - cd build + - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-tci: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-18.04-s390x-notcg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml new file mode 100644 index 00000000000..920e388bd05 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml @@ -0,0 +1,118 @@ +# All ubuntu-20.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/qemu/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-20.04-aarch64-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-20.04-aarch64-all: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-alldbg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-clang: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + 
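It may help to spell out how the custom-runner jobs above and below are gated, since the rules: blocks only imply it: they run automatically just for qemu-project branches matching /^staging/, and in a fork only when a variable such as S390X_RUNNER_AVAILABLE or AARCH64_RUNNER_AVAILABLE is defined, i.e. when the fork owner has registered a runner with the matching tags and opted in; several of them are additionally when: manual with allow_failure: true so they never block a pipeline. Assuming a fork with such a runner, opting in can be as simple as defining the variable in the project's CI/CD settings, or passing it for a single run through GitLab's pipeline trigger API (project ID, branch and token below are placeholders):

    curl -X POST \
      -F token="$TRIGGER_TOKEN" \
      -F ref=my-branch \
      -F "variables[AARCH64_RUNNER_AVAILABLE]=1" \
      "https://gitlab.com/api/v4/projects/<project-id>/trigger/pipeline"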
+ubuntu-20.04-aarch64-tci: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-20.04-aarch64-notcg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/.gitlab-ci.d/edk2.yml b/.gitlab-ci.d/edk2.yml index ba7280605c4..13d0f8b019f 100644 --- a/.gitlab-ci.d/edk2.yml +++ b/.gitlab-ci.d/edk2.yml @@ -1,10 +1,22 @@ -docker-edk2: - stage: containers - rules: # Only run this job when the Dockerfile is modified +# All jobs needing docker-edk2 must use the same rules it uses. +.edk2_job_rules: + rules: # Only run this job when ... - changes: + # this file is modified - .gitlab-ci.d/edk2.yml + # or the Dockerfile is modified - .gitlab-ci.d/edk2/Dockerfile - when: always + # or roms/edk2/ is modified (submodule updated) + - roms/edk2/* + when: on_success + - if: '$CI_COMMIT_REF_NAME =~ /^edk2/' # or the branch/tag starts with 'edk2' + when: on_success + - if: '$CI_COMMIT_MESSAGE =~ /edk2/i' # or last commit description contains 'EDK2' + when: on_success + +docker-edk2: + extends: .edk2_job_rules + stage: containers image: docker:19.03.1 services: - docker:19.03.1-dind @@ -24,16 +36,9 @@ docker-edk2: - docker push $IMAGE_TAG build-edk2: + extends: .edk2_job_rules stage: build needs: ['docker-edk2'] - rules: # Only run this job when ... - - changes: # ... roms/edk2/ is modified (submodule updated) - - roms/edk2/* - when: always - - if: '$CI_COMMIT_REF_NAME =~ /^edk2/' # or the branch/tag starts with 'edk2' - when: always - - if: '$CI_COMMIT_MESSAGE =~ /edk2/i' # or last commit description contains 'EDK2' - when: always artifacts: paths: # 'artifacts.zip' will contains the following files: - pc-bios/edk2*bz2 @@ -45,7 +50,11 @@ build-edk2: GIT_DEPTH: 3 script: # Clone the required submodules and build EDK2 - git submodule update --init roms/edk2 - - git -C roms/edk2 submodule update --init + - git -C roms/edk2 submodule update --init -- + ArmPkg/Library/ArmSoftFloatLib/berkeley-softfloat-3 + BaseTools/Source/C/BrotliCompress/brotli + CryptoPkg/Library/OpensslLib/openssl + MdeModulePkg/Library/BrotliCustomDecompressLib/brotli - export JOBS=$(($(getconf _NPROCESSORS_ONLN) + 1)) - echo "=== Using ${JOBS} simultaneous jobs ===" - make -j${JOBS} -C roms efi 2>&1 1>edk2-stdout.log | tee -a edk2-stderr.log >&2 diff --git a/.gitlab-ci.d/opensbi.yml b/.gitlab-ci.d/opensbi.yml index f66cd1d9089..5e0a2477c5d 100644 --- a/.gitlab-ci.d/opensbi.yml +++ b/.gitlab-ci.d/opensbi.yml @@ -1,10 +1,23 @@ -docker-opensbi: - stage: containers - rules: # Only run this job when the Dockerfile is modified +# All jobs needing docker-opensbi must use the same rules it uses. +.opensbi_job_rules: + rules: # Only run this job when ... 
- changes: + # this file is modified - .gitlab-ci.d/opensbi.yml + # or the Dockerfile is modified - .gitlab-ci.d/opensbi/Dockerfile - when: always + when: on_success + - changes: # or roms/opensbi/ is modified (submodule updated) + - roms/opensbi/* + when: on_success + - if: '$CI_COMMIT_REF_NAME =~ /^opensbi/' # or the branch/tag starts with 'opensbi' + when: on_success + - if: '$CI_COMMIT_MESSAGE =~ /opensbi/i' # or last commit description contains 'OpenSBI' + when: on_success + +docker-opensbi: + extends: .opensbi_job_rules + stage: containers image: docker:19.03.1 services: - docker:19.03.1-dind @@ -24,16 +37,9 @@ docker-opensbi: - docker push $IMAGE_TAG build-opensbi: + extends: .opensbi_job_rules stage: build needs: ['docker-opensbi'] - rules: # Only run this job when ... - - changes: # ... roms/opensbi/ is modified (submodule updated) - - roms/opensbi/* - when: always - - if: '$CI_COMMIT_REF_NAME =~ /^opensbi/' # or the branch/tag starts with 'opensbi' - when: always - - if: '$CI_COMMIT_MESSAGE =~ /opensbi/i' # or last commit description contains 'OpenSBI' - when: always artifacts: paths: # 'artifacts.zip' will contains the following files: - pc-bios/opensbi-riscv32-generic-fw_dynamic.bin diff --git a/.gitlab-ci.d/qemu-project.yml b/.gitlab-ci.d/qemu-project.yml new file mode 100644 index 00000000000..b3d79bc429b --- /dev/null +++ b/.gitlab-ci.d/qemu-project.yml @@ -0,0 +1,13 @@ +# This file contains the set of jobs run by the QEMU project: +# https://gitlab.com/qemu-project/qemu/-/pipelines + +include: + - local: '/.gitlab-ci.d/stages.yml' + - local: '/.gitlab-ci.d/edk2.yml' + - local: '/.gitlab-ci.d/opensbi.yml' + - local: '/.gitlab-ci.d/containers.yml' + - local: '/.gitlab-ci.d/crossbuilds.yml' + - local: '/.gitlab-ci.d/buildtest.yml' + - local: '/.gitlab-ci.d/static_checks.yml' + - local: '/.gitlab-ci.d/custom-runners.yml' + - local: '/.gitlab-ci.d/cirrus.yml' diff --git a/.gitlab-ci.d/stages.yml b/.gitlab-ci.d/stages.yml new file mode 100644 index 00000000000..f50826018df --- /dev/null +++ b/.gitlab-ci.d/stages.yml @@ -0,0 +1,8 @@ +# Currently we have two build stages after our containers are built: +# - build (for traditional build and test or first stage build) +# - test (for test stages, using build artefacts from a build stage) +stages: + - containers + - containers-layer2 + - build + - test diff --git a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml new file mode 100644 index 00000000000..902843f8b3c --- /dev/null +++ b/.gitlab-ci.d/static_checks.yml @@ -0,0 +1,49 @@ +check-patch: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/centos8:latest + needs: + job: amd64-centos8-container + script: + - .gitlab-ci.d/check-patch.py + variables: + GIT_DEPTH: 1000 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project"' + when: never + - when: on_success + allow_failure: true + +check-dco: + stage: build + image: $CI_REGISTRY_IMAGE/qemu/centos8:latest + needs: + job: amd64-centos8-container + script: .gitlab-ci.d/check-dco.py + variables: + GIT_DEPTH: 1000 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH' + when: never + - when: on_success + +check-python-pipenv: + stage: test + image: $CI_REGISTRY_IMAGE/qemu/python:latest + script: + - make -C python check-pipenv + variables: + GIT_DEPTH: 1 + needs: + job: python-container + +check-python-tox: + stage: test + image: $CI_REGISTRY_IMAGE/qemu/python:latest + script: + - make -C python check-tox + variables: + GIT_DEPTH: 1 + QEMU_TOX_EXTRA_ARGS: 
--skip-missing-interpreters=false + needs: + job: python-container + allow_failure: true diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 52d65d6c04f..9762dda2ee3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,837 +1,24 @@ -# Currently we have two build stages after our containers are built: -# - build (for traditional build and test or first stage build) -# - test (for test stages, using build artefacts from a build stage) -stages: - - containers - - containers-layer2 - - build - - test - -include: - - local: '/.gitlab-ci.d/edk2.yml' - - local: '/.gitlab-ci.d/opensbi.yml' - - local: '/.gitlab-ci.d/containers.yml' - - local: '/.gitlab-ci.d/crossbuilds.yml' - -.native_build_job_template: &native_build_job_definition - stage: build - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - before_script: - - JOBS=$(expr $(nproc) + 1) - script: - - mkdir build - - cd build - - if test -n "$TARGETS"; - then - ../configure --enable-werror --disable-docs $CONFIGURE_ARGS --target-list="$TARGETS" ; - else - ../configure --enable-werror --disable-docs $CONFIGURE_ARGS ; - fi || { cat config.log meson-logs/meson-log.txt && exit 1; } - - if test -n "$LD_JOBS"; - then - meson configure . -Dbackend_max_links="$LD_JOBS" ; - fi || exit 1; - - make -j"$JOBS" - - if test -n "$MAKE_CHECK_ARGS"; - then - make -j"$JOBS" $MAKE_CHECK_ARGS ; - fi - -.native_test_job_template: &native_test_job_definition - stage: test - image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest - script: - - scripts/git-submodule.sh update - $(sed -n '/GIT_SUBMODULES=/ s/.*=// p' build/config-host.mak) - - cd build - - find . -type f -exec touch {} + - # Avoid recompiling by hiding ninja with NINJA=":" - - make NINJA=":" $MAKE_CHECK_ARGS - -.acceptance_template: &acceptance_definition - cache: - key: "${CI_JOB_NAME}-cache" - paths: - - ${CI_PROJECT_DIR}/avocado-cache - policy: pull-push - artifacts: - name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" - when: always - expire_in: 2 days - paths: - - build/tests/results/latest/results.xml - - build/tests/results/latest/test-results - reports: - junit: build/tests/results/latest/results.xml - before_script: - - mkdir -p ~/.config/avocado - - echo "[datadir.paths]" > ~/.config/avocado/avocado.conf - - echo "cache_dirs = ['${CI_PROJECT_DIR}/avocado-cache']" - >> ~/.config/avocado/avocado.conf - - echo -e '[job.output.testlogs]\nstatuses = ["FAIL", "INTERRUPT"]' - >> ~/.config/avocado/avocado.conf - - if [ -d ${CI_PROJECT_DIR}/avocado-cache ]; then - du -chs ${CI_PROJECT_DIR}/avocado-cache ; - fi - - export AVOCADO_ALLOW_UNTRUSTED_CODE=1 - after_script: - - cd build - - du -chs ${CI_PROJECT_DIR}/avocado-cache - -build-system-alpine: - <<: *native_build_job_definition - needs: - - job: amd64-alpine-container - variables: - IMAGE: alpine - TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu - moxie-softmmu microblazeel-softmmu mips64el-softmmu - MAKE_CHECK_ARGS: check-build - CONFIGURE_ARGS: --enable-docs --enable-trace-backends=log,simple,syslog - artifacts: - expire_in: 2 days - paths: - - .git-submodule-status - - build - -check-system-alpine: - <<: *native_test_job_definition - needs: - - job: build-system-alpine - artifacts: true - variables: - IMAGE: alpine - MAKE_CHECK_ARGS: check - -acceptance-system-alpine: - <<: *native_test_job_definition - needs: - - job: build-system-alpine - artifacts: true - variables: - IMAGE: alpine - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-ubuntu: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - 
variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --enable-docs --enable-fdt=system --enable-slirp=system - TARGETS: aarch64-softmmu alpha-softmmu cris-softmmu hppa-softmmu - moxie-softmmu microblazeel-softmmu mips64el-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-ubuntu: - <<: *native_test_job_definition - needs: - - job: build-system-ubuntu - artifacts: true - variables: - IMAGE: ubuntu2004 - MAKE_CHECK_ARGS: check - -acceptance-system-ubuntu: - <<: *native_test_job_definition - needs: - - job: build-system-ubuntu - artifacts: true - variables: - IMAGE: ubuntu2004 - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-debian: - <<: *native_build_job_definition - needs: - job: amd64-debian-container - variables: - IMAGE: debian-amd64 - CONFIGURE_ARGS: --enable-fdt=system - TARGETS: arm-softmmu avr-softmmu i386-softmmu mipsel-softmmu - riscv64-softmmu sh4eb-softmmu sparc-softmmu xtensaeb-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-debian: - <<: *native_test_job_definition - needs: - - job: build-system-debian - artifacts: true - variables: - IMAGE: debian-amd64 - MAKE_CHECK_ARGS: check - -acceptance-system-debian: - <<: *native_test_job_definition - needs: - - job: build-system-debian - artifacts: true - variables: - IMAGE: debian-amd64 - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-fedora: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs - --enable-fdt=system --enable-slirp=system --enable-capstone=system - TARGETS: tricore-softmmu microblaze-softmmu mips-softmmu - xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-fedora: - <<: *native_test_job_definition - needs: - - job: build-system-fedora - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-system-fedora: - <<: *native_test_job_definition - needs: - - job: build-system-fedora - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-centos: - <<: *native_build_job_definition - needs: - job: amd64-centos8-container - variables: - IMAGE: centos8 - CONFIGURE_ARGS: --disable-nettle --enable-gcrypt --enable-fdt=system - --enable-modules --enable-trace-backends=dtrace - TARGETS: ppc64-softmmu or1k-softmmu s390x-softmmu - x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-centos: - <<: *native_test_job_definition - needs: - - job: build-system-centos - artifacts: true - variables: - IMAGE: centos8 - MAKE_CHECK_ARGS: check - -acceptance-system-centos: - <<: *native_test_job_definition - needs: - - job: build-system-centos - artifacts: true - variables: - IMAGE: centos8 - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-system-opensuse: - <<: *native_build_job_definition - needs: - job: amd64-opensuse-leap-container - variables: - IMAGE: opensuse-leap - CONFIGURE_ARGS: --enable-fdt=system - TARGETS: s390x-softmmu x86_64-softmmu aarch64-softmmu - MAKE_CHECK_ARGS: check-build - artifacts: - expire_in: 2 days - paths: - - build - -check-system-opensuse: - <<: *native_test_job_definition - needs: - - job: 
build-system-opensuse - artifacts: true - variables: - IMAGE: opensuse-leap - MAKE_CHECK_ARGS: check - -acceptance-system-opensuse: - <<: *native_test_job_definition - needs: - - job: build-system-opensuse - artifacts: true - variables: - IMAGE: opensuse-leap - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - - -build-disabled: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - CONFIGURE_ARGS: - --disable-attr - --disable-auth-pam - --disable-avx2 - --disable-bochs - --disable-brlapi - --disable-bzip2 - --disable-cap-ng - --disable-capstone - --disable-cloop - --disable-coroutine-pool - --disable-curl - --disable-curses - --disable-dmg - --disable-docs - --disable-gcrypt - --disable-glusterfs - --disable-gnutls - --disable-gtk - --disable-guest-agent - --disable-iconv - --disable-keyring - --disable-kvm - --disable-libiscsi - --disable-libpmem - --disable-libssh - --disable-libudev - --disable-libusb - --disable-libxml2 - --disable-linux-aio - --disable-live-block-migration - --disable-lzo - --disable-malloc-trim - --disable-mpath - --disable-nettle - --disable-numa - --disable-opengl - --disable-parallels - --disable-pie - --disable-qcow1 - --disable-qed - --disable-qom-cast-debug - --disable-rbd - --disable-rdma - --disable-replication - --disable-sdl - --disable-seccomp - --disable-sheepdog - --disable-slirp - --disable-smartcard - --disable-snappy - --disable-sparse - --disable-spice - --disable-strip - --disable-tpm - --disable-usb-redir - --disable-vdi - --disable-vhost-crypto - --disable-vhost-net - --disable-vhost-scsi - --disable-vhost-kernel - --disable-vhost-user - --disable-vhost-vdpa - --disable-vhost-vsock - --disable-virglrenderer - --disable-vnc - --disable-vte - --disable-vvfat - --disable-xen - --disable-zstd - TARGETS: arm-softmmu i386-softmmu ppc64-softmmu mips64-softmmu - s390x-softmmu i386-linux-user - MAKE_CHECK_ARGS: check-qtest SPEED=slow - -# This jobs explicitly disable TCG (--disable-tcg), KVM is detected by -# the configure script. The container doesn't contain Xen headers so -# Xen accelerator is not detected / selected. As result it build the -# i386-softmmu and x86_64-softmmu with KVM being the single accelerator -# available. -# Also use a different coroutine implementation (which is only really of -# interest to KVM users, i.e. 
with TCG disabled) -build-tcg-disabled: - <<: *native_build_job_definition - needs: - job: amd64-centos8-container - variables: - IMAGE: centos8 - script: - - mkdir build - - cd build - - ../configure --disable-tcg --audio-drv-list="" --with-coroutine=ucontext - || { cat config.log meson-logs/meson-log.txt && exit 1; } - - make -j"$JOBS" - - make check-unit - - make check-qapi-schema - - cd tests/qemu-iotests/ - - ./check -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 048 - 052 063 077 086 101 104 106 113 148 150 151 152 157 159 160 163 - 170 171 183 184 192 194 197 208 215 221 222 226 227 236 253 277 - - ./check -qcow2 028 051 056 057 058 065 068 082 085 091 095 096 102 122 - 124 132 139 142 144 145 151 152 155 157 165 194 196 197 200 202 - 208 209 215 216 218 222 227 234 246 247 248 250 254 255 257 258 - 260 261 262 263 264 270 272 273 277 279 - -build-user: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-system - MAKE_CHECK_ARGS: check-tcg - -build-user-static: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-system --static - MAKE_CHECK_ARGS: check-tcg - -# Only build the softmmu targets we have check-tcg tests for -build-some-softmmu: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --enable-debug - TARGETS: xtensa-softmmu arm-softmmu aarch64-softmmu alpha-softmmu - MAKE_CHECK_ARGS: check-tcg - -# Run check-tcg against linux-user (with plugins) -# we skip sparc64-linux-user until it has been fixed somewhat -# we skip cris-linux-user as it doesn't use the common run loop -build-user-plugins: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-system --enable-plugins --enable-debug-tcg --target-list-exclude=sparc64-linux-user,cris-linux-user - MAKE_CHECK_ARGS: check-tcg - timeout: 1h 30m - -build-user-centos7: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - CONFIGURE_ARGS: --disable-system --disable-tools --disable-docs - MAKE_CHECK_ARGS: check-tcg - -build-some-softmmu-plugins: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools --disable-user --enable-plugins --enable-debug-tcg - TARGETS: xtensa-softmmu arm-softmmu aarch64-softmmu alpha-softmmu - MAKE_CHECK_ARGS: check-tcg - -clang-system: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ - --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined - TARGETS: alpha-softmmu arm-softmmu m68k-softmmu mips64-softmmu - ppc-softmmu s390x-softmmu - MAKE_CHECK_ARGS: check-qtest check-tcg - -clang-user: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --disable-system - --target-list-exclude=microblazeel-linux-user,aarch64_be-linux-user,i386-linux-user,m68k-linux-user,mipsn32el-linux-user,xtensaeb-linux-user - --extra-cflags=-fsanitize=undefined 
--extra-cflags=-fno-sanitize-recover=undefined - MAKE_CHECK_ARGS: check-unit check-tcg - -# Set LD_JOBS=1 because this requires LTO and ld consumes a large amount of memory. -# On gitlab runners, default value sometimes end up calling 2 lds concurrently and -# triggers an Out-Of-Memory error # -# Since slirp callbacks are used in QEMU Timers, slirp needs to be compiled together -# with QEMU and linked as a static library to avoid false positives in CFI checks. -# This can be accomplished by using -enable-slirp=git, which avoids the use of -# a system-wide version of the library +# This is the GitLab CI configuration file for the mainstream QEMU +# project: https://gitlab.com/qemu-project/qemu/-/pipelines # -# Split in three sets of build/check/acceptance to limit the execution time of each -# job -build-cfi-aarch64: - <<: *native_build_job_definition - needs: - - job: amd64-fedora-container - variables: - LD_JOBS: 1 - AR: llvm-ar - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug - --enable-safe-stack --enable-slirp=git - TARGETS: aarch64-softmmu - MAKE_CHECK_ARGS: check-build - timeout: 70m - artifacts: - expire_in: 2 days - paths: - - build - -check-cfi-aarch64: - <<: *native_test_job_definition - needs: - - job: build-cfi-aarch64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-cfi-aarch64: - <<: *native_test_job_definition - needs: - - job: build-cfi-aarch64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-cfi-ppc64-s390x: - <<: *native_build_job_definition - needs: - - job: amd64-fedora-container - variables: - LD_JOBS: 1 - AR: llvm-ar - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug - --enable-safe-stack --enable-slirp=git - TARGETS: ppc64-softmmu s390x-softmmu - MAKE_CHECK_ARGS: check-build - timeout: 70m - artifacts: - expire_in: 2 days - paths: - - build - -check-cfi-ppc64-s390x: - <<: *native_test_job_definition - needs: - - job: build-cfi-ppc64-s390x - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-cfi-ppc64-s390x: - <<: *native_test_job_definition - needs: - - job: build-cfi-ppc64-s390x - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -build-cfi-x86_64: - <<: *native_build_job_definition - needs: - - job: amd64-fedora-container - variables: - LD_JOBS: 1 - AR: llvm-ar - IMAGE: fedora - CONFIGURE_ARGS: --cc=clang --cxx=clang++ --enable-cfi --enable-cfi-debug - --enable-safe-stack --enable-slirp=git - TARGETS: x86_64-softmmu - MAKE_CHECK_ARGS: check-build - timeout: 70m - artifacts: - expire_in: 2 days - paths: - - build - -check-cfi-x86_64: - <<: *native_test_job_definition - needs: - - job: build-cfi-x86_64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check - -acceptance-cfi-x86_64: - <<: *native_test_job_definition - needs: - - job: build-cfi-x86_64 - artifacts: true - variables: - IMAGE: fedora - MAKE_CHECK_ARGS: check-acceptance - <<: *acceptance_definition - -tsan-build: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --enable-tsan --cc=clang-10 --cxx=clang++-10 - --enable-trace-backends=ust --enable-fdt=system --enable-slirp=system - TARGETS: x86_64-softmmu ppc64-softmmu riscv64-softmmu x86_64-linux-user - MAKE_CHECK_ARGS: bench V=1 - -# These targets are on the way out 
-build-deprecated: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - CONFIGURE_ARGS: --disable-tools - MAKE_CHECK_ARGS: build-tcg - TARGETS: ppc64abi32-linux-user lm32-softmmu unicore32-softmmu - artifacts: - expire_in: 2 days - paths: - - build - -# We split the check-tcg step as test failures are expected but we still -# want to catch the build breaking. -check-deprecated: - <<: *native_test_job_definition - needs: - - job: build-deprecated - artifacts: true - variables: - IMAGE: debian-all-test-cross - MAKE_CHECK_ARGS: check-tcg - allow_failure: true - -# gprof/gcov are GCC features -gprof-gcov: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --enable-gprof --enable-gcov - MAKE_CHECK_ARGS: check - TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu - timeout: 70m - after_script: - - ${CI_PROJECT_DIR}/scripts/ci/coverage-summary.sh - -build-oss-fuzz: - <<: *native_build_job_definition - needs: - job: amd64-fedora-container - variables: - IMAGE: fedora - script: - - mkdir build-oss-fuzz - - CC="clang" CXX="clang++" CFLAGS="-fsanitize=address" - ./scripts/oss-fuzz/build.sh - - export ASAN_OPTIONS="fast_unwind_on_malloc=0" - - for fuzzer in $(find ./build-oss-fuzz/DEST_DIR/ -executable -type f - | grep -v slirp); do - grep "LLVMFuzzerTestOneInput" ${fuzzer} > /dev/null 2>&1 || continue ; - echo Testing ${fuzzer} ... ; - "${fuzzer}" -runs=1 -seed=1 || exit 1 ; - done - # Unrelated to fuzzer: run some tests with -fsanitize=address - - cd build-oss-fuzz && make check-qtest-i386 check-unit - -build-tci: - <<: *native_build_job_definition - needs: - job: amd64-debian-user-cross-container - variables: - IMAGE: debian-all-test-cross - script: - - TARGETS="aarch64 alpha arm hppa m68k microblaze moxie ppc64 s390x x86_64" - - mkdir build - - cd build - - ../configure --enable-tcg-interpreter - --target-list="$(for tg in $TARGETS; do echo -n ${tg}'-softmmu '; done)" || { cat config.log meson-logs/meson-log.txt && exit 1; } - - make -j"$JOBS" - - make tests/qtest/boot-serial-test tests/qtest/cdrom-test tests/qtest/pxe-test - - for tg in $TARGETS ; do - export QTEST_QEMU_BINARY="./qemu-system-${tg}" ; - ./tests/qtest/boot-serial-test || exit 1 ; - ./tests/qtest/cdrom-test || exit 1 ; - done - - QTEST_QEMU_BINARY="./qemu-system-x86_64" ./tests/qtest/pxe-test - - QTEST_QEMU_BINARY="./qemu-system-s390x" ./tests/qtest/pxe-test -m slow - - make check-tcg - -# Alternate coroutines implementations are only really of interest to KVM users -# However we can't test against KVM on Gitlab-CI so we can only run unit tests -build-coroutine-sigaltstack: - <<: *native_build_job_definition - needs: - job: amd64-ubuntu2004-container - variables: - IMAGE: ubuntu2004 - CONFIGURE_ARGS: --with-coroutine=sigaltstack --disable-tcg - --enable-trace-backends=ftrace - MAKE_CHECK_ARGS: check-unit - -# Most jobs test latest gcrypt or nettle builds +# !!! DO NOT ADD ANY NEW CONFIGURATION TO THIS FILE !!! +# +# Only documentation or comments is accepted. +# +# To use a different set of jobs than the mainstream QEMU project, +# you need to set the location of your custom yml file at "custom CI/CD +# configuration path", on your GitLab CI namespace: +# https://docs.gitlab.com/ee/ci/pipelines/settings.html#custom-cicd-configuration-path +# +# ---------------------------------------------------------------------- +# +# QEMU CI jobs are based on templates. 
Some templates provide +# user-configurable options, modifiable via configuration variables. +# +# See https://qemu-project.gitlab.io/qemu/devel/ci.html#custom-ci-cd-variables +# for more information. # -# These jobs test old gcrypt and nettle from RHEL7 -# which had some API differences. -crypto-old-nettle: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - TARGETS: x86_64-softmmu x86_64-linux-user - CONFIGURE_ARGS: --disable-gcrypt --enable-nettle - MAKE_CHECK_ARGS: check - -crypto-old-gcrypt: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - TARGETS: x86_64-softmmu x86_64-linux-user - CONFIGURE_ARGS: --disable-nettle --enable-gcrypt - MAKE_CHECK_ARGS: check - -crypto-only-gnutls: - <<: *native_build_job_definition - needs: - job: amd64-centos7-container - variables: - IMAGE: centos7 - TARGETS: x86_64-softmmu x86_64-linux-user - CONFIGURE_ARGS: --disable-nettle --disable-gcrypt --enable-gnutls - MAKE_CHECK_ARGS: check - - -# Check our reduced build configurations -build-without-default-devices: - <<: *native_build_job_definition - needs: - job: amd64-centos8-container - variables: - IMAGE: centos8 - CONFIGURE_ARGS: --without-default-devices --disable-user - -build-without-default-features: - <<: *native_build_job_definition - needs: - job: amd64-debian-container - variables: - IMAGE: debian-amd64 - CONFIGURE_ARGS: --without-default-features --disable-user - --target-list-exclude=arm-softmmu,i386-softmmu,mipsel-softmmu,mips64-softmmu,ppc-softmmu - MAKE_CHECK_ARGS: check-unit - -check-patch: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/centos8:latest - needs: - job: amd64-centos8-container - script: .gitlab-ci.d/check-patch.py - except: - variables: - - $CI_PROJECT_NAMESPACE == 'qemu-project' && $CI_COMMIT_BRANCH == 'master' - variables: - GIT_DEPTH: 1000 - allow_failure: true - -check-dco: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/centos8:latest - needs: - job: amd64-centos8-container - script: .gitlab-ci.d/check-dco.py - except: - variables: - - $CI_PROJECT_NAMESPACE == 'qemu-project' && $CI_COMMIT_BRANCH == 'master' - variables: - GIT_DEPTH: 1000 - -build-libvhost-user: - stage: build - image: $CI_REGISTRY_IMAGE/qemu/fedora:latest - needs: - job: amd64-fedora-container - before_script: - - dnf install -y meson ninja-build - script: - - mkdir subprojects/libvhost-user/build - - cd subprojects/libvhost-user/build - - meson - - ninja - -# No targets are built here, just tools, docs, and unit tests. This -# also feeds into the eventual documentation deployment steps later -build-tools-and-docs-debian: - <<: *native_build_job_definition - needs: - job: amd64-debian-container - variables: - IMAGE: debian-amd64 - MAKE_CHECK_ARGS: check-unit check-softfloat ctags TAGS cscope - CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools - artifacts: - expire_in: 2 days - paths: - - build -# Prepare for GitLab pages deployment. 
Anything copied into the -# "public" directory will be deployed to $USER.gitlab.io/$PROJECT -pages: - image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:latest - stage: test - needs: - - job: build-tools-and-docs-debian - script: - - mkdir -p public - # HTML-ised source tree - - make gtags - - htags -anT --tree-view=filetree -m qemu_init - -t "Welcome to the QEMU sourcecode" - - mv HTML public/src - # Project documentation - - make -C build install DESTDIR=$(pwd)/temp-install - - mv temp-install/usr/local/share/doc/qemu/* public/ - artifacts: - paths: - - public +include: + - local: '/.gitlab-ci.d/qemu-project.yml' diff --git a/.gitlab/issue_templates/bug.md b/.gitlab/issue_templates/bug.md new file mode 100644 index 00000000000..e910f7b1c29 --- /dev/null +++ b/.gitlab/issue_templates/bug.md @@ -0,0 +1,64 @@ + + +## Host environment + - Operating system: (Windows 10 21H1, Fedora 34, etc.) + - OS/kernel version: (For POSIX hosts, use `uname -a`) + - Architecture: (x86, ARM, s390x, etc.) + - QEMU flavor: (qemu-system-x86_64, qemu-aarch64, qemu-img, etc.) + - QEMU version: (e.g. `qemu-system-x86_64 --version`) + - QEMU command line: + + ``` + ./qemu-system-x86_64 -M q35 -m 4096 -enable-kvm -hda fedora32.qcow2 + ``` + +## Emulated/Virtualized environment + - Operating system: (Windows 10 21H1, Fedora 34, etc.) + - OS/kernel version: (For POSIX guests, use `uname -a`.) + - Architecture: (x86, ARM, s390x, etc.) + + +## Description of problem + + + +## Steps to reproduce +1. +2. +3. + + +## Additional information + + + + +/label ~"kind::Bug" diff --git a/.gitlab/issue_templates/feature_request.md b/.gitlab/issue_templates/feature_request.md new file mode 100644 index 00000000000..7de02dcc2c9 --- /dev/null +++ b/.gitlab/issue_templates/feature_request.md @@ -0,0 +1,32 @@ + + +## Goal + + + +## Technical details + + + +## Additional information + + + +/label ~"kind::Feature Request" diff --git a/.mailmap b/.mailmap index a1bd659817d..8beb2f95ae2 100644 --- a/.mailmap +++ b/.mailmap @@ -27,6 +27,10 @@ Paul Brook pbrook ths malc malc +# Corrupted Author fields +Marek Dolata mkdolata@us.ibm.com +Nick Hudson hnick@vmware.com + # There is also a: # (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162> # for the cvs2svn initialization commit e63c3dc74bf. @@ -65,6 +69,7 @@ Yongbok Kim # git author config, or had utf8/latin1 encoding issues. Aaron Lindsay Alexey Gerasimenko +Alex Chen Alex Ivanov Andreas Färber Bandan Das @@ -95,8 +100,11 @@ Gautham R. Shenoy Gautham R. Shenoy Gonglei (Arei) Guang Wang +Haibin Zhang Hailiang Zhang +Hanna Reitz Hervé Poussineau +Hyman Huang Jakub Jermář Jakub Jermář Jean-Christophe Dubois @@ -130,6 +138,7 @@ Nicholas Thomas Nikunj A Dadhania Orit Wasserman Paolo Bonzini +Pan Nengyuan Pavel Dovgaluk Pavel Dovgaluk Pavel Dovgaluk diff --git a/.patchew.yml b/.patchew.yml index 988c29261f9..1b78262ce58 100644 --- a/.patchew.yml +++ b/.patchew.yml @@ -88,7 +88,7 @@ email: more information: {{ logtext }} - {% elif test == "docker-mingw@fedora" or test == "docker-quick@centos7" or test == "asan" %} + {% elif test == "docker-mingw@fedora" or test == "docker-quick@centos8" or test == "asan" %} Hi, This series failed the {{ test }} build test. 
Please find the testing commands and @@ -124,13 +124,13 @@ testing: script: | #!/bin/bash time make docker-test-debug@fedora TARGET_LIST=x86_64-softmmu J=14 NETWORK=1 - docker-quick@centos7: + docker-quick@centos8: enabled: false requirements: docker,x86_64 timeout: 3600 script: | #!/bin/bash - time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1 + time make docker-test-quick@centos8 SHOW_ENV=1 J=14 NETWORK=1 checkpatch: enabled: true requirements: '' @@ -138,9 +138,6 @@ testing: script: | #!/bin/bash git rev-parse base > /dev/null || exit 0 - git config --local diff.renamelimit 0 - git config --local diff.renames True - git config --local diff.algorithm histogram ./scripts/checkpatch.pl --mailback base.. docker-mingw@fedora: enabled: true diff --git a/.require_clean_build b/.require_clean_build index aafd98dbfa6..8d5cb63a435 100644 --- a/.require_clean_build +++ b/.require_clean_build @@ -4,3 +4,4 @@ # https://github.com/CTSRD-CHERI/qemu/commit/56c3469a6d63c64fbe966af302de306e82cd5b6a # https://github.com/CTSRD-CHERI/qemu/commit/514635ea947fd44122fa9130782b5fc63873d2f6 20230717 # CGetHigh and CSetHigh added, meson does not track dependencies correctly so we need a full rebuild +20250106 # Merge to 6.2 breaks incremental builds due to missing config options - new configure required diff --git a/.travis.yml b/.travis.yml index 4609240b5aa..41010ebe6bc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,7 @@ addons: - libattr1-dev - libbrlapi-dev - libcap-ng-dev + - libcacard-dev - libgcc-7-dev - libgnutls28-dev - libgtk-3-dev @@ -34,7 +35,6 @@ addons: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -129,6 +129,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -137,7 +138,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -163,6 +163,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -171,7 +172,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -196,6 +196,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -204,7 +205,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -238,6 +238,7 @@ jobs: apt_packages: - libaio-dev - libattr1-dev + - libcacard-dev - libcap-ng-dev - libgnutls28-dev - libiscsi-dev @@ -245,7 +246,6 @@ jobs: - liblzo2-dev - libncurses-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libsdl2-dev - libsdl2-image-dev @@ -281,6 +281,7 @@ jobs: - libaio-dev - libattr1-dev - libbrlapi-dev + - libcacard-dev - libcap-ng-dev - libgcrypt20-dev - libgnutls28-dev @@ -289,7 +290,6 @@ jobs: - liblttng-ust-dev - libncurses5-dev - libnfs-dev - - libnss3-dev - libpixman-1-dev - libpng-dev - librados-dev @@ -305,26 +305,3 @@ jobs: - CONFIG="--disable-containers --disable-tcg --enable-kvm --disable-tools --host-cc=clang --cxx=clang++" - UNRELIABLE=true - - # Release builds - # The make-release script expect a QEMU version, so our tag must start with a 'v'. - # This is the case when release candidate tags are created. 
- - name: "Release tarball" - if: tag IS present AND tag =~ /^v\d+\.\d+(\.\d+)?(-\S*)?$/ - env: - # We want to build from the release tarball - - BUILD_DIR="release/build/dir" SRC_DIR="../../.." - - BASE_CONFIG="--prefix=$PWD/dist" - - CONFIG="--target-list=x86_64-softmmu,aarch64-softmmu,armeb-linux-user,ppc-linux-user" - - TEST_CMD="make install -j${JOBS}" - - QEMU_VERSION="${TRAVIS_TAG:1}" - - CACHE_NAME="${TRAVIS_BRANCH}-linux-gcc-default" - script: - - make -C ${SRC_DIR} qemu-${QEMU_VERSION}.tar.bz2 - - ls -l ${SRC_DIR}/qemu-${QEMU_VERSION}.tar.bz2 - - tar -xf ${SRC_DIR}/qemu-${QEMU_VERSION}.tar.bz2 && cd qemu-${QEMU_VERSION} - - mkdir -p release-build && cd release-build - - ../configure ${BASE_CONFIG} ${CONFIG} || { cat config.log meson-logs/meson-log.txt && exit 1; } - - make install - allow_failures: - - env: UNRELIABLE=true diff --git a/Jenkinsfile b/Jenkinsfile index eb33f5d27a8..a065736cd9c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -73,12 +73,13 @@ def addBootJobs(bootJobs, params, String qemuConfig, String architecture, String def bootCheriBSDForAllArchitectures(params, String qemuConfig, boolean isDebug) { stage("Boot CheriBSD (${qemuConfig})") { - bootJobs = [failFast: false] + def bootJobs = [failFast: false] ["riscv64", "riscv64-purecap", "aarch64", "morello-purecap"].each { String architecture -> addBootJobs(bootJobs, params, qemuConfig, architecture, "main") if (!isDebug) { // For the non-ASAN build of QEMU we also boot the latest release - addBootJobs(bootJobs, params, qemuConfig, architecture, "releng%252F22.12", "-latest-release") + def latestRelease = cheribsdInfo.getReleasedVersions()[-1] + addBootJobs(bootJobs, params, qemuConfig, architecture, "releng/${latestRelease}", "-latest-release") } def targetBranch = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME; if (targetBranch == 'dev') { @@ -106,7 +107,8 @@ def bootCheriBSD(params, String qemuConfig, String stageSuffix, String archSuffi def compressedDiskImage = "artifacts-${archSuffix}/cheribsd-${archSuffix}.img.xz" dir (stageSuffix) { sh "rm -rfv artifacts-${archSuffix}/cheribsd-*.img* artifacts-${archSuffix}/kernel*" - copyArtifacts projectName: "CheriBSD-pipeline/${cheribsdBranch}", filter: "${compressedDiskImage}, ${compressedKernel}", + def jenkinsBranchName = cheribsdBranch.replaceAll('/', '%2F') + copyArtifacts projectName: "CheriBSD-pipeline/${jenkinsBranchName}", filter: "${compressedDiskImage}, ${compressedKernel}", target: '.', fingerprintArtifacts: false, flatten: false, selector: lastSuccessful() } def testExtraArgs = [ diff --git a/Kconfig b/Kconfig index d52ebd839b3..fb6a24a2de8 100644 --- a/Kconfig +++ b/Kconfig @@ -1,5 +1,6 @@ source Kconfig.host source backends/Kconfig source accel/Kconfig +source target/Kconfig source hw/Kconfig source semihosting/Kconfig diff --git a/Kconfig.host b/Kconfig.host index 24255ef4419..60b9c07b5ee 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -41,3 +41,7 @@ config PVRDMA config MULTIPROCESS_ALLOWED bool imply MULTIPROCESS + +config FUZZ + bool + select SPARSE_MEM diff --git a/MAINTAINERS b/MAINTAINERS index 36055f14c59..7543eb4d597 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -87,7 +87,7 @@ S390 general architecture support M: Cornelia Huck M: Thomas Huth S: Supported -F: default-configs/*/s390x-softmmu.mak +F: configs/devices/s390x-softmmu/default.mak F: gdb-xml/s390*.xml F: hw/char/sclp*.[hc] F: hw/char/terminal3270.c @@ -109,6 +109,12 @@ K: ^Subject:.*(?i)s390x? 
T: git https://gitlab.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org +MIPS general architecture support +M: Philippe Mathieu-Daudé +R: Jiaxun Yang +S: Odd Fixes +K: ^Subject:.*(?i)mips + Guest CPU cores (TCG) --------------------- Overall TCG CPUs @@ -128,7 +134,6 @@ F: docs/devel/decodetree.rst F: include/exec/cpu*.h F: include/exec/exec-all.h F: include/exec/helper*.h -F: include/exec/tb-hash.h F: include/sysemu/cpus.h F: include/sysemu/tcg.h F: include/hw/core/tcg-cpu-ops.h @@ -156,6 +161,7 @@ S: Maintained F: target/arm/ F: tests/tcg/arm/ F: tests/tcg/aarch64/ +F: tests/qtest/arm-cpu-features.c F: hw/arm/ F: hw/cpu/a*mpcore.c F: include/hw/cpu/a*mpcore.h @@ -171,6 +177,7 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/arm/smmu* F: include/hw/arm/smmu* +F: tests/avocado/smmu.py AVR TCG CPUs M: Michael Rolnik @@ -178,7 +185,7 @@ S: Maintained F: docs/system/target-avr.rst F: gdb-xml/avr-cpu.xml F: target/avr/ -F: tests/acceptance/machine_avr6.py +F: tests/avocado/machine_avr6.py CRIS TCG CPUs M: Edgar E. Iglesias @@ -196,29 +203,15 @@ F: target/hexagon/ F: linux-user/hexagon/ F: tests/tcg/hexagon/ F: disas/hexagon.c -F: default-configs/targets/hexagon-linux-user.mak +F: configs/targets/hexagon-linux-user/default.mak +F: docker/dockerfiles/debian-hexagon-cross.docker +F: docker/dockerfiles/debian-hexagon-cross.docker.d/build-toolchain.sh HPPA (PA-RISC) TCG CPUs M: Richard Henderson S: Maintained F: target/hppa/ -F: hw/hppa/ F: disas/hppa.c -F: hw/net/*i82596* -F: include/hw/net/lasi_82596.h - -LM32 TCG CPUs -R: Michael Walle -S: Orphan -F: target/lm32/ -F: disas/lm32.c -F: hw/lm32/ -F: hw/*/lm32_* -F: hw/*/milkymist-* -F: include/hw/display/milkymist_tmu2.h -F: include/hw/char/lm32_juart.h -F: include/hw/lm32/ -F: tests/tcg/lm32/ M68K TCG CPUs M: Laurent Vivier @@ -232,6 +225,8 @@ S: Maintained F: target/microblaze/ F: hw/microblaze/ F: disas/microblaze.c +F: tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh +F: tests/tcg/nios2/Makefile.target MIPS TCG CPUs M: Philippe Mathieu-Daudé @@ -240,31 +235,14 @@ R: Jiaxun Yang R: Aleksandar Rikalo S: Odd Fixes F: target/mips/ -F: default-configs/*/*mips* F: disas/mips.c F: docs/system/cpu-models-mips.rst.inc -F: hw/intc/mips_gic.c -F: hw/mips/ -F: hw/misc/mips_* -F: hw/timer/mips_gictimer.c -F: include/hw/intc/mips_gic.h -F: include/hw/mips/ -F: include/hw/misc/mips_* -F: include/hw/timer/mips_gictimer.h F: tests/tcg/mips/ -K: ^Subject:.*(?i)mips MIPS TCG CPUs (nanoMIPS ISA) S: Orphan F: disas/nanomips.* - -Moxie TCG CPUs -M: Anthony Green -S: Maintained -F: target/moxie/ -F: disas/moxie.c -F: hw/moxie/ -F: default-configs/*/moxie-softmmu.mak +F: target/mips/tcg/*nanomips* NiosII TCG CPUs M: Chris Wulff @@ -273,7 +251,8 @@ S: Maintained F: target/nios2/ F: hw/nios2/ F: disas/nios2.c -F: default-configs/*/nios2-softmmu.mak +F: configs/devices/nios2-softmmu/default.mak +F: tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh OpenRISC TCG CPUs M: Stafford Horne @@ -283,21 +262,22 @@ F: hw/openrisc/ F: tests/tcg/openrisc/ PowerPC TCG CPUs -M: David Gibson -M: Greg Kurz +M: Cédric Le Goater +M: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: target/ppc/ -F: hw/ppc/ -F: include/hw/ppc/ +F: hw/ppc/ppc.c +F: hw/ppc/ppc_booke.c +F: include/hw/ppc/ppc.h F: disas/ppc.c -F: tests/acceptance/machine_ppc.py RISC-V TCG CPUs M: Palmer Dabbelt -M: Alistair Francis -M: Sagar Karandikar -M: Bastian Koppelmann +M: Alistair Francis +M: Bin Meng L: qemu-riscv@nongnu.org S: Supported 
F: target/riscv/ @@ -316,6 +296,8 @@ M: Richard Henderson M: David Hildenbrand S: Maintained F: target/s390x/ +F: target/s390x/tcg +F: target/s390x/cpu_models_*.[ch] F: hw/s390x/ F: disas/s390.c F: tests/tcg/s390x/ @@ -339,24 +321,17 @@ F: hw/sparc64/ F: include/hw/sparc/sparc64.h F: disas/sparc.c -UniCore32 TCG CPUs -M: Guan Xuetao -S: Maintained -F: target/unicore32/ -F: hw/unicore32/ -F: include/hw/unicore32/ - X86 TCG CPUs M: Paolo Bonzini M: Richard Henderson -M: Eduardo Habkost +M: Eduardo Habkost S: Maintained -F: target/i386/ +F: target/i386/tcg/ F: tests/tcg/i386/ F: tests/tcg/x86_64/ F: hw/i386/ F: disas/i386.c -F: docs/system/cpu-models-x86.rst.inc +F: docs/system/cpu-models-x86* T: git https://gitlab.com/ehabkost/qemu.git x86-next Xtensa TCG CPUs @@ -368,7 +343,7 @@ F: hw/xtensa/ F: tests/tcg/xtensa/ F: disas/xtensa.c F: include/hw/xtensa/xtensa-isa.h -F: default-configs/*/xtensa*.mak +F: configs/devices/xtensa*/default.mak TriCore TCG CPUs M: Bastian Koppelmann @@ -376,6 +351,7 @@ S: Maintained F: target/tricore/ F: hw/tricore/ F: include/hw/tricore/ +F: tests/tcg/tricore/ Multiarch Linux User Tests M: Alex Bennée @@ -404,22 +380,22 @@ F: target/arm/kvm.c MIPS KVM CPUs M: Huacai Chen S: Odd Fixes -F: target/mips/kvm.c +F: target/mips/kvm* +F: target/mips/sysemu/ PPC KVM CPUs -M: David Gibson -M: Greg Kurz +M: Cédric Le Goater +M: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz S: Maintained F: target/ppc/kvm.c S390 KVM CPUs M: Halil Pasic -M: Cornelia Huck M: Christian Borntraeger S: Supported -F: target/s390x/kvm.c -F: target/s390x/kvm_s390x.h -F: target/s390x/kvm-stub.c +F: target/s390x/kvm/ F: target/s390x/ioinst.[ch] F: target/s390x/machine.c F: target/s390x/sigp.c @@ -431,7 +407,6 @@ F: hw/intc/s390_flic.c F: hw/intc/s390_flic_kvm.c F: include/hw/s390x/s390_flic.h F: gdb-xml/s390*.xml -T: git https://gitlab.com/cohuck/qemu.git s390-next T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org @@ -440,7 +415,10 @@ M: Paolo Bonzini M: Marcelo Tosatti L: kvm@vger.kernel.org S: Supported +F: docs/amd-memory-encryption.txt +F: docs/system/i386/sgx.rst F: target/i386/kvm/ +F: target/i386/sev* F: scripts/kvm/vmxcap Guest CPU Cores (other accelerators) @@ -456,13 +434,26 @@ F: accel/accel-*.c F: accel/Makefile.objs F: accel/stubs/Makefile.objs +Apple Silicon HVF CPUs +M: Alexander Graf +S: Maintained +F: target/arm/hvf/ + X86 HVF CPUs M: Cameron Esfahani M: Roman Bolshakov W: https://wiki.qemu.org/Features/HVF S: Maintained F: target/i386/hvf/ + +HVF +M: Cameron Esfahani +M: Roman Bolshakov +W: https://wiki.qemu.org/Features/HVF +S: Maintained +F: accel/hvf/ F: include/sysemu/hvf.h +F: include/sysemu/hvf_int.h WHPX CPUs M: Sunil Muthuswamy @@ -509,6 +500,15 @@ F: accel/stubs/hax-stub.c F: include/sysemu/hax.h F: target/i386/hax/ +Guest CPU Cores (NVMM) +---------------------- +NetBSD Virtual Machine Monitor (NVMM) CPU support +M: Kamil Rytarowski +M: Reinoud Zandijk +S: Maintained +F: include/sysemu/nvmm.h +F: target/i386/nvmm/ + Hosts ----- LINUX @@ -529,6 +529,8 @@ F: include/qemu/*posix*.h NETBSD M: Kamil Rytarowski +M: Reinoud Zandijk +M: Ryo ONODERA S: Maintained K: ^Subject:.*(?i)NetBSD @@ -564,6 +566,7 @@ S: Odd Fixes F: hw/*/allwinner* F: include/hw/*/allwinner* F: hw/arm/cubieboard.c +F: docs/system/arm/cubieboard.rst Allwinner-h3 M: Niek Linnenbank @@ -620,6 +623,7 @@ F: hw/intc/gic_internal.h F: hw/misc/a9scu.c F: hw/misc/arm11scu.c F: hw/misc/arm_l2x0.c +F: hw/misc/armv7m_ras.c F: hw/timer/a9gtimer* F: hw/timer/arm* F: 
include/hw/arm/arm*.h @@ -629,6 +633,7 @@ F: include/hw/misc/arm11scu.h F: include/hw/timer/a9gtimer.h F: include/hw/timer/arm_mptimer.h F: include/hw/timer/armv7m_systick.h +F: include/hw/misc/armv7m_ras.h F: tests/qtest/test-arm-mptimer.c Exynos @@ -646,6 +651,7 @@ L: qemu-arm@nongnu.org S: Odd Fixes F: hw/arm/highbank.c F: hw/net/xgmac.c +F: docs/system/arm/highbank.rst Canon DIGIC M: Antony Pavlov @@ -655,7 +661,7 @@ S: Odd Fixes F: include/hw/arm/digic.h F: hw/*/digic* F: include/hw/*/digic* -F: tests/acceptance/machine_arm_canona1100.py +F: tests/avocado/machine_arm_canona1100.py F: docs/system/arm/digic.rst Goldfish RTC @@ -686,6 +692,7 @@ F: hw/watchdog/wdt_imx2.c F: include/hw/arm/fsl-imx25.h F: include/hw/misc/imx25_ccm.h F: include/hw/watchdog/wdt_imx2.h +F: docs/system/arm/imx25-pdk.rst i.MX31 (kzm) M: Peter Maydell @@ -696,6 +703,7 @@ F: hw/*/imx_* F: hw/*/*imx31* F: include/hw/*/imx_* F: include/hw/*/*imx31* +F: docs/system/arm/kzm.rst Integrator CP M: Peter Maydell @@ -704,7 +712,7 @@ S: Maintained F: hw/arm/integratorcp.c F: hw/misc/arm_integrator_debug.c F: include/hw/misc/arm_integrator_debug.h -F: tests/acceptance/machine_arm_integratorcp.py +F: tests/avocado/machine_arm_integratorcp.py F: docs/system/arm/integratorcp.rst MCIMX6UL EVK / i.MX6ul @@ -788,7 +796,6 @@ F: roms/vbootrom F: docs/system/arm/nuvoton.rst nSeries -M: Andrzej Zaborowski M: Peter Maydell L: qemu-arm@nongnu.org S: Odd Fixes @@ -799,13 +806,13 @@ F: hw/input/tsc2005.c F: hw/misc/cbus.c F: hw/rtc/twl92230.c F: include/hw/display/blizzard.h +F: include/hw/input/lm832x.h F: include/hw/input/tsc2xxx.h F: include/hw/misc/cbus.h -F: tests/acceptance/machine_arm_n8x0.py +F: tests/avocado/machine_arm_n8x0.py F: docs/system/arm/nseries.rst Palm -M: Andrzej Zaborowski M: Peter Maydell L: qemu-arm@nongnu.org S: Odd Fixes @@ -838,7 +845,6 @@ F: include/hw/intc/realview_gic.h F: docs/system/arm/realview.rst PXA2XX -M: Andrzej Zaborowski M: Peter Maydell L: qemu-arm@nongnu.org S: Odd Fixes @@ -851,12 +857,13 @@ F: hw/display/tc6393xb.c F: hw/gpio/max7310.c F: hw/gpio/zaurus.c F: hw/misc/mst_fpga.c -F: hw/misc/max111x.c -F: include/hw/misc/max111x.h +F: hw/adc/max111x.c +F: include/hw/adc/max111x.h F: include/hw/arm/pxa.h F: include/hw/arm/sharpsl.h F: include/hw/display/tc6393xb.h F: docs/system/arm/xscale.rst +F: docs/system/arm/mainstone.rst SABRELITE / i.MX6 M: Peter Maydell @@ -898,6 +905,13 @@ F: hw/*/stellaris* F: include/hw/input/gamepad.h F: docs/system/arm/stellaris.rst +STM32VLDISCOVERY +M: Alexandre Iooss +L: qemu-arm@nongnu.org +S: Maintained +F: hw/arm/stm32vldiscovery.c +F: docs/system/arm/stm32.rst + Versatile Express M: Peter Maydell L: qemu-arm@nongnu.org @@ -930,8 +944,10 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/*/xilinx_* F: hw/*/cadence_* -F: hw/misc/zynq* -F: include/hw/misc/zynq* +F: hw/misc/zynq_slcr.c +F: hw/adc/zynq-xadc.c +F: include/hw/misc/zynq_slcr.h +F: include/hw/adc/zynq-xadc.h X: hw/ssi/xilinx_* Xilinx ZynqMP and Versal @@ -953,6 +969,12 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/arm/virt-acpi-build.c +STM32F100 +M: Alexandre Iooss +L: qemu-arm@nongnu.org +S: Maintained +F: hw/arm/stm32f100_soc.c + STM32F205 M: Alistair Francis M: Peter Maydell @@ -1011,6 +1033,7 @@ M: Peter Maydell L: qemu-arm@nongnu.org S: Maintained F: hw/arm/msf2-som.c +F: docs/system/arm/emcraft-sf2.rst ASPEED BMCs M: Cédric Le Goater @@ -1026,6 +1049,7 @@ F: include/hw/misc/pca9552*.h F: hw/net/ftgmac100.c F: include/hw/net/ftgmac100.h F: docs/system/arm/aspeed.rst +F: tests/qtest/*aspeed* NRF51 M: Joel 
Stanley @@ -1037,6 +1061,7 @@ F: hw/*/microbit*.c F: include/hw/*/nrf51*.h F: include/hw/*/microbit*.h F: tests/qtest/microbit-test.c +F: docs/system/arm/nrf.rst AVR Machines ------------- @@ -1044,7 +1069,7 @@ AVR Machines AVR MCUs M: Michael Rolnik S: Maintained -F: default-configs/*/avr-softmmu.mak +F: configs/devices/avr-softmmu/default.mak F: hw/avr/ F: include/hw/char/avr_usart.h F: hw/char/avr_usart.c @@ -1072,22 +1097,12 @@ HP B160L M: Richard Henderson R: Helge Deller S: Odd Fixes -F: default-configs/*/hppa-softmmu.mak +F: configs/devices/hppa-softmmu/default.mak F: hw/hppa/ +F: hw/net/*i82596* +F: include/hw/net/lasi_82596.h F: pc-bios/hppa-firmware.img -LM32 Machines -------------- -EVR32 and uclinux BSP -R: Michael Walle -S: Orphan -F: hw/lm32/lm32_boards.c - -milkymist -R: Michael Walle -S: Orphan -F: hw/lm32/milkymist.c - M68K Machines ------------- an5206 @@ -1148,7 +1163,7 @@ M: Edgar E. Iglesias S: Maintained F: hw/microblaze/petalogix_s3adsp1800_mmu.c F: include/hw/char/xilinx_uartlite.h -F: tests/acceptance/machine_microblaze.py +F: tests/avocado/machine_microblaze.py petalogix_ml605 M: Edgar E. Iglesias @@ -1157,6 +1172,13 @@ F: hw/microblaze/petalogix_ml605_mmu.c MIPS Machines ------------- +Overall MIPS Machines +M: Philippe Mathieu-Daudé +S: Odd Fixes +F: configs/devices/mips*/* +F: hw/mips/ +F: include/hw/mips/ + Jazz M: Hervé Poussineau R: Aleksandar Rikalo @@ -1174,8 +1196,8 @@ F: hw/acpi/piix4.c F: hw/mips/malta.c F: hw/mips/gt64xxx_pci.c F: include/hw/southbridge/piix.h -F: tests/acceptance/linux_ssh_mips_malta.py -F: tests/acceptance/machine_mips_malta.py +F: tests/avocado/linux_ssh_mips_malta.py +F: tests/avocado/machine_mips_malta.py Mipssim R: Aleksandar Rikalo @@ -1193,6 +1215,7 @@ F: hw/isa/vt82c686.c F: hw/pci-host/bonito.c F: hw/usb/vt82c686-uhci-pci.c F: include/hw/isa/vt82c686.h +F: tests/avocado/machine_mips_fuloong2e.py Loongson-3 virtual platforms M: Huacai Chen @@ -1202,7 +1225,7 @@ F: hw/intc/loongson_liointc.c F: hw/mips/loongson3_bootp.c F: hw/mips/loongson3_bootp.h F: hw/mips/loongson3_virt.c -F: tests/acceptance/machine_mips_loongson3v.py +F: tests/avocado/machine_mips_loongson3v.py Boston M: Paul Burton @@ -1223,24 +1246,19 @@ F: hw/openrisc/openrisc_sim.c PowerPC Machines ---------------- 405 -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc405_boards.c Bamboo -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc440_bamboo.c +F: tests/avocado/ppc_bamboo.py e500 -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/e500* F: hw/gpio/mpc8xxx.c F: hw/i2c/mpc_i2c.c @@ -1249,19 +1267,18 @@ F: hw/pci-host/ppce500.c F: include/hw/ppc/ppc_e500.h F: include/hw/pci-host/ppce500.h F: pc-bios/u-boot.e500 +F: hw/intc/openpic_kvm.h +F: include/hw/ppc/openpic_kvm.h mpc8544ds -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/mpc8544ds.c F: hw/ppc/mpc8544_guts.c +F: tests/avocado/ppc_mpc8544ds.py New World (mac99) M: Mark Cave-Ayland -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/mac_newworld.c @@ -1280,8 +1297,6 @@ F: pc-bios/qemu_vga.ndrv Old World (g3beige) M: Mark Cave-Ayland -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/mac_oldworld.c @@ -1295,27 +1310,27 @@ F: pc-bios/qemu_vga.ndrv PReP M: Hervé Poussineau -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/prep.c F: hw/ppc/prep_systemio.c F: hw/ppc/rs6000_mc.c -F: 
hw/pci-host/prep.[hc] +F: hw/pci-host/raven.c F: hw/isa/i82378.c F: hw/isa/pc87312.c F: hw/dma/i82374.c F: hw/rtc/m48t59-isa.c F: include/hw/isa/pc87312.h F: include/hw/rtc/m48t59.h -F: tests/acceptance/ppc_prep_40p.py +F: tests/avocado/ppc_prep_40p.py sPAPR -M: David Gibson -M: Greg Kurz +M: Cédric Le Goater +M: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz L: qemu-ppc@nongnu.org -S: Supported +S: Maintained F: hw/*/spapr* F: include/hw/*/spapr* F: hw/*/xics* @@ -1327,11 +1342,10 @@ F: tests/qtest/spapr* F: tests/qtest/libqos/*spapr* F: tests/qtest/rtas* F: tests/qtest/libqos/rtas* +F: tests/avocado/ppc_pseries.py PowerNV (Non-Virtualized) M: Cédric Le Goater -M: David Gibson -M: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/pnv* @@ -1348,11 +1362,10 @@ M: Edgar E. Iglesias L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/virtex_ml507.c +F: tests/avocado/ppc_virtex_ml507.py sam460ex M: BALATON Zoltan -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/sam460ex.c @@ -1364,6 +1377,29 @@ F: pc-bios/canyonlands.dt[sb] F: pc-bios/u-boot-sam460ex-20100605.bin F: roms/u-boot-sam460ex +pegasos2 +M: BALATON Zoltan +L: qemu-ppc@nongnu.org +S: Maintained +F: hw/ppc/pegasos2.c +F: hw/pci-host/mv64361.c +F: hw/pci-host/mv643xx.h +F: include/hw/pci-host/mv64361.h + +Virtual Open Firmware (VOF) +M: Alexey Kardashevskiy +R: Cédric Le Goater +R: Daniel Henrique Barboza +R: David Gibson +R: Greg Kurz +L: qemu-ppc@nongnu.org +S: Maintained +F: hw/ppc/spapr_vof* +F: hw/ppc/vof* +F: include/hw/ppc/vof* +F: pc-bios/vof/* +F: pc-bios/vof* + RISC-V Machines --------------- OpenTitan @@ -1371,11 +1407,9 @@ M: Alistair Francis L: qemu-riscv@nongnu.org S: Supported F: hw/riscv/opentitan.c -F: hw/char/ibex_uart.c -F: hw/intc/ibex_plic.c +F: hw/*/ibex_*.c F: include/hw/riscv/opentitan.h -F: include/hw/char/ibex_uart.h -F: include/hw/intc/ibex_plic.h +F: include/hw/*/ibex_*.h Microchip PolarFire SoC Icicle Kit M: Bin Meng @@ -1392,6 +1426,15 @@ F: include/hw/misc/mchp_pfsoc_dmc.h F: include/hw/misc/mchp_pfsoc_ioscb.h F: include/hw/misc/mchp_pfsoc_sysreg.h +Shakti C class SoC +M: Vijai Kumar K +L: qemu-riscv@nongnu.org +S: Supported +F: hw/riscv/shakti_c.c +F: hw/char/shakti_uart.c +F: include/hw/riscv/shakti_c.h +F: include/hw/char/shakti_uart.h + SiFive Machines M: Alistair Francis M: Bin Meng @@ -1408,7 +1451,7 @@ R: Yoshinori Sato S: Orphan F: docs/system/target-rx.rst F: hw/rx/rx-gdbsim.c -F: tests/acceptance/machine_rx_gdbsim.py +F: tests/avocado/machine_rx_gdbsim.py SH4 Machines ------------ @@ -1462,7 +1505,7 @@ F: include/hw/pci-host/sabre.h F: hw/pci-bridge/simba.c F: include/hw/pci-bridge/simba.h F: pc-bios/openbios-sparc64 -F: tests/acceptance/machine_sparc64_sun4u.py +F: tests/avocado/machine_sparc64_sun4u.py Sun4v M: Artyom Tarasenko @@ -1478,12 +1521,11 @@ S: Maintained F: hw/sparc/leon3.c F: hw/*/grlib* F: include/hw/*/grlib* -F: tests/acceptance/machine_sparc_leon3.py +F: tests/avocado/machine_sparc_leon3.py S390 Machines ------------- S390 Virtio-ccw -M: Cornelia Huck M: Halil Pasic M: Christian Borntraeger S: Supported @@ -1493,9 +1535,8 @@ F: hw/s390x/ F: include/hw/s390x/ F: hw/watchdog/wdt_diag288.c F: include/hw/watchdog/wdt_diag288.h -F: default-configs/*/s390x-softmmu.mak -F: tests/acceptance/machine_s390_ccw_virtio.py -T: git https://gitlab.com/cohuck/qemu.git s390-next +F: configs/devices/s390x-softmmu/default.mak +F: tests/avocado/machine_s390_ccw_virtio.py T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org @@ -1518,14 
+1559,6 @@ F: hw/s390x/s390-pci* F: include/hw/s390x/s390-pci* L: qemu-s390x@nongnu.org -UniCore32 Machines ------------------- -PKUnity-3 SoC initramfs-with-busybox -M: Guan Xuetao -S: Maintained -F: hw/*/puv3* -F: hw/unicore32/ - X86 Machines ------------ PC @@ -1589,19 +1622,21 @@ microvm M: Sergio Lopez M: Paolo Bonzini S: Maintained -F: docs/microvm.rst +F: docs/system/i386/microvm.rst F: hw/i386/microvm.c F: include/hw/i386/microvm.h F: pc-bios/bios-microvm.bin Machine core -M: Eduardo Habkost +M: Eduardo Habkost M: Marcel Apfelbaum +R: Philippe Mathieu-Daudé S: Supported F: cpu.c F: hw/core/cpu.c F: hw/core/machine-qmp-cmds.c F: hw/core/machine.c +F: hw/core/machine-smp.c F: hw/core/null-machine.c F: hw/core/numa.c F: hw/cpu/cluster.c @@ -1611,6 +1646,7 @@ F: include/hw/boards.h F: include/hw/core/cpu.h F: include/hw/cpu/cluster.h F: include/sysemu/numa.h +F: tests/unit/test-smp-parse.c T: git https://gitlab.com/ehabkost/qemu.git machine-next Xtensa Machines @@ -1633,6 +1669,16 @@ F: hw/net/opencores_eth.c Devices ------- +Overall Audio frontends +M: Gerd Hoffmann +S: Odd Fixes +F: hw/audio/ +F: include/hw/audio/ +F: tests/qtest/ac97-test.c +F: tests/qtest/es1370-test.c +F: tests/qtest/intel-hda-test.c +F: tests/qtest/fuzz-sb16-test.c + Xilinx CAN M: Vikram Garhwal M: Francisco Iglesias @@ -1676,6 +1722,9 @@ M: John Snow L: qemu-block@nongnu.org S: Supported F: hw/block/fdc.c +F: hw/block/fdc-internal.h +F: hw/block/fdc-isa.c +F: hw/block/fdc-sysbus.c F: include/hw/block/fdc.h F: tests/qtest/fdc-test.c T: git https://gitlab.com/jsnow/qemu.git ide @@ -1705,11 +1754,11 @@ F: hw/pci-bridge/* F: qapi/pci.json F: docs/pci* F: docs/specs/*pci* -F: default-configs/pci.mak ACPI/SMBIOS M: Michael S. Tsirkin M: Igor Mammedov +R: Ani Sinha S: Supported F: include/hw/acpi/* F: include/hw/firmware/smbios.h @@ -1722,6 +1771,10 @@ F: qapi/acpi.json F: tests/qtest/bios-tables-test* F: tests/qtest/acpi-utils.[hc] F: tests/data/acpi/ +F: docs/specs/acpi_cpu_hotplug.rst +F: docs/specs/acpi_mem_hotplug.rst +F: docs/specs/acpi_pci_hotplug.rst +F: docs/specs/acpi_hw_reduced_hotplug.rst ACPI/HEST/GHES R: Dongjiu Geng @@ -1732,9 +1785,8 @@ F: include/hw/acpi/ghes.h F: docs/specs/acpi_hest_ghes.rst ppc4xx -M: David Gibson L: qemu-ppc@nongnu.org -S: Odd Fixes +S: Orphan F: hw/ppc/ppc4*.c F: hw/i2c/ppc4xx_i2c.c F: include/hw/ppc/ppc4xx.h @@ -1798,21 +1850,21 @@ F: include/hw/sd/sd* F: hw/sd/core.c F: hw/sd/sd* F: hw/sd/ssi-sd.c -F: tests/qtest/sd* +F: tests/qtest/fuzz-sdcard-test.c +F: tests/qtest/sdhci-test.c USB M: Gerd Hoffmann -S: Maintained +S: Odd Fixes F: hw/usb/* +F: stubs/usb-dev-stub.c F: tests/qtest/usb-*-test.c -F: docs/usb2.txt -F: docs/usb-storage.txt +F: docs/system/devices/usb.rst F: include/hw/usb.h F: include/hw/usb/ -F: default-configs/usb.mak USB (serial adapter) -M: Gerd Hoffmann +R: Gerd Hoffmann M: Samuel Thibault S: Maintained F: hw/usb/dev-serial.c @@ -1823,9 +1875,9 @@ S: Supported F: hw/vfio/* F: include/hw/vfio/ F: docs/igd-assign.txt +F: docs/devel/vfio-migration.rst vfio-ccw -M: Cornelia Huck M: Eric Farman M: Matthew Rosato S: Supported @@ -1833,7 +1885,6 @@ F: hw/vfio/ccw.c F: hw/s390x/s390-ccw.c F: include/hw/s390x/s390-ccw.h F: include/hw/s390x/vfio-ccw.h -T: git https://gitlab.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org vfio-ap @@ -1881,6 +1932,7 @@ virtio-9p M: Greg Kurz M: Christian Schoenebeck S: Odd Fixes +W: https://wiki.qemu.org/Documentation/9p F: hw/9pfs/ X: hw/9pfs/xen-9p* F: fsdev/ @@ -1920,7 +1972,7 @@ L: virtio-fs@redhat.com virtio-input M: Gerd 
Hoffmann -S: Maintained +S: Odd Fixes F: hw/input/vhost-user-input.c F: hw/input/virtio-input*.c F: include/hw/virtio/virtio-input.h @@ -1951,6 +2003,15 @@ F: include/sysemu/rng*.h F: backends/rng*.c F: tests/qtest/virtio-rng-test.c +vhost-user-rng +M: Mathieu Poirier +S: Supported +F: docs/tools/vhost-user-rng.rst +F: hw/virtio/vhost-user-rng.c +F: hw/virtio/vhost-user-rng-pci.c +F: include/hw/virtio/vhost-user-rng.h +F: tools/vhost-user-rng/* + virtio-crypto M: Gonglei S: Supported @@ -1972,7 +2033,7 @@ M: Keith Busch M: Klaus Jensen L: qemu-block@nongnu.org S: Supported -F: hw/block/nvme* +F: hw/nvme/* F: include/block/nvme.h F: tests/qtest/nvme-test.c F: docs/system/nvme.rst @@ -2017,6 +2078,7 @@ F: hw/acpi/nvdimm.c F: hw/mem/nvdimm.c F: include/hw/mem/nvdimm.h F: docs/nvdimm.txt +F: docs/specs/acpi_nvdimm.rst e1000x M: Dmitry Fleytman @@ -2040,6 +2102,12 @@ S: Maintained F: hw/net/tulip.c F: hw/net/tulip.h +pca954x +M: Patrick Venture +S: Maintained +F: hw/i2c/i2c_mux_pca954x.c +F: include/hw/i2c/i2c_mux_pca954x.h + Generic Loader M: Alistair Francis S: Maintained @@ -2052,7 +2120,7 @@ M: Alex Bennée S: Maintained F: hw/core/guest-loader.c F: docs/system/guest-loader.rst -F: tests/acceptance/boot_xen.py +F: tests/avocado/boot_xen.py Intel Hexadecimal Object File Loader M: Su Hang @@ -2111,7 +2179,7 @@ F: include/hw/display/ramfb.h virtio-gpu M: Gerd Hoffmann -S: Maintained +S: Odd Fixes F: hw/display/virtio-gpu* F: hw/display/virtio-vga.* F: include/hw/virtio/virtio-gpu.h @@ -2130,7 +2198,7 @@ F: include/hw/virtio/vhost-user-scsi.h vhost-user-gpu M: Marc-André Lureau -M: Gerd Hoffmann +R: Gerd Hoffmann S: Maintained F: docs/interop/vhost-user-gpu.rst F: contrib/vhost-user-gpu @@ -2158,7 +2226,6 @@ F: include/hw/southbridge/piix.h Firmware configuration (fw_cfg) M: Philippe Mathieu-Daudé -R: Laszlo Ersek R: Gerd Hoffmann S: Supported F: docs/specs/fw_cfg.txt @@ -2172,8 +2239,6 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next XIVE M: Cédric Le Goater -R: David Gibson -R: Greg Kurz L: qemu-ppc@nongnu.org S: Supported F: hw/*/*xive* @@ -2209,22 +2274,84 @@ F: net/can/* F: hw/net/can/* F: include/net/can_*.h +OpenPIC interrupt controller +M: Mark Cave-Ayland +S: Odd Fixes +F: hw/intc/openpic.c +F: include/hw/ppc/openpic.h + +MIPS CPS +M: Philippe Mathieu-Daudé +S: Odd Fixes +F: hw/misc/mips_* +F: include/hw/misc/mips_* + +MIPS GIC +M: Philippe Mathieu-Daudé +S: Odd Fixes +F: hw/intc/mips_gic.c +F: hw/timer/mips_gictimer.c +F: include/hw/intc/mips_gic.h +F: include/hw/timer/mips_gictimer.h + Subsystems ---------- -Audio +Overall Audio backends M: Gerd Hoffmann -S: Maintained +S: Odd Fixes F: audio/ -F: hw/audio/ -F: include/hw/audio/ +X: audio/alsaaudio.c +X: audio/coreaudio.c +X: audio/dsound* +X: audio/jackaudio.c +X: audio/ossaudio.c +X: audio/paaudio.c +X: audio/sdlaudio.c +X: audio/spiceaudio.c F: qapi/audio.json -F: tests/qtest/ac97-test.c -F: tests/qtest/es1370-test.c -F: tests/qtest/intel-hda-test.c + +ALSA Audio backend +M: Gerd Hoffmann +R: Christian Schoenebeck +S: Odd Fixes +F: audio/alsaaudio.c + +Core Audio framework backend +M: Gerd Hoffmann +R: Christian Schoenebeck +S: Odd Fixes +F: audio/coreaudio.c + +DSound Audio backend +M: Gerd Hoffmann +S: Odd Fixes +F: audio/dsound* + +JACK Audio Connection Kit backend +M: Gerd Hoffmann +R: Christian Schoenebeck +S: Odd Fixes +F: audio/jackaudio.c + +Open Sound System (OSS) Audio backend +M: Gerd Hoffmann +S: Odd Fixes +F: audio/ossaudio.c + +PulseAudio backend +M: Gerd Hoffmann +S: Odd Fixes +F: audio/paaudio.c + +SDL Audio 
backend +M: Gerd Hoffmann +R: Thomas Huth +S: Odd Fixes +F: audio/sdlaudio.c Block layer core M: Kevin Wolf -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block* @@ -2291,8 +2418,8 @@ F: block/mirror.c F: qapi/job.json F: block/block-copy.c F: include/block/block-copy.c -F: block/backup-top.h -F: block/backup-top.c +F: block/copy-before-write.h +F: block/copy-before-write.c F: include/block/aio_task.h F: block/aio_task.c F: util/qemu-co-shared-resource.c @@ -2405,22 +2532,27 @@ F: tests/tcg/multiarch/gdbstub/ Memory API M: Paolo Bonzini +M: Peter Xu +M: David Hildenbrand +R: Philippe Mathieu-Daudé S: Supported F: include/exec/ioport.h F: include/exec/memop.h F: include/exec/memory.h F: include/exec/ram_addr.h F: include/exec/ramblock.h +F: include/sysemu/memory_mapping.h F: softmmu/dma-helpers.c F: softmmu/ioport.c F: softmmu/memory.c +F: softmmu/memory_mapping.c F: softmmu/physmem.c F: include/exec/memory-internal.h F: scripts/coccinelle/memory-region-housekeeping.cocci SPICE M: Gerd Hoffmann -S: Supported +S: Odd Fixes F: include/ui/qemu-spice.h F: include/ui/spice-display.h F: ui/spice-*.c @@ -2445,6 +2577,7 @@ F: ui/cocoa.m Main loop M: Paolo Bonzini S: Maintained +F: include/exec/gen-icount.h F: include/qemu/main-loop.h F: include/sysemu/runstate.h F: include/sysemu/runstate-action.h @@ -2499,7 +2632,7 @@ S: Maintained F: net/netmap.c Host Memory Backends -M: Eduardo Habkost +M: David Hildenbrand M: Igor Mammedov S: Maintained F: backends/hostmem*.c @@ -2515,13 +2648,13 @@ F: backends/cryptodev*.c Python library M: John Snow M: Cleber Rosa -R: Eduardo Habkost +R: Eduardo Habkost S: Maintained F: python/ T: git https://gitlab.com/jsnow/qemu.git python Python scripts -M: Eduardo Habkost +M: Eduardo Habkost M: Cleber Rosa S: Odd Fixes F: scripts/*.py @@ -2531,6 +2664,13 @@ Benchmark util M: Vladimir Sementsov-Ogievskiy S: Maintained F: scripts/simplebench/ +T: git https://src.openvz.org/scm/~vsementsov/qemu.git simplebench + +Transactions helper +M: Vladimir Sementsov-Ogievskiy +S: Maintained +F: include/qemu/transactions.h +F: util/transactions.c QAPI M: Markus Armbruster @@ -2590,7 +2730,7 @@ T: git https://github.com/mdroth/qemu.git qga QOM M: Paolo Bonzini R: Daniel P. Berrange -R: Eduardo Habkost +R: Eduardo Habkost S: Supported F: docs/qdev-device-use.txt F: hw/core/qdev* @@ -2610,7 +2750,7 @@ F: tests/unit/check-qom-proplist.c F: tests/unit/test-qdev-global-props.c QOM boilerplate conversion script -M: Eduardo Habkost +M: Eduardo Habkost S: Maintained F: scripts/codeconverter/ @@ -2648,6 +2788,8 @@ R: Paolo Bonzini R: Bandan Das R: Stefan Hajnoczi R: Thomas Huth +R: Darren Kenny +R: Qiuhao Li S: Maintained F: tests/qtest/fuzz/ F: tests/qtest/fuzz-*test.c @@ -2692,14 +2834,13 @@ F: scripts/tracetool.py F: scripts/tracetool/ F: scripts/qemu-trace-stap* F: docs/tools/qemu-trace-stap.rst -F: docs/devel/tracing.txt +F: docs/devel/tracing.rst T: git https://github.com/stefanha/qemu.git tracing TPM M: Stefan Berger S: Maintained F: tpm.c -F: stubs/tpm.c F: hw/tpm/* F: include/hw/acpi/tpm.h F: include/sysemu/tpm* @@ -2786,7 +2927,6 @@ F: tests/unit/test-authz-* Sockets M: Daniel P. 
Berrange -M: Gerd Hoffmann S: Maintained F: include/qemu/sockets.h F: util/qemu-sockets.c @@ -2854,8 +2994,9 @@ F: net/filter-replay.c F: include/sysemu/replay.h F: docs/replay.txt F: stubs/replay.c -F: tests/acceptance/replay_kernel.py -F: tests/acceptance/reverse_debugging.py +F: tests/avocado/replay_kernel.py +F: tests/avocado/replay_linux.py +F: tests/avocado/reverse_debugging.py F: qapi/replay.json IOVA Tree @@ -2882,7 +3023,6 @@ F: include/hw/i2c/smbus_slave.h F: include/hw/i2c/smbus_eeprom.h Firmware schema specifications -M: Laszlo Ersek M: Philippe Mathieu-Daudé R: Daniel P. Berrange R: Kashyap Chamarthy @@ -2890,9 +3030,10 @@ S: Maintained F: docs/interop/firmware.json EDK2 Firmware -M: Laszlo Ersek M: Philippe Mathieu-Daudé +R: Gerd Hoffmann S: Supported +F: hw/i386/*ovmf* F: pc-bios/descriptors/??-edk2-*.json F: pc-bios/edk2-* F: roms/Makefile.edk2 @@ -2943,14 +3084,14 @@ M: Warner Losh R: Kyle Evans S: Maintained F: bsd-user/ -F: default-configs/targets/*-bsd-user.mak +F: configs/targets/*-bsd-user.mak T: git https://github.com/qemu-bsd-user/qemu-bsd-user bsd-user-rebase-3.1 Linux user M: Laurent Vivier S: Maintained F: linux-user/ -F: default-configs/targets/*linux-user.mak +F: configs/targets/*linux-user.mak F: scripts/qemu-binfmt-conf.sh F: scripts/update-syscalltbl.sh F: scripts/update-mips-syscall-args.sh @@ -2966,11 +3107,13 @@ F: include/tcg/ TCG Plugins M: Alex Bennée +R: Alexandre Iooss +R: Mahmoud Mandour S: Maintained F: docs/devel/tcg-plugins.rst F: plugins/ F: tests/plugin/ -F: tests/acceptance/tcg_plugins.py +F: tests/avocado/tcg_plugins.py F: contrib/plugins/ AArch64 TCG target @@ -2982,7 +3125,7 @@ F: disas/arm-a64.cc F: disas/libvixl/ ARM TCG target -M: Andrzej Zaborowski +M: Richard Henderson S: Maintained L: qemu-arm@nongnu.org F: tcg/arm/ @@ -3045,17 +3188,12 @@ S: Supported F: block/vmdk.c RBD -M: Jason Dillaman +M: Ilya Dryomov +R: Peter Lieven L: qemu-block@nongnu.org S: Supported F: block/rbd.c -Sheepdog -M: Liu Yuan -L: qemu-block@nongnu.org -S: Odd Fixes -F: block/sheepdog.c - VHDX M: Jeff Cody L: qemu-block@nongnu.org @@ -3124,6 +3262,7 @@ F: block/null.c NVMe Block Driver M: Stefan Hajnoczi R: Fam Zheng +R: Philippe Mathieu-Daudé L: qemu-block@nongnu.org S: Supported F: block/nvme* @@ -3203,6 +3342,7 @@ Linux io_uring M: Aarushi Mehta M: Julia Suvorova M: Stefan Hajnoczi +R: Stefano Garzarella L: qemu-block@nongnu.org S: Maintained F: block/io_uring.c @@ -3210,7 +3350,7 @@ F: stubs/io_uring.c qcow2 M: Kevin Wolf -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block/qcow2* @@ -3224,7 +3364,7 @@ F: block/qcow.c blkdebug M: Kevin Wolf -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block/blkdebug.c @@ -3254,10 +3394,12 @@ F: block/export/vhost-user-blk-server.c F: block/export/vhost-user-blk-server.h F: include/qemu/vhost-user-server.h F: tests/qtest/libqos/vhost-user-blk.c +F: tests/qtest/libqos/vhost-user-blk.h +F: tests/qtest/vhost-user-blk-test.c F: util/vhost-user-server.c FUSE block device exports -M: Max Reitz +M: Hanna Reitz L: qemu-block@nongnu.org S: Supported F: block/export/fuse.c @@ -3312,6 +3454,14 @@ F: include/hw/remote/proxy-memory-listener.h F: hw/remote/iohub.c F: include/hw/remote/iohub.h +EBPF: +M: Jason Wang +R: Andrew Melnychenko +R: Yuri Benditovich +S: Maintained +F: ebpf/* +F: tools/ebpf/* + Build and test automation ------------------------- Build and test automation, general continuous integration @@ -3319,7 +3469,7 @@ M: Alex Bennée M: Philippe Mathieu-Daudé M: Thomas Huth R: 
Wainer dos Santos Moschetta -R: Willian Rampazzo +R: Beraldo Leal S: Maintained F: .github/lockdown.yml F: .gitlab-ci.yml @@ -3352,20 +3502,27 @@ S: Maintained F: tests/tcg/Makefile F: tests/tcg/Makefile.include -Acceptance (Integration) Testing with the Avocado framework +Integration Testing with the Avocado framework W: https://trello.com/b/6Qi1pxVn/avocado-qemu R: Cleber Rosa R: Philippe Mathieu-Daudé R: Wainer dos Santos Moschetta +R: Beraldo Leal S: Odd Fixes -F: tests/acceptance/ +F: tests/avocado/ + +GitLab custom runner (Works On Arm Sponsored) +M: Alex Bennée +M: Philippe Mathieu-Daudé +S: Maintained +F: .gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml Documentation ------------- Build system architecture M: Daniel P. Berrange S: Odd Fixes -F: docs/devel/build-system.txt +F: docs/devel/build-system.rst GIT Data Mining Config M: Alex Bennée @@ -3375,7 +3532,7 @@ F: contrib/gitdm/* Incompatible changes R: libvir-list@redhat.com -F: docs/system/deprecated.rst +F: docs/about/deprecated.rst Build System ------------ @@ -3394,6 +3551,7 @@ S: Maintained F: docs/conf.py F: docs/*/conf.py F: docs/sphinx/ +F: docs/_templates/ Miscellaneous ------------- diff --git a/Makefile b/Makefile index bcbbec71a1c..74c5b46d38b 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ SRC_PATH=. # we have explicit rules for everything MAKEFLAGS += -rR -SHELL = /usr/bin/env bash -o pipefail +SHELL = bash -o pipefail # Usage: $(call quiet-command,command and args,"NAME","args to print") # This will run "command and args", and either: @@ -48,9 +48,11 @@ Makefile: .git-submodule-status .PHONY: git-submodule-update git-submodule-update: +ifneq ($(GIT_SUBMODULES_ACTION),ignore) $(call quiet-command, \ (GIT="$(GIT)" "$(SRC_PATH)/scripts/git-submodule.sh" $(GIT_SUBMODULES_ACTION) $(GIT_SUBMODULES)), \ "GIT","$(GIT_SUBMODULES)") +endif # 0. ensure the build tree is okay @@ -85,7 +87,7 @@ x := $(shell rm -rf meson-private meson-info meson-logs) endif # 1. ensure config-host.mak is up-to-date -config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION +config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/scripts/meson-buildoptions.sh $(SRC_PATH)/pc-bios $(SRC_PATH)/VERSION @echo config-host.mak is out-of-date, running configure @if test -f meson-private/coredata.dat; then \ ./config.status --skip-meson; \ @@ -122,14 +124,22 @@ ifneq ($(MESON),) Makefile.mtest: build.ninja scripts/mtest2make.py $(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@ -include Makefile.mtest + +.PHONY: update-buildoptions +all update-buildoptions: $(SRC_PATH)/scripts/meson-buildoptions.sh +$(SRC_PATH)/scripts/meson-buildoptions.sh: $(SRC_PATH)/meson_options.txt + $(MESON) introspect --buildoptions $(SRC_PATH)/meson.build | $(PYTHON) \ + scripts/meson-buildoptions.py > $@.tmp && mv $@.tmp $@ endif # 4. 
Rules to bridge to other makefiles ifneq ($(NINJA),) -MAKE.n = $(findstring n,$(firstword $(MAKEFLAGS))) -MAKE.k = $(findstring k,$(firstword $(MAKEFLAGS))) -MAKE.q = $(findstring q,$(firstword $(MAKEFLAGS))) +# Filter out long options to avoid flags like --no-print-directory which +# may result in false positive match for MAKE.n +MAKE.n = $(findstring n,$(firstword $(filter-out --%,$(MAKEFLAGS)))) +MAKE.k = $(findstring k,$(firstword $(filter-out --%,$(MAKEFLAGS)))) +MAKE.q = $(findstring q,$(firstword $(filter-out --%,$(MAKEFLAGS)))) MAKE.nq = $(if $(word 2, $(MAKE.n) $(MAKE.q)),nq) NINJAFLAGS = $(if $V,-v) $(if $(MAKE.n), -n) $(if $(MAKE.k), -k0) \ $(filter-out -j, $(lastword -j1 $(filter -l% -j%, $(MAKEFLAGS)))) \ @@ -213,7 +223,7 @@ qemu-%.tar.bz2: distclean: clean -$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || : - rm -f config-host.mak config-host.h* + rm -f config-host.mak config-host.h* config-poison.h rm -f tests/tcg/config-*.mak rm -f config-all-disas.mak config.status rm -f roms/seabios/config.mak roms/vgabios/config.mak @@ -225,7 +235,8 @@ distclean: clean rm -f linux-headers/asm rm -Rf .sdk -find-src-path = find "$(SRC_PATH)/" -path "$(SRC_PATH)/meson" -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) +find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \ + -type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \) .PHONY: ctags ctags: @@ -246,7 +257,7 @@ gtags: "GTAGS", "Remove old $@ files") $(call quiet-command, \ (cd $(SRC_PATH) && \ - $(find-src-path) | gtags -f -), \ + $(find-src-path) -print | gtags -f -), \ "GTAGS", "Re-index $(SRC_PATH)") .PHONY: TAGS diff --git a/README.rst b/README.rst index 8fce5774685..668475b8224 100644 --- a/README.rst +++ b/README.rst @@ -82,9 +82,9 @@ of other UNIX targets. The simple steps to build QEMU are: Additional information can also be found online via the QEMU website: -* ``_ -* ``_ -* ``_ +* ``_ +* ``_ +* ``_ Submitting patches @@ -107,8 +107,8 @@ the Developers Guide. Additional information on submitting patches can be found online via the QEMU website -* ``_ -* ``_ +* ``_ +* ``_ The QEMU website is also maintained under source control. @@ -154,20 +154,20 @@ will be tagged as my-feature-v2. Bug reporting ============= -The QEMU project uses Launchpad as its primary upstream bug tracker. Bugs +The QEMU project uses GitLab issues to track bugs. Bugs found when running code built from QEMU git or upstream released sources should be reported via: -* ``_ +* ``_ If using QEMU via an operating system vendor pre-built binary package, it is preferable to report bugs to the vendor's own bug tracker first. If the bug is also known to affect latest upstream code, it can also be -reported via launchpad. +reported via GitLab. 
For additional information on bug reporting consult: -* ``_ +* ``_ ChangeLog @@ -191,4 +191,4 @@ main methods being email and IRC Information on additional methods of contacting the community can be found online via the QEMU website: -* ``_ +* ``_ diff --git a/VERSION b/VERSION index 09b254e90c6..6abaeb2f907 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0 +6.2.0 diff --git a/accel/Kconfig b/accel/Kconfig index 461104c7715..8bdedb7d15f 100644 --- a/accel/Kconfig +++ b/accel/Kconfig @@ -1,6 +1,9 @@ config WHPX bool +config NVMM + bool + config HAX bool diff --git a/accel/accel-common.c b/accel/accel-common.c index 9901b0531c4..7b8ec7e0f72 100644 --- a/accel/accel-common.c +++ b/accel/accel-common.c @@ -44,7 +44,7 @@ static const TypeInfo accel_type = { AccelClass *accel_find(const char *opt_name) { char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name); - AccelClass *ac = ACCEL_CLASS(object_class_by_name(class_name)); + AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name)); g_free(class_name); return ac; } @@ -54,10 +54,23 @@ static void accel_init_cpu_int_aux(ObjectClass *klass, void *opaque) CPUClass *cc = CPU_CLASS(klass); AccelCPUClass *accel_cpu = opaque; + /* + * The first callback allows accel-cpu to run initializations + * for the CPU, customizing CPU behavior according to the accelerator. + * + * The second one allows the CPU to customize the accel-cpu + * behavior according to the CPU. + * + * The second is currently only used by TCG, to specialize the + * TCGCPUOps depending on the CPU type. + */ cc->accel_cpu = accel_cpu; if (accel_cpu->cpu_class_init) { accel_cpu->cpu_class_init(cc); } + if (cc->init_accel_cpu) { + cc->init_accel_cpu(accel_cpu, cc); + } } /* initialize the arch-specific accel CpuClass interfaces */ @@ -89,6 +102,25 @@ void accel_init_interfaces(AccelClass *ac) accel_init_cpu_interfaces(ac); } +void accel_cpu_instance_init(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->accel_cpu && cc->accel_cpu->cpu_instance_init) { + cc->accel_cpu->cpu_instance_init(cpu); + } +} + +bool accel_cpu_realizefn(CPUState *cpu, Error **errp) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->accel_cpu && cc->accel_cpu->cpu_realizefn) { + return cc->accel_cpu->cpu_realizefn(cpu, errp); + } + return true; +} + static const TypeInfo accel_cpu_type = { .name = TYPE_ACCEL_CPU, .parent = TYPE_OBJECT, diff --git a/accel/accel-softmmu.c b/accel/accel-softmmu.c index 50fa5acaa40..67276e4f522 100644 --- a/accel/accel-softmmu.c +++ b/accel/accel-softmmu.c @@ -72,7 +72,7 @@ void accel_init_ops_interfaces(AccelClass *ac) g_assert(ac_name != NULL); ops_name = g_strdup_printf("%s" ACCEL_OPS_SUFFIX, ac_name); - ops = ACCEL_OPS_CLASS(object_class_by_name(ops_name)); + ops = ACCEL_OPS_CLASS(module_object_class_by_name(ops_name)); g_free(ops_name); /* diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c new file mode 100644 index 00000000000..54457c76c2f --- /dev/null +++ b/accel/hvf/hvf-accel-ops.c @@ -0,0 +1,488 @@ +/* + * Copyright 2008 IBM Corporation + * 2008 Red Hat, Inc. + * Copyright 2011 Intel Corporation + * Copyright 2016 Veertu, Inc. + * Copyright 2017 The Android Open Source Project + * + * QEMU Hypervisor.framework support + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * This file contain code under public domain from the hvdos project: + * https://github.com/mist64/hvdos + * + * Parts Copyright (c) 2011 NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "exec/address-spaces.h" +#include "exec/exec-all.h" +#include "sysemu/cpus.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" +#include "sysemu/runstate.h" +#include "qemu/guest-random.h" + +HVFState *hvf_state; + +#ifdef __aarch64__ +#define HV_VM_DEFAULT NULL +#endif + +/* Memory slots */ + +hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size) +{ + hvf_slot *slot; + int x; + for (x = 0; x < hvf_state->num_slots; ++x) { + slot = &hvf_state->slots[x]; + if (slot->size && start < (slot->start + slot->size) && + (start + size) > slot->start) { + return slot; + } + } + return NULL; +} + +struct mac_slot { + int present; + uint64_t size; + uint64_t gpa_start; + uint64_t gva; +}; + +struct mac_slot mac_slots[32]; + +static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags) +{ + struct mac_slot *macslot; + hv_return_t ret; + + macslot = &mac_slots[slot->slot_id]; + + if (macslot->present) { + if (macslot->size != slot->size) { + macslot->present = 0; + ret = hv_vm_unmap(macslot->gpa_start, macslot->size); + assert_hvf_ok(ret); + } + } + + if (!slot->size) { + return 0; + } + + macslot->present = 1; + macslot->gpa_start = slot->start; + macslot->size = slot->size; + ret = hv_vm_map(slot->mem, slot->start, slot->size, flags); + assert_hvf_ok(ret); + return 0; +} + +static void hvf_set_phys_mem(MemoryRegionSection *section, bool add) +{ + hvf_slot *mem; + MemoryRegion *area = section->mr; + bool writeable = !area->readonly && !area->rom_device; + hv_memory_flags_t flags; + uint64_t page_size = qemu_real_host_page_size; + + if (!memory_region_is_ram(area)) { + if (writeable) { + return; + } else if (!memory_region_is_romd(area)) { + /* + * If the memory device is not in romd_mode, then we actually want + * to remove the hvf memory slot so all accesses will trap. + */ + add = false; + } + } + + if (!QEMU_IS_ALIGNED(int128_get64(section->size), page_size) || + !QEMU_IS_ALIGNED(section->offset_within_address_space, page_size)) { + /* Not page aligned, so we can not map as RAM */ + add = false; + } + + mem = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + if (mem && add) { + if (mem->size == int128_get64(section->size) && + mem->start == section->offset_within_address_space && + mem->mem == (memory_region_get_ram_ptr(area) + + section->offset_within_region)) { + return; /* Same region was attempted to register, go away. */ + } + } + + /* Region needs to be reset. set the size to 0 and remap it. */ + if (mem) { + mem->size = 0; + if (do_hvf_set_memory(mem, 0)) { + error_report("Failed to reset overlapping slot"); + abort(); + } + } + + if (!add) { + return; + } + + if (area->readonly || + (!memory_region_is_ram(area) && memory_region_is_romd(area))) { + flags = HV_MEMORY_READ | HV_MEMORY_EXEC; + } else { + flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; + } + + /* Now make a new slot. 
*/ + int x; + + for (x = 0; x < hvf_state->num_slots; ++x) { + mem = &hvf_state->slots[x]; + if (!mem->size) { + break; + } + } + + if (x == hvf_state->num_slots) { + error_report("No free slots"); + abort(); + } + + mem->size = int128_get64(section->size); + mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; + mem->start = section->offset_within_address_space; + mem->region = area; + + if (do_hvf_set_memory(mem, flags)) { + error_report("Error registering new memory slot"); + abort(); + } +} + +static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + if (!cpu->vcpu_dirty) { + hvf_get_registers(cpu); + cpu->vcpu_dirty = true; + } +} + +static void hvf_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu, + run_on_cpu_data arg) +{ + /* QEMU state is the reference, push it to HVF now and on next entry */ + cpu->vcpu_dirty = true; +} + +static void hvf_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); +} + +static void hvf_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); +} + +static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL); +} + +static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) +{ + hvf_slot *slot; + + slot = hvf_find_overlap_slot( + section->offset_within_address_space, + int128_get64(section->size)); + + /* protect region against writes; begin tracking it */ + if (on) { + slot->flags |= HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_EXEC); + /* stop tracking region*/ + } else { + slot->flags &= ~HVF_SLOT_LOG; + hv_vm_protect((uintptr_t)slot->start, (size_t)slot->size, + HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); + } +} + +static void hvf_log_start(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (old != 0) { + return; + } + + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_log_stop(MemoryListener *listener, + MemoryRegionSection *section, int old, int new) +{ + if (new != 0) { + return; + } + + hvf_set_dirty_tracking(section, 0); +} + +static void hvf_log_sync(MemoryListener *listener, + MemoryRegionSection *section) +{ + /* + * sync of dirty pages is handled elsewhere; just make sure we keep + * tracking the region. 
+ */ + hvf_set_dirty_tracking(section, 1); +} + +static void hvf_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, true); +} + +static void hvf_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + hvf_set_phys_mem(section, false); +} + +static MemoryListener hvf_memory_listener = { + .name = "hvf", + .priority = 10, + .region_add = hvf_region_add, + .region_del = hvf_region_del, + .log_start = hvf_log_start, + .log_stop = hvf_log_stop, + .log_sync = hvf_log_sync, +}; + +static void dummy_signal(int sig) +{ +} + +bool hvf_allowed; + +static int hvf_accel_init(MachineState *ms) +{ + int x; + hv_return_t ret; + HVFState *s; + + ret = hv_vm_create(HV_VM_DEFAULT); + assert_hvf_ok(ret); + + s = g_new0(HVFState, 1); + + s->num_slots = ARRAY_SIZE(s->slots); + for (x = 0; x < s->num_slots; ++x) { + s->slots[x].size = 0; + s->slots[x].slot_id = x; + } + + hvf_state = s; + memory_listener_register(&hvf_memory_listener, &address_space_memory); + + return hvf_arch_init(); +} + +static void hvf_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "HVF"; + ac->init_machine = hvf_accel_init; + ac->allowed = &hvf_allowed; +} + +static const TypeInfo hvf_accel_type = { + .name = TYPE_HVF_ACCEL, + .parent = TYPE_ACCEL, + .class_init = hvf_accel_class_init, +}; + +static void hvf_type_init(void) +{ + type_register_static(&hvf_accel_type); +} + +type_init(hvf_type_init); + +static void hvf_vcpu_destroy(CPUState *cpu) +{ + hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd); + assert_hvf_ok(ret); + + hvf_arch_vcpu_destroy(cpu); + g_free(cpu->hvf); + cpu->hvf = NULL; +} + +static int hvf_init_vcpu(CPUState *cpu) +{ + int r; + + cpu->hvf = g_malloc0(sizeof(*cpu->hvf)); + + /* init cpu signals */ + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask); + sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI); + +#ifdef __aarch64__ + r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL); +#else + r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT); +#endif + cpu->vcpu_dirty = 1; + assert_hvf_ok(r); + + return hvf_arch_init_vcpu(cpu); +} + +/* + * The HVF-specific vCPU thread function. This one should only run when the host + * CPU supports the VMX "unrestricted guest" feature. + */ +static void *hvf_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + + int r; + + assert(hvf_enabled()); + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + + cpu->thread_id = qemu_get_thread_id(); + cpu->can_do_io = 1; + current_cpu = cpu; + + hvf_init_vcpu(cpu); + + /* signal CPU creation */ + cpu_thread_signal_created(cpu); + qemu_guest_random_seed_thread_part2(cpu->random_seed); + + do { + if (cpu_can_run(cpu)) { + r = hvf_vcpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + qemu_wait_io_event(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + hvf_vcpu_destroy(cpu); + cpu_thread_signal_destroyed(cpu); + qemu_mutex_unlock_iothread(); + rcu_unregister_thread(); + return NULL; +} + +static void hvf_start_vcpu_thread(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + /* + * HVF currently does not support TCG, and only runs in + * unrestricted-guest mode. 
+ */ + assert(hvf_enabled()); + + cpu->thread = g_malloc0(sizeof(QemuThread)); + cpu->halt_cond = g_malloc0(sizeof(QemuCond)); + qemu_cond_init(cpu->halt_cond); + + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF", + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, hvf_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +} + +static void hvf_accel_ops_class_init(ObjectClass *oc, void *data) +{ + AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); + + ops->create_vcpu_thread = hvf_start_vcpu_thread; + ops->kick_vcpu_thread = hvf_kick_vcpu_thread; + + ops->synchronize_post_reset = hvf_cpu_synchronize_post_reset; + ops->synchronize_post_init = hvf_cpu_synchronize_post_init; + ops->synchronize_state = hvf_cpu_synchronize_state; + ops->synchronize_pre_loadvm = hvf_cpu_synchronize_pre_loadvm; +}; +static const TypeInfo hvf_accel_ops_type = { + .name = ACCEL_OPS_NAME("hvf"), + + .parent = TYPE_ACCEL_OPS, + .class_init = hvf_accel_ops_class_init, + .abstract = true, +}; +static void hvf_accel_ops_register_types(void) +{ + type_register_static(&hvf_accel_ops_type); +} +type_init(hvf_accel_ops_register_types); diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c new file mode 100644 index 00000000000..f185b0830a7 --- /dev/null +++ b/accel/hvf/hvf-all.c @@ -0,0 +1,47 @@ +/* + * QEMU Hypervisor.framework support + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" + +void assert_hvf_ok(hv_return_t ret) +{ + if (ret == HV_SUCCESS) { + return; + } + + switch (ret) { + case HV_ERROR: + error_report("Error: HV_ERROR"); + break; + case HV_BUSY: + error_report("Error: HV_BUSY"); + break; + case HV_BAD_ARGUMENT: + error_report("Error: HV_BAD_ARGUMENT"); + break; + case HV_NO_RESOURCES: + error_report("Error: HV_NO_RESOURCES"); + break; + case HV_NO_DEVICE: + error_report("Error: HV_NO_DEVICE"); + break; + case HV_UNSUPPORTED: + error_report("Error: HV_UNSUPPORTED"); + break; + default: + error_report("Unknown Error"); + } + + abort(); +} diff --git a/accel/hvf/meson.build b/accel/hvf/meson.build new file mode 100644 index 00000000000..fc52cb78433 --- /dev/null +++ b/accel/hvf/meson.build @@ -0,0 +1,7 @@ +hvf_ss = ss.source_set() +hvf_ss.add(files( + 'hvf-all.c', + 'hvf-accel-ops.c', +)) + +specific_ss.add_all(when: 'CONFIG_HVF', if_true: hvf_ss) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index b6d9f92f151..eecd8031cf6 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include +#include #include @@ -30,11 +31,9 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "sysemu/cpus.h" -#include "sysemu/sysemu.h" #include "qemu/bswap.h" #include "exec/memory.h" #include "exec/ram_addr.h" -#include "exec/address-spaces.h" #include "qemu/event_notifier.h" #include "qemu/main-loop.h" #include "trace.h" @@ -80,6 +79,25 @@ struct KVMParkedVcpu { QLIST_ENTRY(KVMParkedVcpu) node; }; +enum KVMDirtyRingReaperState { + KVM_DIRTY_RING_REAPER_NONE = 0, + /* The reaper is sleeping */ + KVM_DIRTY_RING_REAPER_WAIT, + /* The reaper is reaping for dirty pages */ + KVM_DIRTY_RING_REAPER_REAPING, +}; + +/* + * KVM reaper instance, responsible for collecting the KVM dirty bits + * via the dirty ring. 
+ */ +struct KVMDirtyRingReaper { + /* The reaper thread */ + QemuThread reaper_thr; + volatile uint64_t reaper_iteration; /* iteration number of reaper thr */ + volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */ +}; + struct KVMState { AccelState parent_obj; @@ -128,6 +146,9 @@ struct KVMState KVMMemoryListener *ml; AddressSpace *as; } *as; + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + struct KVMDirtyRingReaper reaper; }; KVMState *kvm_state; @@ -174,8 +195,12 @@ typedef struct KVMResampleFd KVMResampleFd; static QLIST_HEAD(, KVMResampleFd) kvm_resample_fd_list = QLIST_HEAD_INITIALIZER(kvm_resample_fd_list); -#define kvm_slots_lock(kml) qemu_mutex_lock(&(kml)->slots_lock) -#define kvm_slots_unlock(kml) qemu_mutex_unlock(&(kml)->slots_lock) +static QemuMutex kml_slots_lock; + +#define kvm_slots_lock() qemu_mutex_lock(&kml_slots_lock) +#define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) + +static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); static inline void kvm_resample_fd_remove(int gsi) { @@ -241,9 +266,9 @@ bool kvm_has_free_slot(MachineState *ms) bool result; KVMMemoryListener *kml = &s->memory_listener; - kvm_slots_lock(kml); + kvm_slots_lock(); result = !!kvm_get_free_slot(kml); - kvm_slots_unlock(kml); + kvm_slots_unlock(); return result; } @@ -309,7 +334,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, KVMMemoryListener *kml = &s->memory_listener; int i, ret = 0; - kvm_slots_lock(kml); + kvm_slots_lock(); for (i = 0; i < s->nr_slots; i++) { KVMSlot *mem = &kml->slots[i]; @@ -319,7 +344,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, break; } } - kvm_slots_unlock(kml); + kvm_slots_unlock(); return ret; } @@ -385,6 +410,13 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) goto err; } + if (cpu->kvm_dirty_gfns) { + ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes); + if (ret < 0) { + goto err; + } + } + vcpu = g_malloc0(sizeof(*vcpu)); vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); vcpu->kvm_fd = cpu->kvm_fd; @@ -437,6 +469,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) cpu->kvm_fd = ret; cpu->kvm_state = s; cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { @@ -461,6 +494,19 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE; } + if (s->kvm_dirty_ring_size) { + /* Use MAP_SHARED to share pages with the kernel */ + cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes, + PROT_READ | PROT_WRITE, MAP_SHARED, + cpu->kvm_fd, + PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET); + if (cpu->kvm_dirty_gfns == MAP_FAILED) { + ret = -errno; + DPRINTF("mmap'ing vcpu dirty gfns failed: %d\n", ret); + goto err; + } + } + ret = kvm_arch_init_vcpu(cpu); if (ret < 0) { error_setg_errno(errp, -ret, @@ -500,6 +546,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, return 0; } + kvm_slot_init_dirty_bitmap(mem); return kvm_set_user_memory_region(kml, mem, false); } @@ -515,7 +562,7 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, return 0; } - kvm_slots_lock(kml); + kvm_slots_lock(); while (size && !ret) { slot_size = MIN(kvm_max_slot_size, size); @@ -531,7 +578,7 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, } out: - kvm_slots_unlock(kml); + kvm_slots_unlock(); return ret; } @@ -570,22 +617,28 @@ static void kvm_log_stop(MemoryListener *listener, } /* get kvm's 
dirty pages bitmap and update qemu's */ -static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, - unsigned long *bitmap) +static void kvm_slot_sync_dirty_pages(KVMSlot *slot) { - ram_addr_t start = section->offset_within_region + - memory_region_get_ram_addr(section->mr); - ram_addr_t pages = int128_get64(section->size) / qemu_real_host_page_size; + ram_addr_t start = slot->ram_start_offset; + ram_addr_t pages = slot->memory_size / qemu_real_host_page_size; - cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages); - return 0; + cpu_physical_memory_set_dirty_lebitmap(slot->dirty_bmap, start, pages); +} + +static void kvm_slot_reset_dirty_pages(KVMSlot *slot) +{ + memset(slot->dirty_bmap, 0, slot->dirty_bmap_size); } #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) /* Allocate the dirty bitmap for a slot */ -static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) +static void kvm_slot_init_dirty_bitmap(KVMSlot *mem) { + if (!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) || mem->dirty_bmap) { + return; + } + /* * XXX bad kernel interface alert * For dirty bitmap, kernel allocates array of size aligned to @@ -606,6 +659,197 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size, /*HOST_LONG_BITS*/ 64) / 8; mem->dirty_bmap = g_malloc0(bitmap_size); + mem->dirty_bmap_size = bitmap_size; +} + +/* + * Sync dirty bitmap from kernel to KVMSlot.dirty_bmap, return true if + * succeeded, false otherwise + */ +static bool kvm_slot_get_dirty_log(KVMState *s, KVMSlot *slot) +{ + struct kvm_dirty_log d = {}; + int ret; + + d.dirty_bitmap = slot->dirty_bmap; + d.slot = slot->slot | (slot->as_id << 16); + ret = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d); + + if (ret == -ENOENT) { + /* kernel does not have dirty bitmap in this slot */ + ret = 0; + } + if (ret) { + error_report_once("%s: KVM_GET_DIRTY_LOG failed with %d", + __func__, ret); + } + return ret == 0; +} + +/* Should be with all slots_lock held for the address spaces. */ +static void kvm_dirty_ring_mark_page(KVMState *s, uint32_t as_id, + uint32_t slot_id, uint64_t offset) +{ + KVMMemoryListener *kml; + KVMSlot *mem; + + if (as_id >= s->nr_as) { + return; + } + + kml = s->as[as_id].ml; + mem = &kml->slots[slot_id]; + + if (!mem->memory_size || offset >= + (mem->memory_size / qemu_real_host_page_size)) { + return; + } + + set_bit(offset, mem->dirty_bmap); +} + +static bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn) +{ + return gfn->flags == KVM_DIRTY_GFN_F_DIRTY; +} + +static void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) +{ + gfn->flags = KVM_DIRTY_GFN_F_RESET; +} + +/* + * Should be with all slots_lock held for the address spaces. It returns the + * dirty page we've collected on this dirty ring. 
+ */ +static uint32_t kvm_dirty_ring_reap_one(KVMState *s, CPUState *cpu) +{ + struct kvm_dirty_gfn *dirty_gfns = cpu->kvm_dirty_gfns, *cur; + uint32_t ring_size = s->kvm_dirty_ring_size; + uint32_t count = 0, fetch = cpu->kvm_fetch_index; + + assert(dirty_gfns && ring_size); + trace_kvm_dirty_ring_reap_vcpu(cpu->cpu_index); + + while (true) { + cur = &dirty_gfns[fetch % ring_size]; + if (!dirty_gfn_is_dirtied(cur)) { + break; + } + kvm_dirty_ring_mark_page(s, cur->slot >> 16, cur->slot & 0xffff, + cur->offset); + dirty_gfn_set_collected(cur); + trace_kvm_dirty_ring_page(cpu->cpu_index, fetch, cur->offset); + fetch++; + count++; + } + cpu->kvm_fetch_index = fetch; + cpu->dirty_pages += count; + + return count; +} + +/* Must be with slots_lock held */ +static uint64_t kvm_dirty_ring_reap_locked(KVMState *s) +{ + int ret; + CPUState *cpu; + uint64_t total = 0; + int64_t stamp; + + stamp = get_clock(); + + CPU_FOREACH(cpu) { + total += kvm_dirty_ring_reap_one(s, cpu); + } + + if (total) { + ret = kvm_vm_ioctl(s, KVM_RESET_DIRTY_RINGS); + assert(ret == total); + } + + stamp = get_clock() - stamp; + + if (total) { + trace_kvm_dirty_ring_reap(total, stamp / 1000); + } + + return total; +} + +/* + * Currently for simplicity, we must hold BQL before calling this. We can + * consider to drop the BQL if we're clear with all the race conditions. + */ +static uint64_t kvm_dirty_ring_reap(KVMState *s) +{ + uint64_t total; + + /* + * We need to lock all kvm slots for all address spaces here, + * because: + * + * (1) We need to mark dirty for dirty bitmaps in multiple slots + * and for tons of pages, so it's better to take the lock here + * once rather than once per page. And more importantly, + * + * (2) We must _NOT_ publish dirty bits to the other threads + * (e.g., the migration thread) via the kvm memory slot dirty + * bitmaps before correctly re-protect those dirtied pages. + * Otherwise we can have potential risk of data corruption if + * the page data is read in the other thread before we do + * reset below. + */ + kvm_slots_lock(); + total = kvm_dirty_ring_reap_locked(s); + kvm_slots_unlock(); + + return total; +} + +static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg) +{ + /* No need to do anything */ +} + +/* + * Kick all vcpus out in a synchronized way. When returned, we + * guarantee that every vcpu has been kicked and at least returned to + * userspace once. + */ +static void kvm_cpu_synchronize_kick_all(void) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL); + } +} + +/* + * Flush all the existing dirty pages to the KVM slot buffers. When + * this call returns, we guarantee that all the touched dirty pages + * before calling this function have been put into the per-kvmslot + * dirty bitmap. + * + * This function must be called with BQL held. + */ +static void kvm_dirty_ring_flush(void) +{ + trace_kvm_dirty_ring_flush(0); + /* + * The function needs to be serialized. Since this function + * should always be with BQL held, serialization is guaranteed. + * However, let's be sure of it. + */ + assert(qemu_mutex_iothread_locked()); + /* + * First make sure to flush the hardware buffers by kicking all + * vcpus out in a synchronous way. 
+ */ + kvm_cpu_synchronize_kick_all(); + kvm_dirty_ring_reap(kvm_state); + trace_kvm_dirty_ring_flush(1); } /** @@ -619,53 +863,28 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) * @kml: the KVM memory listener object * @section: the memory section to sync the dirty bitmap with */ -static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - MemoryRegionSection *section) +static void kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + MemoryRegionSection *section) { KVMState *s = kvm_state; - struct kvm_dirty_log d = {}; KVMSlot *mem; hwaddr start_addr, size; - hwaddr slot_size, slot_offset = 0; - int ret = 0; + hwaddr slot_size; size = kvm_align_section(section, &start_addr); while (size) { - MemoryRegionSection subsection = *section; - slot_size = MIN(kvm_max_slot_size, size); mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); if (!mem) { /* We don't have a slot if we want to trap every access. */ - goto out; - } - - if (!mem->dirty_bmap) { - /* Allocate on the first log_sync, once and for all */ - kvm_memslot_init_dirty_bitmap(mem); + return; } - - d.dirty_bitmap = mem->dirty_bmap; - d.slot = mem->slot | (kml->as_id << 16); - ret = kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d); - if (ret == -ENOENT) { - /* kernel does not have dirty bitmap in this slot */ - ret = 0; - } else if (ret < 0) { - error_report("ioctl KVM_GET_DIRTY_LOG failed: %d", errno); - goto out; - } else { - subsection.offset_within_region += slot_offset; - subsection.size = int128_make64(slot_size); - kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); + if (kvm_slot_get_dirty_log(s, mem)) { + kvm_slot_sync_dirty_pages(mem); } - - slot_offset += slot_size; start_addr += slot_size; size -= slot_size; } -out: - return ret; } /* Alignment requirement for KVM_CLEAR_DIRTY_LOG - 64 pages */ @@ -812,7 +1031,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, return ret; } - kvm_slots_lock(kml); + kvm_slots_lock(); for (i = 0; i < s->nr_slots; i++) { mem = &kml->slots[i]; @@ -838,7 +1057,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml, } } - kvm_slots_unlock(kml); + kvm_slots_unlock(); return ret; } @@ -912,6 +1131,7 @@ static void kvm_coalesce_pio_del(MemoryListener *listener, } static MemoryListener kvm_coalesced_pio_listener = { + .name = "kvm-coalesced-pio", .coalesced_io_add = kvm_coalesce_pio_add, .coalesced_io_del = kvm_coalesce_pio_del, }; @@ -1121,7 +1341,8 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, int err; MemoryRegion *mr = section->mr; bool writeable = !mr->readonly && !mr->rom_device; - hwaddr start_addr, size, slot_size; + hwaddr start_addr, size, slot_size, mr_offset; + ram_addr_t ram_start_offset; void *ram; if (!memory_region_is_ram(mr)) { @@ -1139,11 +1360,15 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, return; } - /* use aligned delta to align the ram address */ - ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + - (start_addr - section->offset_within_address_space); + /* The offset of the kvmslot within the memory region */ + mr_offset = section->offset_within_region + start_addr - + section->offset_within_address_space; + + /* use aligned delta to align the ram address and offset */ + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; - kvm_slots_lock(kml); + kvm_slots_lock(); if (!add) { do { @@ -1153,7 +1378,25 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, goto out; } if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - 
kvm_physical_sync_dirty_bitmap(kml, section); + /* + * NOTE: We should be aware of the fact that here we're only + * doing a best effort to sync dirty bits. No matter whether + * we're using dirty log or dirty ring, we ignored two facts: + * + * (1) dirty bits can reside in hardware buffers (PML) + * + * (2) after we collected dirty bits here, pages can be dirtied + * again before we do the final KVM_SET_USER_MEMORY_REGION to + * remove the slot. + * + * Not easy. Let's cross the fingers until it's fixed. + */ + if (kvm_state->kvm_dirty_ring_size) { + kvm_dirty_ring_reap_locked(kvm_state); + } else { + kvm_slot_get_dirty_log(kvm_state, mem); + } + kvm_slot_sync_dirty_pages(mem); } /* unregister the slot */ @@ -1177,18 +1420,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, do { slot_size = MIN(kvm_max_slot_size, size); mem = kvm_alloc_slot(kml); + mem->as_id = kml->as_id; mem->memory_size = slot_size; mem->start_addr = start_addr; + mem->ram_start_offset = ram_start_offset; mem->ram = ram; mem->flags = kvm_mem_flags(mr); - - if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - /* - * Reallocate the bmap; it means it doesn't disappear in - * middle of a migrate. - */ - kvm_memslot_init_dirty_bitmap(mem); - } + kvm_slot_init_dirty_bitmap(mem); err = kvm_set_user_memory_region(kml, mem, true); if (err) { fprintf(stderr, "%s: error registering slot: %s\n", __func__, @@ -1196,12 +1434,58 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, abort(); } start_addr += slot_size; + ram_start_offset += slot_size; ram += slot_size; size -= slot_size; } while (size); out: - kvm_slots_unlock(kml); + kvm_slots_unlock(); +} + +static void *kvm_dirty_ring_reaper_thread(void *data) +{ + KVMState *s = data; + struct KVMDirtyRingReaper *r = &s->reaper; + + rcu_register_thread(); + + trace_kvm_dirty_ring_reaper("init"); + + while (true) { + r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT; + trace_kvm_dirty_ring_reaper("wait"); + /* + * TODO: provide a smarter timeout rather than a constant? + */ + sleep(1); + + trace_kvm_dirty_ring_reaper("wakeup"); + r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; + + qemu_mutex_lock_iothread(); + kvm_dirty_ring_reap(s); + qemu_mutex_unlock_iothread(); + + r->reaper_iteration++; + } + + trace_kvm_dirty_ring_reaper("exit"); + + rcu_unregister_thread(); + + return NULL; +} + +static int kvm_dirty_ring_reaper_init(KVMState *s) +{ + struct KVMDirtyRingReaper *r = &s->reaper; + + qemu_thread_create(&r->reaper_thr, "kvm-reaper", + kvm_dirty_ring_reaper_thread, + s, QEMU_THREAD_JOINABLE); + + return 0; } static void kvm_region_add(MemoryListener *listener, @@ -1226,14 +1510,40 @@ static void kvm_log_sync(MemoryListener *listener, MemoryRegionSection *section) { KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); - int r; - kvm_slots_lock(kml); - r = kvm_physical_sync_dirty_bitmap(kml, section); - kvm_slots_unlock(kml); - if (r < 0) { - abort(); + kvm_slots_lock(); + kvm_physical_sync_dirty_bitmap(kml, section); + kvm_slots_unlock(); +} + +static void kvm_log_sync_global(MemoryListener *l) +{ + KVMMemoryListener *kml = container_of(l, KVMMemoryListener, listener); + KVMState *s = kvm_state; + KVMSlot *mem; + int i; + + /* Flush all kernel dirty addresses into KVMSlot dirty bitmap */ + kvm_dirty_ring_flush(); + + /* + * TODO: make this faster when nr_slots is big while there are + * only a few used slots (small VMs). 
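The reaper thread added above is a plain "periodic worker under a global lock" pattern. Below is a hedged, generic sketch of that pattern with POSIX threads; the one-second period and the lock taken around each pass mirror the patch, while the mutex and the work function here are placeholders rather than QEMU's BQL or its reap routine.

#include <pthread.h>
#include <unistd.h>
#include <stdbool.h>

static pthread_mutex_t demo_big_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool demo_quit;

static void demo_reap_pass(void)
{
    /* placeholder for collecting dirty pages from all vcpu rings */
}

void *demo_reaper_thread(void *arg)
{
    (void)arg;
    while (!demo_quit) {
        sleep(1);                            /* patch TODO: smarter timeout */
        pthread_mutex_lock(&demo_big_lock);  /* the patch takes the BQL here */
        demo_reap_pass();
        pthread_mutex_unlock(&demo_big_lock);
    }
    return NULL;
}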
+ */ + kvm_slots_lock(); + for (i = 0; i < s->nr_slots; i++) { + mem = &kml->slots[i]; + if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + kvm_slot_sync_dirty_pages(mem); + /* + * This is not needed by KVM_GET_DIRTY_LOG because the + * ioctl will unconditionally overwrite the whole region. + * However kvm dirty ring has no such side effect. + */ + kvm_slot_reset_dirty_pages(mem); + } } + kvm_slots_unlock(); } static void kvm_log_clear(MemoryListener *listener, @@ -1326,11 +1636,10 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener, } void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - AddressSpace *as, int as_id) + AddressSpace *as, int as_id, const char *name) { int i; - qemu_mutex_init(&kml->slots_lock); kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot)); kml->as_id = as_id; @@ -1342,9 +1651,15 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, kml->listener.region_del = kvm_region_del; kml->listener.log_start = kvm_log_start; kml->listener.log_stop = kvm_log_stop; - kml->listener.log_sync = kvm_log_sync; - kml->listener.log_clear = kvm_log_clear; kml->listener.priority = 10; + kml->listener.name = name; + + if (s->kvm_dirty_ring_size) { + kml->listener.log_sync_global = kvm_log_sync_global; + } else { + kml->listener.log_sync = kvm_log_sync; + kml->listener.log_clear = kvm_log_clear; + } memory_listener_register(&kml->listener, as); @@ -1358,6 +1673,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, } static MemoryListener kvm_io_listener = { + .name = "kvm-io", .eventfd_add = kvm_io_ioeventfd_add, .eventfd_del = kvm_io_ioeventfd_del, .priority = 10, @@ -1982,6 +2298,11 @@ bool kvm_vcpu_id_is_valid(int vcpu_id) return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s); } +bool kvm_dirty_ring_enabled(void) +{ + return kvm_state->kvm_dirty_ring_size ? true : false; +} + static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); @@ -2003,6 +2324,8 @@ static int kvm_init(MachineState *ms) int type = 0; uint64_t dirty_log_manual_caps; + qemu_mutex_init(&kml_slots_lock); + s = KVM_STATE(ms->accelerator); /* @@ -2019,7 +2342,6 @@ static int kvm_init(MachineState *ms) QTAILQ_INIT(&s->kvm_sw_breakpoints); #endif QLIST_INIT(&s->kvm_parked_vcpus); - s->vmfd = -1; s->fd = qemu_open_old("/dev/kvm", O_RDWR); if (s->fd == -1) { fprintf(stderr, "Could not access KVM kernel module: %m\n"); @@ -2085,6 +2407,12 @@ static int kvm_init(MachineState *ms) "- for kernels supporting the vm.allocate_pgste sysctl, " "whether it is enabled\n"); } +#elif defined(TARGET_PPC) + if (ret == -EINVAL) { + fprintf(stderr, + "PPC KVM module is not loaded. Try modprobe kvm_%s.\n", + (type == 2) ? "pr" : "hv"); + } #endif goto err; } @@ -2127,20 +2455,70 @@ static int kvm_init(MachineState *ms) s->coalesced_pio = s->coalesced_mmio && kvm_check_extension(s, KVM_CAP_COALESCED_PIO); - dirty_log_manual_caps = - kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); - s->manual_dirty_log_protect = dirty_log_manual_caps; - if (dirty_log_manual_caps) { - ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, - dirty_log_manual_caps); - if (ret) { - warn_report("Trying to enable capability %"PRIu64" of " - "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. " - "Falling back to the legacy mode. 
", - dirty_log_manual_caps); - s->manual_dirty_log_protect = 0; + /* + * Enable KVM dirty ring if supported, otherwise fall back to + * dirty logging mode + */ + if (s->kvm_dirty_ring_size > 0) { + uint64_t ring_bytes; + + ring_bytes = s->kvm_dirty_ring_size * sizeof(struct kvm_dirty_gfn); + + /* Read the max supported pages */ + ret = kvm_vm_check_extension(s, KVM_CAP_DIRTY_LOG_RING); + if (ret > 0) { + if (ring_bytes > ret) { + error_report("KVM dirty ring size %" PRIu32 " too big " + "(maximum is %ld). Please use a smaller value.", + s->kvm_dirty_ring_size, + (long)ret / sizeof(struct kvm_dirty_gfn)); + ret = -EINVAL; + goto err; + } + + ret = kvm_vm_enable_cap(s, KVM_CAP_DIRTY_LOG_RING, 0, ring_bytes); + if (ret) { + error_report("Enabling of KVM dirty ring failed: %s. " + "Suggested minimum value is 1024.", strerror(-ret)); + goto err; + } + + s->kvm_dirty_ring_bytes = ring_bytes; + } else { + warn_report("KVM dirty ring not available, using bitmap method"); + s->kvm_dirty_ring_size = 0; + } + } + + /* + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is not needed when dirty ring is + * enabled. More importantly, KVM_DIRTY_LOG_INITIALLY_SET will assume no + * page is wr-protected initially, which is against how kvm dirty ring is + * usage - kvm dirty ring requires all pages are wr-protected at the very + * beginning. Enabling this feature for dirty ring causes data corruption. + * + * TODO: Without KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and kvm clear dirty log, + * we may expect a higher stall time when starting the migration. In the + * future we can enable KVM_CLEAR_DIRTY_LOG to work with dirty ring too: + * instead of clearing dirty bit, it can be a way to explicitly wr-protect + * guest pages. + */ + if (!s->kvm_dirty_ring_size) { + dirty_log_manual_caps = + kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + s->manual_dirty_log_protect = dirty_log_manual_caps; + if (dirty_log_manual_caps) { + ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, + dirty_log_manual_caps); + if (ret) { + warn_report("Trying to enable capability %"PRIu64" of " + "KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. " + "Falling back to the legacy mode. ", + dirty_log_manual_caps); + s->manual_dirty_log_protect = 0; + } } } @@ -2211,7 +2589,7 @@ static int kvm_init(MachineState *ms) s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region; kvm_memory_listener_register(s, &s->memory_listener, - &address_space_memory, 0); + &address_space_memory, 0, "kvm-memory"); if (kvm_eventfds_allowed) { memory_listener_register(&kvm_io_listener, &address_space_io); @@ -2226,6 +2604,14 @@ static int kvm_init(MachineState *ms) ret = ram_block_discard_disable(true); assert(!ret); } + + if (s->kvm_dirty_ring_size) { + ret = kvm_dirty_ring_reaper_init(s); + if (ret) { + goto err; + } + } + return 0; err: @@ -2269,7 +2655,7 @@ static int kvm_handle_internal_error(CPUState *cpu, struct kvm_run *run) int i; for (i = 0; i < run->internal.ndata; ++i) { - fprintf(stderr, "extra data[%d]: %"PRIx64"\n", + fprintf(stderr, "extra data[%d]: 0x%016"PRIx64"\n", i, (uint64_t)run->internal.data[i]); } } @@ -2538,6 +2924,17 @@ int kvm_cpu_exec(CPUState *cpu) case KVM_EXIT_INTERNAL_ERROR: ret = kvm_handle_internal_error(cpu, run); break; + case KVM_EXIT_DIRTY_RING_FULL: + /* + * We shouldn't continue if the dirty ring of this vcpu is + * still full. Got kicked by KVM_RESET_DIRTY_RINGS. 
+ */ + trace_kvm_dirty_ring_full(cpu->cpu_index); + qemu_mutex_lock_iothread(); + kvm_dirty_ring_reap(kvm_state); + qemu_mutex_unlock_iothread(); + ret = 0; + break; case KVM_EXIT_SYSTEM_EVENT: switch (run->system_event.type) { case KVM_SYSTEM_EVENT_SHUTDOWN: @@ -3114,6 +3511,11 @@ static void kvm_set_kvm_shadow_mem(Object *obj, Visitor *v, KVMState *s = KVM_STATE(obj); int64_t value; + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + if (!visit_type_int(v, name, &value, errp)) { return; } @@ -3128,6 +3530,11 @@ static void kvm_set_kernel_irqchip(Object *obj, Visitor *v, KVMState *s = KVM_STATE(obj); OnOffSplit mode; + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + if (!visit_type_OnOffSplit(v, name, &mode, errp)) { return; } @@ -3170,13 +3577,53 @@ bool kvm_kernel_irqchip_split(void) return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; } +static void kvm_get_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value = s->kvm_dirty_ring_size; + + visit_type_uint32(v, name, &value, errp); +} + +static void kvm_set_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + uint32_t value; + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + visit_type_uint32(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + if (value & (value - 1)) { + error_setg(errp, "dirty-ring-size must be a power of two."); + return; + } + + s->kvm_dirty_ring_size = value; +} + static void kvm_accel_instance_init(Object *obj) { KVMState *s = KVM_STATE(obj); + s->fd = -1; + s->vmfd = -1; s->kvm_shadow_mem = -1; s->kernel_irqchip_allowed = true; s->kernel_irqchip_split = ON_OFF_AUTO_AUTO; + /* KVM dirty ring is by default off */ + s->kvm_dirty_ring_size = 0; } static void kvm_accel_class_init(ObjectClass *oc, void *data) @@ -3198,6 +3645,12 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, "kvm-shadow-mem", "KVM shadow MMU size"); + + object_class_property_add(oc, "dirty-ring-size", "uint32", + kvm_get_dirty_ring_size, kvm_set_dirty_ring_size, + NULL, NULL); + object_class_property_set_description(oc, "dirty-ring-size", + "Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)"); } static const TypeInfo kvm_accel_type = { diff --git a/accel/kvm/meson.build b/accel/kvm/meson.build index 8d219bea507..397a1fe1fd1 100644 --- a/accel/kvm/meson.build +++ b/accel/kvm/meson.build @@ -3,6 +3,5 @@ kvm_ss.add(files( 'kvm-all.c', 'kvm-accel-ops.c', )) -kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) specific_ss.add_all(when: 'CONFIG_KVM', if_true: kvm_ss) diff --git a/accel/kvm/sev-stub.c b/accel/kvm/sev-stub.c deleted file mode 100644 index 9587d1b2a31..00000000000 --- a/accel/kvm/sev-stub.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
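The dirty-ring-size setter above rejects values that are not a power of two with the classic value & (value - 1) test; zero passes, which conveniently matches the "ring disabled" default. A tiny illustration; the command-line spelling -accel kvm,dirty-ring-size=4096 is how accelerator properties are normally passed and is an assumption here, not something this hunk shows.

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

/* True for 0 (ring disabled) and for any power of two. */
static bool demo_ring_size_ok(uint32_t value)
{
    return (value & (value - 1)) == 0;
}

int main(void)
{
    assert(demo_ring_size_ok(0));       /* default: dirty ring off */
    assert(demo_ring_size_ok(4096));    /* e.g. -accel kvm,dirty-ring-size=4096 */
    assert(!demo_ring_size_ok(3000));   /* rejected with an error */
    return 0;
}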
- * - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "sysemu/sev.h" - -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - /* If we get here, cgs must be some non-SEV thing */ - return 0; -} diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index e15ae8980d3..399aaeb0ec7 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" @@ -18,4 +18,11 @@ kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" +kvm_dirty_ring_full(int id) "vcpu %d" +kvm_dirty_ring_reap_vcpu(int id) "vcpu %d" +kvm_dirty_ring_page(int vcpu, uint32_t slot, uint64_t offset) "vcpu %d fetch %"PRIu32" offset 0x%"PRIx64 +kvm_dirty_ring_reaper(const char *s) "%s" +kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" +kvm_dirty_ring_reaper_kick(const char *reason) "%s" +kvm_dirty_ring_flush(int finished) "%d" diff --git a/accel/meson.build b/accel/meson.build index b44ba30c864..dfd808d2c8e 100644 --- a/accel/meson.build +++ b/accel/meson.build @@ -2,6 +2,7 @@ specific_ss.add(files('accel-common.c')) softmmu_ss.add(files('accel-softmmu.c')) user_ss.add(files('accel-user.c')) +subdir('hvf') subdir('qtest') subdir('kvm') subdir('tcg') diff --git a/accel/qtest/meson.build b/accel/qtest/meson.build index a2f32764598..4c656002933 100644 --- a/accel/qtest/meson.build +++ b/accel/qtest/meson.build @@ -1,6 +1,2 @@ -qtest_ss = ss.source_set() -qtest_ss.add(files( - 'qtest.c', -)) - -specific_ss.add_all(when: ['CONFIG_SOFTMMU', 'CONFIG_POSIX'], if_true: qtest_ss) +qtest_module_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_POSIX'], + if_true: files('qtest.c')) diff --git a/accel/qtest/qtest.c b/accel/qtest/qtest.c index edb29f6fa4c..7e6b8110d52 100644 --- a/accel/qtest/qtest.c +++ b/accel/qtest/qtest.c @@ -45,6 +45,7 @@ static const TypeInfo qtest_accel_type = { .parent = TYPE_ACCEL, .class_init = qtest_accel_class_init, }; +module_obj(TYPE_QTEST_ACCEL); static void qtest_accel_ops_class_init(ObjectClass *oc, void *data) { @@ -61,6 +62,7 @@ static const TypeInfo qtest_accel_ops_type = { .class_init = qtest_accel_ops_class_init, .abstract = true, }; +module_obj(ACCEL_OPS_NAME("qtest")); static void qtest_type_init(void) { diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 0f17acfac0f..5319573e003 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "sysemu/kvm.h" #ifndef CONFIG_USER_ONLY @@ -148,4 +147,9 @@ bool kvm_arm_supports_user_irq(void) { return false; } + +bool kvm_dirty_ring_enabled(void) +{ + return false; +} #endif diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index 2304606f8e0..d8162673ae8 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -11,7 +11,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/exec-all.h" void tb_flush(CPUState *cpu) diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc index 
344525b0bb3..1df1f243e91 100644 --- a/accel/tcg/atomic_common.c.inc +++ b/accel/tcg/atomic_common.c.inc @@ -13,42 +13,112 @@ * See the COPYING file in the top-level directory. */ -static inline -void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_rmw_pre(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { CPUState *cpu = env_cpu(env); - trace_guest_mem_before_exec(cpu, addr, info); - trace_guest_mem_before_exec(cpu, addr, info | TRACE_MEM_ST); + trace_guest_rmw_before_exec(cpu, addr, oi); } -static inline void -atomic_trace_rmw_post(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info | TRACE_MEM_ST); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW); } -static inline -void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr, uint16_t info) +#if HAVE_ATOMIC128 +static void atomic_trace_ld_pre(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - trace_guest_mem_before_exec(env_cpu(env), addr, info); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); } -static inline -void atomic_trace_ld_post(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); } -static inline -void atomic_trace_st_pre(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_st_pre(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - trace_guest_mem_before_exec(env_cpu(env), addr, info); + trace_guest_st_before_exec(env_cpu(env), addr, oi); } -static inline -void atomic_trace_st_post(CPUArchState *env, target_ulong addr, uint16_t info) +static void atomic_trace_st_post(CPUArchState *env, target_ulong addr, + MemOpIdx oi) { - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, info); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } +#endif + +/* + * Atomic helpers callable from TCG. + * These have a common interface and all defer to cpu_atomic_* + * using the host return address from GETPC(). 
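The CMPXCHG_HELPER/ATOMIC_HELPER macros in the next hunk only stamp out many one-line trampolines with a common signature. As a rough idea of what a single expansion amounts to, here is a hand-written equivalent with made-up demo_* names; the real helpers call the corresponding cpu_atomic_*_mmu backend and pass the host return address obtained via GETPC().

#include <stdint.h>

/* Hypothetical backend, standing in for cpu_atomic_cmpxchgl_le_mmu();
 * stubbed out so the sketch compiles on its own. */
static uint32_t demo_backend_cmpxchg32(void *env, uint64_t addr,
                                       uint32_t oldv, uint32_t newv,
                                       uint32_t oi, uintptr_t retaddr)
{
    (void)env; (void)addr; (void)newv; (void)oi; (void)retaddr;
    return oldv;   /* a real backend performs the guest-memory cmpxchg */
}

/* Roughly what one CMPXCHG_HELPER(cmpxchgl_le, uint32_t) expansion provides:
 * a thin trampoline that forwards the caller's return address. */
uint32_t demo_helper_atomic_cmpxchgl_le(void *env, uint64_t addr,
                                        uint32_t oldv, uint32_t newv,
                                        uint32_t oi)
{
    uintptr_t host_ra = (uintptr_t)__builtin_return_address(0); /* like GETPC() */
    return demo_backend_cmpxchg32(env, addr, oldv, newv, oi, host_ra);
}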
+ */ + +#define CMPXCHG_HELPER(OP, TYPE) \ + TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \ + TYPE oldv, TYPE newv, uint32_t oi) \ + { return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); } + +CMPXCHG_HELPER(cmpxchgb, uint32_t) +CMPXCHG_HELPER(cmpxchgw_be, uint32_t) +CMPXCHG_HELPER(cmpxchgw_le, uint32_t) +CMPXCHG_HELPER(cmpxchgl_be, uint32_t) +CMPXCHG_HELPER(cmpxchgl_le, uint32_t) + +#ifdef CONFIG_ATOMIC64 +CMPXCHG_HELPER(cmpxchgq_be, uint64_t) +CMPXCHG_HELPER(cmpxchgq_le, uint64_t) +#endif + +#undef CMPXCHG_HELPER + +#define ATOMIC_HELPER(OP, TYPE) \ + TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \ + TYPE val, uint32_t oi) \ + { return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); } + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPERS(OP) \ + ATOMIC_HELPER(glue(OP,b), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_le), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_le), uint32_t) \ + ATOMIC_HELPER(glue(OP,q_be), uint64_t) \ + ATOMIC_HELPER(glue(OP,q_le), uint64_t) +#else +#define GEN_ATOMIC_HELPERS(OP) \ + ATOMIC_HELPER(glue(OP,b), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,w_le), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_be), uint32_t) \ + ATOMIC_HELPER(glue(OP,l_le), uint32_t) +#endif + +GEN_ATOMIC_HELPERS(fetch_add) +GEN_ATOMIC_HELPERS(fetch_and) +GEN_ATOMIC_HELPERS(fetch_or) +GEN_ATOMIC_HELPERS(fetch_xor) +GEN_ATOMIC_HELPERS(fetch_smin) +GEN_ATOMIC_HELPERS(fetch_umin) +GEN_ATOMIC_HELPERS(fetch_smax) +GEN_ATOMIC_HELPERS(fetch_umax) + +GEN_ATOMIC_HELPERS(add_fetch) +GEN_ATOMIC_HELPERS(and_fetch) +GEN_ATOMIC_HELPERS(or_fetch) +GEN_ATOMIC_HELPERS(xor_fetch) +GEN_ATOMIC_HELPERS(smin_fetch) +GEN_ATOMIC_HELPERS(umin_fetch) +GEN_ATOMIC_HELPERS(smax_fetch) +GEN_ATOMIC_HELPERS(umax_fetch) + +GEN_ATOMIC_HELPERS(xchg) + +#undef ATOMIC_HELPER +#undef GEN_ATOMIC_HELPERS diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h index 0ff7f913e1f..2d917b6b1fe 100644 --- a/accel/tcg/atomic_template.h +++ b/accel/tcg/atomic_template.h @@ -19,7 +19,6 @@ */ #include "qemu/plugin.h" -#include "trace/mem.h" #if DATA_SIZE == 16 # define SUFFIX o @@ -28,8 +27,8 @@ # define SHIFT 4 #elif DATA_SIZE == 8 # define SUFFIX q -# define DATA_TYPE uint64_t -# define SDATA_TYPE int64_t +# define DATA_TYPE aligned_uint64_t +# define SDATA_TYPE aligned_int64_t # define BSWAP bswap64 # define SHIFT 3 #elif DATA_SIZE == 4 @@ -71,85 +70,78 @@ #endif ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) + ABI_TYPE cmpv, ABI_TYPE newv, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); DATA_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); #if DATA_SIZE == 16 ret = atomic16_cmpxchg(haddr, cmpv, newv); #else ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return ret; } #if DATA_SIZE >= 16 #if HAVE_ATOMIC128 -ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE val, 
*haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ, retaddr); + DATA_TYPE val; - atomic_trace_ld_pre(env, addr, info); + atomic_trace_ld_pre(env, addr, oi); val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; - atomic_trace_ld_post(env, addr, info); + atomic_trace_ld_post(env, addr, oi); return val; } -void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, true, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_WRITE, retaddr); - atomic_trace_st_pre(env, addr, info); + atomic_trace_st_pre(env, addr, oi); atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_st_post(env, addr, info); + atomic_trace_st_post(env, addr, oi); } #endif #else -ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); DATA_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); ret = qatomic_xchg__nocheck(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return ret; } #define GEN_ATOMIC_HELPER(X) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE val EXTRA_ARGS) \ + ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ DATA_TYPE ret; \ - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ - ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ ret = qatomic_##X(haddr, val); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return ret; \ } @@ -164,7 +156,8 @@ GEN_ATOMIC_HELPER(xor_fetch) #undef GEN_ATOMIC_HELPER -/* These helpers are, as a whole, full barriers. Within the helper, +/* + * These helpers are, as a whole, full barriers. Within the helper, * the leading barrier is explicit and the trailing barrier is within * cmpxchg primitive. 
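GEN_ATOMIC_HELPER_FN in the next hunk implements operations that have no single host instruction (fetch-smin, fetch-umax, and friends) as a read/compute/compare-and-swap retry loop. A standalone sketch of the same loop using C11 atomics, shown for an unsigned 32-bit fetch-min; the explicit leading fence corresponds to the smp_mb() in the helper.

#include <stdatomic.h>
#include <stdint.h>

/* Atomically store min(*p, val) and return the previous value. */
uint32_t demo_fetch_umin32(_Atomic uint32_t *p, uint32_t val)
{
    uint32_t old;

    atomic_thread_fence(memory_order_seq_cst);   /* leading barrier (smp_mb) */
    old = atomic_load_explicit(p, memory_order_relaxed);
    for (;;) {
        uint32_t new = old < val ? old : val;
        /* On failure, 'old' is reloaded with the current value and we retry;
         * the successful CAS supplies the trailing barrier. */
        if (atomic_compare_exchange_strong(p, &old, new)) {
            return old;
        }
    }
}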
* @@ -173,14 +166,12 @@ GEN_ATOMIC_HELPER(xor_fetch) */ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE xval EXTRA_ARGS) \ + ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ XDATA_TYPE cmp, old, new, val = xval; \ - uint16_t info = trace_mem_build_info(SHIFT, false, 0, false, \ - ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ smp_mb(); \ cmp = qatomic_read__nocheck(haddr); \ do { \ @@ -188,7 +179,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ cmp = qatomic_cmpxchg__nocheck(haddr, old, new); \ } while (cmp != old); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return RET; \ } @@ -218,87 +209,79 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new) #endif ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) + ABI_TYPE cmpv, ABI_TYPE newv, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); DATA_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); #if DATA_SIZE == 16 ret = atomic16_cmpxchg(haddr, BSWAP(cmpv), BSWAP(newv)); #else ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)); #endif ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return BSWAP(ret); } #if DATA_SIZE >= 16 #if HAVE_ATOMIC128 -ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ, retaddr); + DATA_TYPE val; - atomic_trace_ld_pre(env, addr, info); + atomic_trace_ld_pre(env, addr, oi); val = atomic16_read(haddr); ATOMIC_MMU_CLEANUP; - atomic_trace_ld_post(env, addr, info); + atomic_trace_ld_post(env, addr, oi); return BSWAP(val); } -void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, true, - ATOMIC_MMU_IDX); + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_WRITE, retaddr); - val = BSWAP(val); - atomic_trace_st_pre(env, addr, info); + atomic_trace_st_pre(env, addr, oi); val = BSWAP(val); atomic16_set(haddr, val); ATOMIC_MMU_CLEANUP; - atomic_trace_st_post(env, addr, info); + atomic_trace_st_post(env, addr, oi); } #endif #else -ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, - ABI_TYPE val EXTRA_ARGS) +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val, + MemOpIdx oi, uintptr_t retaddr) { - ATOMIC_MMU_DECLS; - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE *haddr = 
atomic_mmu_lookup(env, addr, oi, DATA_SIZE, + PAGE_READ | PAGE_WRITE, retaddr); ABI_TYPE ret; - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, false, - ATOMIC_MMU_IDX); - atomic_trace_rmw_pre(env, addr, info); + atomic_trace_rmw_pre(env, addr, oi); ret = qatomic_xchg__nocheck(haddr, BSWAP(val)); ATOMIC_MMU_CLEANUP; - atomic_trace_rmw_post(env, addr, info); + atomic_trace_rmw_post(env, addr, oi); return BSWAP(ret); } #define GEN_ATOMIC_HELPER(X) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE val EXTRA_ARGS) \ + ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ DATA_TYPE ret; \ - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ - false, ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ ret = qatomic_##X(haddr, BSWAP(val)); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return BSWAP(ret); \ } @@ -320,14 +303,12 @@ GEN_ATOMIC_HELPER(xor_fetch) */ #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET) \ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ - ABI_TYPE xval EXTRA_ARGS) \ + ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \ { \ - ATOMIC_MMU_DECLS; \ - XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \ + PAGE_READ | PAGE_WRITE, retaddr); \ XDATA_TYPE ldo, ldn, old, new, val = xval; \ - uint16_t info = trace_mem_build_info(SHIFT, false, MO_BSWAP, \ - false, ATOMIC_MMU_IDX); \ - atomic_trace_rmw_pre(env, addr, info); \ + atomic_trace_rmw_pre(env, addr, oi); \ smp_mb(); \ ldn = qatomic_read__nocheck(haddr); \ do { \ @@ -335,7 +316,7 @@ ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new)); \ } while (ldo != ldn); \ ATOMIC_MMU_CLEANUP; \ - atomic_trace_rmw_post(env, addr, info); \ + atomic_trace_rmw_post(env, addr, oi); \ return RET; \ } diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index 12c1e3e9744..be6fe45aa5a 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "sysemu/cpus.h" #include "sysemu/tcg.h" #include "exec/exec-all.h" diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 78da27ffaf9..6405c12a634 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -20,7 +20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/qemu-print.h" -#include "cpu.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/type-helpers.h" #include "hw/core/tcg-cpu-ops.h" #include "trace.h" #include "disas/disas.h" @@ -30,8 +32,6 @@ #include "qemu/compiler.h" #include "qemu/timer.h" #include "qemu/rcu.h" -#include "exec/tb-hash.h" -#include "exec/tb-lookup.h" #include "exec/log.h" #include "exec/log_instr.h" #include "qemu/main-loop.h" @@ -42,6 +42,10 @@ #include "exec/cpu-all.h" #include "sysemu/cpu-timers.h" #include "sysemu/replay.h" +#include "sysemu/tcg.h" +#include "exec/helper-proto.h" +#include "tb-hash.h" +#include "tb-context.h" #include "internal.h" /* -icount align implementation. 
*/ @@ -146,6 +150,202 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu) } #endif /* CONFIG USER ONLY */ +uint32_t curr_cflags(CPUState *cpu) +{ + uint32_t cflags = cpu->tcg_cflags; + + /* + * Record gdb single-step. We should be exiting the TB by raising + * EXCP_DEBUG, but to simplify other tests, disable chaining too. + * + * For singlestep and -d nochain, suppress goto_tb so that + * we can log -d cpu,exec after every TB. + */ + if (unlikely(cpu->singlestep_enabled)) { + cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1; + } else if (singlestep) { + cflags |= CF_NO_GOTO_TB | 1; + } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + cflags |= CF_NO_GOTO_TB; + } + + return cflags; +} + +/* Might cause an exception, so have a longjmp destination ready */ +static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, + target_ulong cs_base, + target_ulong pcc_base, + target_ulong pcc_top, + uint32_t cheri_flags, + uint32_t flags, uint32_t cflags) +{ + TranslationBlock *tb; + uint32_t hash; + + /* we should never be trying to look up an INVALID tb */ + tcg_debug_assert(!(cflags & CF_INVALID)); + + hash = tb_jmp_cache_hash_func(pc); + tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); + + if (likely(tb && + tb->pc == pc && + tb->cs_base == cs_base && + tb->pcc_base == pcc_base && + tb->pcc_top == pcc_top && + tb->cheri_flags == cheri_flags && + tb->flags == flags && + tb->trace_vcpu_dstate == *cpu->trace_dstate && + tb_cflags(tb) == cflags)) { + return tb; + } + tb = tb_htable_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); + if (tb == NULL) { + return NULL; + } + qatomic_set(&cpu->tb_jmp_cache[hash], tb); + return tb; +} + +static inline void log_cpu_exec(target_ulong pc, CPUState *cpu, + const TranslationBlock *tb) +{ + if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) + && qemu_log_in_addr_range(pc)) { + + qemu_log_mask(CPU_LOG_EXEC, + "Trace %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx + "/" TARGET_FMT_lx "-" TARGET_FMT_lx + "/%08x/%08x/%08x] %s\n", + cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc, + tb->pcc_base, tb->pcc_top, tb->cheri_flags, + tb->flags, tb->cflags, lookup_symbol(pc)); + +#if defined(DEBUG_DISAS) + if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { + FILE *logfile = qemu_log_lock(); + int flags = 0; + + if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) { + flags |= CPU_DUMP_FPU; + } +#if defined(TARGET_I386) + flags |= CPU_DUMP_CCOP; +#endif + log_cpu_state(cpu, flags); + qemu_log_unlock(logfile); + } +#endif /* DEBUG_DISAS */ + } +} + +static bool check_for_breakpoints(CPUState *cpu, target_ulong pc, + uint32_t *cflags) +{ + CPUBreakpoint *bp; + bool match_page = false; + + if (likely(QTAILQ_EMPTY(&cpu->breakpoints))) { + return false; + } + + /* + * Singlestep overrides breakpoints. + * This requirement is visible in the record-replay tests, where + * we would fail to make forward progress in reverse-continue. + * + * TODO: gdb singlestep should only override gdb breakpoints, + * so that one could (gdb) singlestep into the guest kernel's + * architectural breakpoint handler. + */ + if (cpu->singlestep_enabled) { + return false; + } + + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { + /* + * If we have an exact pc match, trigger the breakpoint. + * Otherwise, note matches within the page. 
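tb_lookup() above is a two-level cache: a small direct-mapped per-CPU jump cache indexed by a hash of the guest pc, backed by the global hash table, with the jump cache refilled on a miss. A generic sketch of that pattern; the key is reduced to just pc here, whereas the real lookup also matches cs_base, pcc_base/pcc_top, the CHERI flags, flags and cflags, and the slow path is stubbed out so the example stands alone.

#include <stdint.h>
#include <stddef.h>

#define DEMO_JMP_CACHE_SIZE 4096          /* power of two, direct-mapped */

struct demo_tb { uint64_t pc; /* ... generated-code pointer, flags ... */ };

static struct demo_tb *demo_jmp_cache[DEMO_JMP_CACHE_SIZE];

/* Stub for the slow global hash-table lookup. */
static struct demo_tb *demo_htable_lookup(uint64_t pc)
{
    (void)pc;
    return NULL;
}

struct demo_tb *demo_tb_lookup(uint64_t pc)
{
    uint32_t hash = (uint32_t)(pc >> 2) & (DEMO_JMP_CACHE_SIZE - 1);
    struct demo_tb *tb = demo_jmp_cache[hash];

    if (tb && tb->pc == pc) {
        return tb;                        /* fast path: jump-cache hit */
    }
    tb = demo_htable_lookup(pc);          /* slow path: full keyed lookup */
    if (tb) {
        demo_jmp_cache[hash] = tb;        /* refill the direct-mapped cache */
    }
    return tb;
}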
+ */ + if (pc == bp->pc) { + bool match_bp = false; + + if (bp->flags & BP_GDB) { + match_bp = true; + } else if (bp->flags & BP_CPU) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + CPUClass *cc = CPU_GET_CLASS(cpu); + assert(cc->tcg_ops->debug_check_breakpoint); + match_bp = cc->tcg_ops->debug_check_breakpoint(cpu); +#endif + } + + if (match_bp) { + cpu->exception_index = EXCP_DEBUG; + return true; + } + } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) { + match_page = true; + } + } + + /* + * Within the same page as a breakpoint, single-step, + * returning to helper_lookup_tb_ptr after each insn looking + * for the actual breakpoint. + * + * TODO: Perhaps better to record all of the TBs associated + * with a given virtual page that contains a breakpoint, and + * then invalidate them when a new overlapping breakpoint is + * set on the page. Non-overlapping TBs would not be + * invalidated, nor would any TB need to be invalidated as + * breakpoints are removed. + */ + if (match_page) { + *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1; + } + return false; +} + +/** + * helper_lookup_tb_ptr: quick check for next tb + * @env: current cpu state + * + * Look for an existing TB matching the current cpu state. + * If found, return the code pointer. If not found, return + * the tcg epilogue so that we return into cpu_tb_exec. + */ +const void *HELPER(lookup_tb_ptr)(CPUArchState *env) +{ + CPUState *cpu = env_cpu(env); + TranslationBlock *tb; + target_ulong cs_base, pcc_base = 0, pcc_top = 0, pc; + uint32_t cheri_flags = 0; + uint32_t flags, cflags; + + cpu_get_tb_cpu_state_ext(env, &pc, &cs_base, &pcc_base, &pcc_top, + &cheri_flags, &flags); + + cflags = curr_cflags(cpu); + if (check_for_breakpoints(cpu, pc, &cflags)) { + cpu_loop_exit(cpu); + } + + tb = tb_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, flags, + cflags); + if (tb == NULL) { + return tcg_code_gen_epilogue; + } + + log_cpu_exec(pc, cpu, tb); + + return tb->tc.ptr; +} + /* Execute a TB, and fix up the CPU state afterwards if necessary */ /* * Disable CFI checks. @@ -164,28 +364,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) TranslationBlock *last_tb; const void *tb_ptr = itb->tc.ptr; - qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, - "Trace %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx - "/" TARGET_FMT_lx "/%#x/%#x] %s\n", - cpu->cpu_index, itb->tc.ptr, itb->cs_base, itb->pc, - itb->cs_top, itb->cheri_flags, itb->flags, - lookup_symbol(itb->pc)); - -#if defined(DEBUG_DISAS) - if (qemu_loglevel_mask(CPU_LOG_TB_CPU) - && qemu_log_in_addr_range(itb->pc)) { - FILE *logfile = qemu_log_lock(); - int flags = 0; - if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) { - flags |= CPU_DUMP_FPU; - } -#if defined(TARGET_I386) - flags |= CPU_DUMP_CCOP; -#endif - log_cpu_state(cpu, flags); - qemu_log_unlock(logfile); - } -#endif /* DEBUG_DISAS */ + log_cpu_exec(itb->pc, cpu, itb); qemu_thread_jit_execute(); ret = tcg_qemu_tb_exec(env, tb_ptr); @@ -221,6 +400,17 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) cc->set_pc(cpu, last_tb->pc); } } + + /* + * If gdb single-step, and we haven't raised another exception, + * raise a debug exception. Single-step with another exception + * is handled in cpu_handle_exception. 
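check_for_breakpoints() above distinguishes an exact pc hit, which raises EXCP_DEBUG immediately, from a breakpoint elsewhere in the same guest page, which instead forces one-instruction TBs so the exact hit cannot be compiled past. A compressed sketch of that decision, with made-up constants standing in for the real cflags bits and page size:

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

#define DEMO_PAGE_MASK      (~(uint64_t)0xfff)   /* assume 4 KiB guest pages */
#define DEMO_CF_COUNT_MASK  0x0000ffffu          /* placeholder for CF_COUNT_MASK */
#define DEMO_CF_NO_GOTO_TB  0x01000000u          /* placeholder for CF_NO_GOTO_TB */

/* Returns true if execution must stop for a debug exception at 'pc'. */
bool demo_check_breakpoints(uint64_t pc, const uint64_t *bps, size_t nbps,
                            uint32_t *cflags)
{
    bool same_page = false;

    for (size_t i = 0; i < nbps; i++) {
        if (bps[i] == pc) {
            return true;                           /* exact hit: EXCP_DEBUG */
        }
        if (((bps[i] ^ pc) & DEMO_PAGE_MASK) == 0) {
            same_page = true;
        }
    }
    if (same_page) {
        /* single-step through the page so the exact hit is seen */
        *cflags = (*cflags & ~DEMO_CF_COUNT_MASK) | DEMO_CF_NO_GOTO_TB | 1;
    }
    return false;
}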
+ */ + if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) { + cpu->exception_index = EXCP_DEBUG; + cpu_loop_exit(cpu); + } + return last_tb; } @@ -247,10 +437,9 @@ void cpu_exec_step_atomic(CPUState *cpu) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; - target_ulong cs_base, cs_top = 0, pc; + target_ulong cs_base, pcc_base = 0, pcc_top = 0, pc; uint32_t cheri_flags = 0; - uint32_t flags; - uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1; + uint32_t flags, cflags; int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { @@ -259,13 +448,27 @@ void cpu_exec_step_atomic(CPUState *cpu) g_assert(!cpu->running); cpu->running = true; - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); - tb = tb_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); + cpu_get_tb_cpu_state_ext(env, &pc, &cs_base, &pcc_base, &pcc_top, + &cheri_flags, &flags); + + cflags = curr_cflags(cpu); + /* Execute in a serial context. */ + cflags &= ~CF_PARALLEL; + /* After 1 insn, return and release the exclusive lock. */ + cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1; + /* + * No need to check_for_breakpoints here. + * We only arrive in cpu_exec_step_atomic after beginning execution + * of an insn that includes an atomic operation we can't handle. + * Any breakpoint for this insn will have been recognized earlier. + */ + tb = tb_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, flags, + cflags); if (tb == NULL) { mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, cs_top, cheri_flags, flags, - cflags); + tb = tb_gen_code(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); mmap_unlock(); } @@ -280,6 +483,7 @@ void cpu_exec_step_atomic(CPUState *cpu) * memory. */ #ifndef CONFIG_SOFTMMU + clear_helper_retaddr(); tcg_debug_assert(!have_mmap_lock()); #endif if (qemu_mutex_iothread_locked()) { @@ -289,7 +493,6 @@ void cpu_exec_step_atomic(CPUState *cpu) qemu_plugin_disable_mem_helpers(cpu); } - /* * As we start the exclusive region before codegen we must still * be in the region if we longjump out of either the codegen or @@ -303,7 +506,8 @@ void cpu_exec_step_atomic(CPUState *cpu) struct tb_desc { target_ulong pc; target_ulong cs_base; - target_ulong cs_top; + target_ulong pcc_base; + target_ulong pcc_top; CPUArchState *env; tb_page_addr_t phys_page1; uint32_t cheri_flags; @@ -318,8 +522,9 @@ static bool tb_lookup_cmp(const void *p, const void *d) const struct tb_desc *desc = d; if (tb->pc == desc->pc && tb->page_addr[0] == desc->phys_page1 && - tb->cs_base == desc->cs_base && tb->cs_top == desc->cs_top && - tb->cheri_flags == desc->cheri_flags && tb->flags == desc->flags && + tb->cs_base == desc->cs_base && tb->pcc_base == desc->pcc_base && + tb->pcc_top == desc->pcc_top && tb->cheri_flags == desc->cheri_flags && + tb->flags == desc->flags && tb->trace_vcpu_dstate == desc->trace_vcpu_dstate && tb_cflags(tb) == desc->cflags) { /* check next page if needed */ @@ -340,9 +545,9 @@ static bool tb_lookup_cmp(const void *p, const void *d) } TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, - uint32_t cflags) + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, uint32_t cflags) { tb_page_addr_t phys_pc; struct tb_desc desc; @@ -350,7 +555,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, desc.env = (CPUArchState *)cpu->env_ptr; desc.cs_base = cs_base; - 
desc.cs_top = cs_top; + desc.pcc_base = pcc_base; + desc.pcc_top = pcc_top; desc.cheri_flags = cheri_flags; desc.flags = flags; desc.cflags = cflags; @@ -419,46 +625,11 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, return; } -static inline TranslationBlock *tb_find(CPUState *cpu, - TranslationBlock *last_tb, - int tb_exit, uint32_t cflags) -{ - CPUArchState *env = (CPUArchState *)cpu->env_ptr; - TranslationBlock *tb; - target_ulong cs_base, cs_top = 0, pc; - uint32_t cheri_flags = 0; - uint32_t flags; - - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); - - tb = tb_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); - if (tb == NULL) { - mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); - mmap_unlock(); - /* We add the TB in the virtual pc hash table for the fast lookup */ - qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); - } -#ifndef CONFIG_USER_ONLY - /* We don't take care of direct jumps when address mapping changes in - * system emulation. So it's not safe to make a direct jump to a TB - * spanning two pages because the mapping for the second page can change. - */ - if (tb->page_addr[1] != -1) { - last_tb = NULL; - } -#endif - /* See if we can patch the calling TB. */ - if (last_tb) { - tb_add_jump(last_tb, tb_exit, tb); - } - return tb; -} - static inline bool cpu_handle_halt(CPUState *cpu) { +#ifndef CONFIG_USER_ONLY if (cpu->halted) { -#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) +#if defined(TARGET_I386) if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { X86CPU *x86_cpu = X86_CPU(cpu); qemu_mutex_lock_iothread(); @@ -466,13 +637,14 @@ static inline bool cpu_handle_halt(CPUState *cpu) cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); qemu_mutex_unlock_iothread(); } -#endif +#endif /* TARGET_I386 */ if (!cpu_has_work(cpu)) { return true; } cpu->halted = 0; } +#endif /* !CONFIG_USER_ONLY */ return false; } @@ -520,8 +692,8 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) loop */ #if defined(TARGET_I386) CPUClass *cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->do_interrupt(cpu); -#endif + cc->tcg_ops->fake_user_interrupt(cpu); +#endif /* TARGET_I386 */ *ret = cpu->exception_index; cpu->exception_index = -1; return true; @@ -554,6 +726,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) return false; } +#ifndef CONFIG_USER_ONLY /* * CPU_INTERRUPT_POLL is a virtual event which gets converted into a * "real" interrupt event later. It does not need to be recorded for @@ -567,11 +740,19 @@ static inline bool need_replay_interrupt(int interrupt_request) return true; #endif } +#endif /* !CONFIG_USER_ONLY */ static inline bool cpu_handle_interrupt(CPUState *cpu, TranslationBlock **last_tb) { - CPUClass *cc = CPU_GET_CLASS(cpu); + /* + * If we have requested custom cflags with CF_NOIRQ we should + * skip checking here. Any pending interrupts will get picked up + * by the next TB we execute under normal cflags. + */ + if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) { + return false; + } /* Clear the interrupt flag now since we're processing * cpu->interrupt_request and cpu->exit_request. 
@@ -594,6 +775,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, qemu_mutex_unlock_iothread(); return true; } +#if !defined(CONFIG_USER_ONLY) if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) { /* Do nothing */ } else if (interrupt_request & CPU_INTERRUPT_HALT) { @@ -622,12 +804,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, qemu_mutex_unlock_iothread(); return true; } -#endif +#endif /* !TARGET_I386 */ /* The target hook has 3 exit conditions: False when the interrupt isn't processed, True when it is, and we should restart on a new TB, and via longjmp via cpu_loop_exit. */ else { + CPUClass *cc = CPU_GET_CLASS(cpu); + if (cc->tcg_ops->cpu_exec_interrupt && cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) { if (need_replay_interrupt(interrupt_request)) { @@ -646,6 +830,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, * reload the 'interrupt_request' value */ interrupt_request = cpu->interrupt_request; } +#endif /* !CONFIG_USER_ONLY */ if (interrupt_request & CPU_INTERRUPT_EXITTB) { cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB; /* ensure that no TB jump will be modified as @@ -703,7 +888,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, /* Ensure global icount has gone forward */ icount_update(cpu); /* Refill decrementer and continue execution. */ - insns_left = MIN(CF_COUNT_MASK, cpu->icount_budget); + insns_left = MIN(0xffff, cpu->icount_budget); cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; @@ -712,7 +897,9 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, * execute we need to ensure we find/generate a TB with exactly * insns_left instructions in it. */ - if (!cpu->icount_extra && insns_left > 0 && insns_left < tb->icount) { + if (insns_left > 0 && insns_left < tb->icount) { + assert(insns_left <= CF_COUNT_MASK); + assert(cpu->icount_extra == 0); cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left; } #endif @@ -722,7 +909,6 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, int cpu_exec(CPUState *cpu) { - CPUClass *cc = CPU_GET_CLASS(cpu); int ret; SyncClocks sc = { 0 }; @@ -756,22 +942,18 @@ int cpu_exec(CPUState *cpu) * that we support, but is still unfixed in clang: * https://bugs.llvm.org/show_bug.cgi?id=21183 * - * Reload essential local variables here for those compilers. + * Reload an essential local variable here for those compilers. * Newer versions of gcc would complain about this code (-Wclobbered), * so we only perform the workaround for clang. */ cpu = current_cpu; - cc = CPU_GET_CLASS(cpu); #else - /* - * Non-buggy compilers preserve these locals; assert that - * they have the correct value. - */ + /* Non-buggy compilers preserve this; assert the correct value. */ g_assert(cpu == current_cpu); - g_assert(cc == CPU_GET_CLASS(cpu)); #endif #ifndef CONFIG_SOFTMMU + clear_helper_retaddr(); tcg_debug_assert(!have_mmap_lock()); #endif if (qemu_mutex_iothread_locked()) { @@ -788,22 +970,64 @@ int cpu_exec(CPUState *cpu) int tb_exit = 0; while (!cpu_handle_interrupt(cpu, &last_tb)) { - uint32_t cflags = cpu->cflags_next_tb; TranslationBlock *tb; - - /* When requested, use an exact setting for cflags for the next - execution. This is used for icount, precise smc, and stop- - after-access watchpoints. Since this request should never - have CF_INVALID set, -1 is a convenient invalid value that - does not require tcg headers for cpu_common_reset. 
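The icount refill above splits the remaining budget between a 16-bit decrementer and an "extra" counter, and, when fewer instructions remain than the next TB contains, asks for a TB trimmed to exactly that count via the cflags count field. A small arithmetic sketch with hypothetical numbers; the cflags constant is a placeholder:

#include <stdint.h>
#include <stdio.h>

#define DEMO_CF_COUNT_MASK 0xffffu        /* placeholder for CF_COUNT_MASK */

int main(void)
{
    uint64_t budget    = 5;               /* hypothetical instructions left */
    uint32_t tb_icount = 12;              /* instructions in the next TB */

    /* Large budgets are capped at 0xffff in the 16-bit decrementer and the
     * remainder carried in icount_extra; here the budget fits outright. */
    uint32_t insns_left = budget < 0xffff ? (uint32_t)budget : 0xffff;
    uint64_t extra      = budget - insns_left;

    printf("decrementer=%u extra=%llu\n", insns_left, (unsigned long long)extra);

    if (insns_left > 0 && insns_left < tb_icount) {
        /* Request a TB of exactly 'insns_left' instructions. */
        uint32_t cflags = /* (tb->cflags & ~CF_COUNT_MASK) | */ insns_left;
        printf("trimmed TB: %u instructions\n", cflags & DEMO_CF_COUNT_MASK);
    }
    return 0;
}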
*/ + target_ulong cs_base, pcc_base = 0, pcc_top = 0, pc; + uint32_t cheri_flags = 0; + uint32_t flags, cflags; + + cpu_get_tb_cpu_state_ext(cpu->env_ptr, &pc, &cs_base, &pcc_base, + &pcc_top, &cheri_flags, &flags); + + /* + * When requested, use an exact setting for cflags for the next + * execution. This is used for icount, precise smc, and stop- + * after-access watchpoints. Since this request should never + * have CF_INVALID set, -1 is a convenient invalid value that + * does not require tcg headers for cpu_common_reset. + */ + cflags = cpu->cflags_next_tb; if (cflags == -1) { cflags = curr_cflags(cpu); } else { cpu->cflags_next_tb = -1; } - tb = tb_find(cpu, last_tb, tb_exit, cflags); + if (check_for_breakpoints(cpu, pc, &cflags)) { + break; + } + + tb = tb_lookup(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); + if (tb == NULL) { + mmap_lock(); + tb = tb_gen_code(cpu, pc, cs_base, pcc_base, pcc_top, cheri_flags, + flags, cflags); + mmap_unlock(); + /* + * We add the TB in the virtual pc hash table + * for the fast lookup + */ + qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); + } + +#ifndef CONFIG_USER_ONLY + /* + * We don't take care of direct jumps when address mapping + * changes in system emulation. So it's not safe to make a + * direct jump to a TB spanning two pages because the mapping + * for the second page can change. + */ + if (tb->page_addr[1] != -1) { + last_tb = NULL; + } +#endif + /* See if we can patch the calling TB. */ + if (last_tb) { + tb_add_jump(last_tb, tb_exit, tb); + } + cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit); + /* Try to align the host and virtual clocks if the guest is in advance */ align_clocks(&sc, cpu); @@ -847,23 +1071,52 @@ void tcg_exec_unrealizefn(CPUState *cpu) #ifndef CONFIG_USER_ONLY -void dump_drift_info(void) +void dump_drift_info(GString *buf) { if (!icount_enabled()) { return; } - qemu_printf("Host - Guest clock %"PRIi64" ms\n", - (cpu_get_clock() - icount_get()) / SCALE_MS); + g_string_append_printf(buf, "Host - Guest clock %"PRIi64" ms\n", + (cpu_get_clock() - icount_get()) / SCALE_MS); if (icount_align_option) { - qemu_printf("Max guest delay %"PRIi64" ms\n", - -max_delay / SCALE_MS); - qemu_printf("Max guest advance %"PRIi64" ms\n", - max_advance / SCALE_MS); + g_string_append_printf(buf, "Max guest delay %"PRIi64" ms\n", + -max_delay / SCALE_MS); + g_string_append_printf(buf, "Max guest advance %"PRIi64" ms\n", + max_advance / SCALE_MS); } else { - qemu_printf("Max guest delay NA\n"); - qemu_printf("Max guest advance NA\n"); + g_string_append_printf(buf, "Max guest delay NA\n"); + g_string_append_printf(buf, "Max guest advance NA\n"); + } +} + +HumanReadableText *qmp_x_query_jit(Error **errp) +{ + g_autoptr(GString) buf = g_string_new(""); + + if (!tcg_enabled()) { + error_setg(errp, "JIT information is only available with accel=tcg"); + return NULL; } + + dump_exec_info(buf); + dump_drift_info(buf); + + return human_readable_text_from_str(buf); +} + +HumanReadableText *qmp_x_query_opcount(Error **errp) +{ + g_autoptr(GString) buf = g_string_new(""); + + if (!tcg_enabled()) { + error_setg(errp, "Opcode count information is only available with accel=tcg"); + return NULL; + } + + dump_opcount_info(buf); + + return human_readable_text_from_str(buf); } #endif /* !CONFIG_USER_ONLY */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e7bfaac8c43..e8d3429c26d 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -19,14 +19,11 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" -#include 
"cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "exec/exec-all.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "exec/cpu_ldst.h" #include "exec/cputlb.h" -#include "exec/tb-hash.h" #include "exec/memory-internal.h" #include "exec/ram_addr.h" #include "tcg/tcg.h" @@ -37,12 +34,13 @@ #include "qemu/atomic128.h" #include "exec/translate-all.h" #include "trace/trace-root.h" -#include "trace/mem.h" #include "cheri_tagmem.h" +#include "tb-hash.h" #include "internal.h" #ifdef CONFIG_PLUGIN #include "qemu/plugin-memory.h" #endif +#include "tcg/tcg-ldst.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -710,8 +708,9 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr) tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS); } -static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, - target_ulong page, unsigned bits) +static void tlb_flush_range_locked(CPUArchState *env, int midx, + target_ulong addr, target_ulong len, + unsigned bits) { CPUTLBDesc *d = &env_tlb(env)->d[midx]; CPUTLBDescFast *f = &env_tlb(env)->f[midx]; @@ -721,20 +720,26 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, * If @bits is smaller than the tlb size, there may be multiple entries * within the TLB; otherwise all addresses that match under @mask hit * the same TLB entry. - * * TODO: Perhaps allow bits to be a few bits less than the size. * For now, just flush the entire TLB. + * + * If @len is larger than the tlb size, then it will take longer to + * test all of the entries in the TLB than it will to flush it all. */ - if (mask < f->mask) { + if (mask < f->mask || len > f->mask) { tlb_debug("forcing full flush midx %d (" - TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", - midx, page, mask); + TARGET_FMT_lx "/" TARGET_FMT_lx "+" TARGET_FMT_lx ")\n", + midx, addr, mask, len); tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime()); return; } - /* Check if we need to flush due to large pages. */ - if ((page & d->large_page_mask) == d->large_page_addr) { + /* + * Check if we need to flush due to large pages. + * Because large_page_mask contains all 1's from the msb, + * we only need to test the end of the range. 
+ */ + if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) { tlb_debug("forcing full flush midx %d (" TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", midx, d->large_page_addr, d->large_page_mask); @@ -742,85 +747,67 @@ static void tlb_flush_page_bits_locked(CPUArchState *env, int midx, return; } - if (tlb_flush_entry_mask_locked(tlb_entry(env, midx, page), page, mask)) { - tlb_n_used_entries_dec(env, midx); + for (target_ulong i = 0; i < len; i += TARGET_PAGE_SIZE) { + target_ulong page = addr + i; + CPUTLBEntry *entry = tlb_entry(env, midx, page); + + if (tlb_flush_entry_mask_locked(entry, page, mask)) { + tlb_n_used_entries_dec(env, midx); + } + tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); } - tlb_flush_vtlb_page_mask_locked(env, midx, page, mask); } typedef struct { target_ulong addr; + target_ulong len; uint16_t idxmap; uint16_t bits; -} TLBFlushPageBitsByMMUIdxData; +} TLBFlushRangeData; -static void -tlb_flush_page_bits_by_mmuidx_async_0(CPUState *cpu, - TLBFlushPageBitsByMMUIdxData d) +static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu, + TLBFlushRangeData d) { CPUArchState *env = cpu->env_ptr; int mmu_idx; assert_cpu_is_self(cpu); - tlb_debug("page addr:" TARGET_FMT_lx "/%u mmu_map:0x%x\n", - d.addr, d.bits, d.idxmap); + tlb_debug("range:" TARGET_FMT_lx "/%u+" TARGET_FMT_lx " mmu_map:0x%x\n", + d.addr, d.bits, d.len, d.idxmap); qemu_spin_lock(&env_tlb(env)->c.lock); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { if ((d.idxmap >> mmu_idx) & 1) { - tlb_flush_page_bits_locked(env, mmu_idx, d.addr, d.bits); + tlb_flush_range_locked(env, mmu_idx, d.addr, d.len, d.bits); } } qemu_spin_unlock(&env_tlb(env)->c.lock); - tb_flush_jmp_cache(cpu, d.addr); -} - -static bool encode_pbm_to_runon(run_on_cpu_data *out, - TLBFlushPageBitsByMMUIdxData d) -{ - /* We need 6 bits to hold to hold @bits up to 63. */ - if (d.idxmap <= MAKE_64BIT_MASK(0, TARGET_PAGE_BITS - 6)) { - *out = RUN_ON_CPU_TARGET_PTR(d.addr | (d.idxmap << 6) | d.bits); - return true; + for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) { + tb_flush_jmp_cache(cpu, d.addr + i); } - return false; -} - -static TLBFlushPageBitsByMMUIdxData -decode_runon_to_pbm(run_on_cpu_data data) -{ - target_ulong addr_map_bits = (target_ulong) data.target_ptr; - return (TLBFlushPageBitsByMMUIdxData){ - .addr = addr_map_bits & TARGET_PAGE_MASK, - .idxmap = (addr_map_bits & ~TARGET_PAGE_MASK) >> 6, - .bits = addr_map_bits & 0x3f - }; } -static void tlb_flush_page_bits_by_mmuidx_async_1(CPUState *cpu, - run_on_cpu_data runon) +static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu, + run_on_cpu_data data) { - tlb_flush_page_bits_by_mmuidx_async_0(cpu, decode_runon_to_pbm(runon)); -} - -static void tlb_flush_page_bits_by_mmuidx_async_2(CPUState *cpu, - run_on_cpu_data data) -{ - TLBFlushPageBitsByMMUIdxData *d = data.host_ptr; - tlb_flush_page_bits_by_mmuidx_async_0(cpu, *d); + TLBFlushRangeData *d = data.host_ptr; + tlb_flush_range_by_mmuidx_async_0(cpu, *d); g_free(d); } -void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, - uint16_t idxmap, unsigned bits) +void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + TLBFlushRangeData d; - /* If all bits are significant, this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS) { + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. 
+ */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx(cpu, addr, idxmap); return; } @@ -832,34 +819,38 @@ void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; if (qemu_cpu_is_self(cpu)) { - tlb_flush_page_bits_by_mmuidx_async_0(cpu, d); - } else if (encode_pbm_to_runon(&runon, d)) { - async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); + tlb_flush_range_by_mmuidx_async_0(cpu, d); } else { - TLBFlushPageBitsByMMUIdxData *p - = g_new(TLBFlushPageBitsByMMUIdxData, 1); - /* Otherwise allocate a structure, freed by the worker. */ - *p = d; - async_run_on_cpu(cpu, tlb_flush_page_bits_by_mmuidx_async_2, + TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(cpu, tlb_flush_range_by_mmuidx_async_1, RUN_ON_CPU_HOST_PTR(p)); } } -void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, - target_ulong addr, - uint16_t idxmap, - unsigned bits) +void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap, unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits); +} + +void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, target_ulong len, + uint16_t idxmap, unsigned bits) +{ + TLBFlushRangeData d; + CPUState *dst_cpu; - /* If all bits are significant, this devolves to tlb_flush_page. */ - if (bits >= TARGET_LONG_BITS) { + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. + */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap); return; } @@ -871,40 +862,45 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; - if (encode_pbm_to_runon(&runon, d)) { - flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); - } else { - CPUState *dst_cpu; - TLBFlushPageBitsByMMUIdxData *p; - - /* Allocate a separate data block for each destination cpu. */ - CPU_FOREACH(dst_cpu) { - if (dst_cpu != src_cpu) { - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_run_on_cpu(dst_cpu, - tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); - } + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + TLBFlushRangeData *p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(dst_cpu, + tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); } } - tlb_flush_page_bits_by_mmuidx_async_0(src_cpu, d); + tlb_flush_range_by_mmuidx_async_0(src_cpu, d); } -void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, - target_ulong addr, - uint16_t idxmap, - unsigned bits) +void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, unsigned bits) +{ + tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE, + idxmap, bits); +} + +void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits) { - TLBFlushPageBitsByMMUIdxData d; - run_on_cpu_data runon; + TLBFlushRangeData d, *p; + CPUState *dst_cpu; - /* If all bits are significant, this devolves to tlb_flush_page. 
*/ - if (bits >= TARGET_LONG_BITS) { + /* + * If all bits are significant, and len is small, + * this devolves to tlb_flush_page. + */ + if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) { tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap); return; } @@ -916,32 +912,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, /* This should already be page aligned */ d.addr = addr & TARGET_PAGE_MASK; + d.len = len; d.idxmap = idxmap; d.bits = bits; - if (encode_pbm_to_runon(&runon, d)) { - flush_all_helper(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, runon); - async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_1, - runon); - } else { - CPUState *dst_cpu; - TLBFlushPageBitsByMMUIdxData *p; - - /* Allocate a separate data block for each destination cpu. */ - CPU_FOREACH(dst_cpu) { - if (dst_cpu != src_cpu) { - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_run_on_cpu(dst_cpu, tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); - } + /* Allocate a separate data block for each destination cpu. */ + CPU_FOREACH(dst_cpu) { + if (dst_cpu != src_cpu) { + p = g_memdup(&d, sizeof(d)); + async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); } - - p = g_new(TLBFlushPageBitsByMMUIdxData, 1); - *p = d; - async_safe_run_on_cpu(src_cpu, tlb_flush_page_bits_by_mmuidx_async_2, - RUN_ON_CPU_HOST_PTR(p)); } + + p = g_memdup(&d, sizeof(d)); + async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1, + RUN_ON_CPU_HOST_PTR(p)); +} + +void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap, + unsigned bits) +{ + tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE, + idxmap, bits); } /* update the TLBs so that writes to code in the virtual page 'addr' @@ -1804,7 +1799,7 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr; } else { data->is_io = false; - data->v.ram.hostaddr = addr + tlbe->addend; + data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend); } return true; } else { @@ -1818,18 +1813,22 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx, #endif -/* Probe for a read-modify-write atomic operation. Do not allow unaligned - * operations, or io operations to proceed. Return the host address. */ +/* + * Probe for an atomic operation. Do not allow unaligned operations, + * or io operations to proceed. Return the host address. + * + * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE. + */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, int size, int prot, + uintptr_t retaddr) { size_t mmu_idx = get_mmuidx(oi); - uintptr_t index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr); - target_ulong tlb_addr = tlb_addr_write(tlbe); MemOp mop = get_memop(oi); int a_bits = get_alignment_bits(mop); - int s_bits = mop & MO_SIZE; + uintptr_t index; + CPUTLBEntry *tlbe; + target_ulong tlb_addr; void *hostaddr; /* Adjust the given return address. */ @@ -1843,7 +1842,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, } /* Enforce qemu required alignment. */ - if (unlikely(addr & ((1 << s_bits) - 1))) { + if (unlikely(addr & (size - 1))) { /* We get here if guest alignment was not requested, or was not enforced by cpu_unaligned_access above. 
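For orientation, a minimal sketch of how a target might drive the ranged-flush entry points introduced above; the function and the values below are hypothetical and not part of this patch:

/* Illustrative only: flush a four-page region for MMU index 1 on all
 * vCPUs and wait for completion.  With bits == TARGET_LONG_BITS every
 * address bit is significant, and a one-page len would fall back to
 * the plain tlb_flush_page_by_mmuidx_* path. */
static void example_invalidate_region(CPUState *cs, target_ulong base)
{
    uint16_t idxmap = 1 << 1;                /* hypothetical: MMU index 1 */
    target_ulong len = 4 * TARGET_PAGE_SIZE; /* hypothetical: four pages */

    tlb_flush_range_by_mmuidx_all_cpus_synced(cs, base & TARGET_PAGE_MASK,
                                              len, idxmap, TARGET_LONG_BITS);
}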
We might widen the access and emulate, but for now @@ -1851,15 +1850,45 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, goto stop_the_world; } + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + /* Check TLB entry and enforce page permissions. */ - if (!tlb_hit(tlb_addr, addr)) { - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE, - mmu_idx, retaddr); - index = tlb_index(env, mmu_idx, addr); - tlbe = tlb_entry(env, mmu_idx, addr); + if (prot & PAGE_WRITE) { + tlb_addr = tlb_addr_write(tlbe); + if (!tlb_hit(tlb_addr, addr)) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_STORE, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; + } + + /* Let the guest notice RMW on a write-only page. */ + if ((prot & PAGE_READ) && + unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_LOAD, mmu_idx, retaddr); + /* + * Since we don't support reads and writes to different addresses, + * and we do have the proper page loaded for write, this shouldn't + * ever return. But just in case, handle via stop-the-world. + */ + goto stop_the_world; + } + } else /* if (prot & PAGE_READ) */ { + tlb_addr = tlbe->addr_read; + if (!tlb_hit(tlb_addr, addr)) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(env_cpu(env), addr, size, + MMU_DATA_LOAD, mmu_idx, retaddr); + index = tlb_index(env, mmu_idx, addr); + tlbe = tlb_entry(env, mmu_idx, addr); + } + tlb_addr = tlbe->addr_read & ~TLB_INVALID_MASK; } - tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK; } /* Notice an IO access or a needs-MMU-lookup access */ @@ -1869,20 +1898,10 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, goto stop_the_world; } - /* Let the guest notice RMW on a write-only page. */ - if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) { - tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD, - mmu_idx, retaddr); - /* Since we don't support reads and writes to different addresses, - and we do have the proper page loaded for write, this shouldn't - ever return. But just in case, handle via stop-the-world. */ - goto stop_the_world; - } - hostaddr = (void *)((uintptr_t)addr + tlbe->addend); if (unlikely(tlb_addr & TLB_NOTDIRTY)) { - notdirty_write(env_cpu(env), addr, 1 << s_bits, + notdirty_write(env_cpu(env), addr, size, &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr); } @@ -1892,6 +1911,25 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, cpu_loop_exit_atomic(env_cpu(env), retaddr); } +/* + * Verify that we have passed the correct MemOp to the correct function. + * + * In the case of the helper_*_mmu functions, we will have done this by + * using the MemOp to look up the helper during code generation. + * + * In the case of the cpu_*_mmu functions, this is up to the caller. + * We could present one function to target code, and dispatch based on + * the MemOp, but so far we have worked hard to avoid an indirect function + * call along the memory path. 
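As background for validate_memop() below, a MemOpIdx is simply a MemOp packed together with an MMU index; the sketch that follows (the function is invented purely for illustration) shows the round trip the helpers rely on:

/* Illustration only: pack and unpack a MemOpIdx the way the helpers do. */
static void example_memopidx_roundtrip(int mmu_idx)
{
    MemOpIdx oi = make_memop_idx(MO_LEUL, mmu_idx);

    MemOp op = get_memop(oi);   /* yields MO_LEUL again */
    int idx = get_mmuidx(oi);   /* yields mmu_idx again */

    /* validate_memop(oi, MO_LEUL) would pass for this oi: it compares
     * only the MO_SIZE and MO_BSWAP bits of the packed MemOp. */
    (void)op;
    (void)idx;
}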
+ */ +static void validate_memop(MemOpIdx oi, MemOp expected) +{ +#ifdef CONFIG_DEBUG_TCG + MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP); + assert(have == expected); +#endif +} + /* * Load Helpers * @@ -1902,7 +1940,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, */ typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); + MemOpIdx oi, uintptr_t retaddr); static inline uint64_t QEMU_ALWAYS_INLINE load_memop(const void *haddr, MemOp op) @@ -1964,10 +2002,10 @@ static void check_address_space_wrap(CPUArchState *env, target_ulong addr, #endif static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); + MemOpIdx oi, uintptr_t retaddr); static inline uint64_t QEMU_ALWAYS_INLINE -load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, +load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi, uintptr_t retaddr, MemOp op, bool code_read, FullLoadHelper *full_load) { @@ -2103,79 +2141,86 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, */ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_UB); return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu); } tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_ldub_mmu(env, addr, oi, retaddr); } static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEUW); return load_helper(env, addr, oi, retaddr, MO_LEUW, false, full_le_lduw_mmu); } tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_le_lduw_mmu(env, addr, oi, retaddr); } static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUW); return load_helper(env, addr, oi, retaddr, MO_BEUW, false, full_be_lduw_mmu); } tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_be_lduw_mmu(env, addr, oi, retaddr); } static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEUL); return load_helper(env, addr, oi, retaddr, MO_LEUL, false, full_le_ldul_mmu); } tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_le_ldul_mmu(env, addr, oi, retaddr); } static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUL); return load_helper(env, addr, oi, retaddr, MO_BEUL, false, full_be_ldul_mmu); } tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return full_be_ldul_mmu(env, addr, oi, retaddr); } uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEQ); return load_helper(env, 
addr, oi, retaddr, MO_LEQ, false, helper_le_ldq_mmu); } uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEQ); return load_helper(env, addr, oi, retaddr, MO_BEQ, false, helper_be_ldq_mmu); } @@ -2187,31 +2232,31 @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr); } tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr); } @@ -2224,238 +2269,97 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, /* * Log a target memory load via cpu_ldst. */ -#define log_instr_load_int(env, addr, value, op) \ - helper_qemu_log_instr_load64(env, addr, value, op) +#define log_instr_load_int(env, addr, value, oi) \ + helper_qemu_log_instr_load64(env, addr, value, oi) #else -#define log_instr_load_int(env, addr, val, op) ((void)0) +#define log_instr_load_int(env, addr, val, oi) ((void)0) #endif static inline uint64_t cpu_load_helper_no_log(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t retaddr, - MemOp op, + MemOpIdx oi, uintptr_t retaddr, FullLoadHelper *full_load) { - uint16_t meminfo; - TCGMemOpIdx oi; uint64_t ret; - meminfo = trace_mem_get_info(op, mmu_idx, false); - trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); - - op &= ~MO_SIGN; - oi = make_memop_idx(op, mmu_idx); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); ret = full_load(env, addr, oi, retaddr); - - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); - + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t retaddr, MemOp op, + MemOpIdx oi, uintptr_t retaddr, FullLoadHelper *full_load) { - uint64_t ret = - cpu_load_helper_no_log(env, addr, mmu_idx, retaddr, op, full_load); - log_instr_load_int(env, addr, ret, op); + uint64_t ret = cpu_load_helper_no_log(env, addr, oi, retaddr, full_load); + log_instr_load_int(env, addr, ret, oi); return ret; } -uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_UB, full_ldub_mmu); -} - -int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int8_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_SB, - full_ldub_mmu); -} - -uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUW, full_be_lduw_mmu); -} - -int 
cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_BESW, - full_be_lduw_mmu); -} - -uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEUL, full_be_ldul_mmu); -} - -uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_BEQ, helper_be_ldq_mmu); -} - -uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUW, full_le_lduw_mmu); -} - -int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return (int16_t)cpu_load_helper(env, addr, mmu_idx, ra, MO_LESW, - full_le_lduw_mmu); -} - -uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEUL, full_le_ldul_mmu); -} - -uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_load_helper(env, addr, mmu_idx, ra, MO_LEQ, helper_le_ldq_mmu); -} - -uint32_t cpu_ldub_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra) { - return cpu_ldub_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu); } -int cpu_ldsb_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldsb_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu); } -uint32_t cpu_lduw_be_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_lduw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu); } -int cpu_ldsw_be_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) +uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldsw_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu); } -uint32_t cpu_ldl_be_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldl_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu); } -uint64_t cpu_ldq_be_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_ldq_be_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu); } -uint32_t cpu_lduw_le_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) +uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - return cpu_lduw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); -} - -int cpu_ldsw_le_data_ra(CPUArchState *env, target_ulong ptr, uintptr_t retaddr) -{ - return cpu_ldsw_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false),
retaddr); -} - -uint32_t cpu_ldl_le_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) -{ - return cpu_ldl_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); -} - -uint64_t cpu_ldq_le_data_ra(CPUArchState *env, target_ulong ptr, - uintptr_t retaddr) -{ - return cpu_ldq_le_mmuidx_ra(env, ptr, cpu_mmu_index(env, false), retaddr); + return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu); } #ifdef TARGET_CHERI /* - * TODO(am2419): Ugly hack to avoid logging memory accesses that load capability + * Hack to avoid logging memory accesses that load capability * components as normal memory accesses. The caller is responsible for logging. */ -target_ulong cpu_ld_cap_word_ra(CPUArchState *env, target_ulong ptr, +target_ulong cpu_ld_cap_word_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) { FullLoadHelper *full_load; - MemOp op; + MemOpIdx oi; #if TARGET_LONG_BITS == 32 - op = MO_TEUW; + oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, false)); full_load = MO_TE == MO_LE ? helper_le_lduw_mmu : helper_be_lduw_mmu; #elif TARGET_LONG_BITS == 64 - op = MO_TEQ; + oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, false)); full_load = MO_TE == MO_LE ? helper_le_ldq_mmu : helper_be_ldq_mmu; #else #error "Unhandled target long width" #endif - return cpu_load_helper_no_log( - env, ptr, cpu_mmu_index(env, false), retaddr, MO_TEQ, - MO_TE == MO_LE ? helper_le_ldq_mmu : helper_be_ldq_mmu); + return cpu_load_helper_no_log(env, ptr, oi, retaddr, full_load); } #endif -uint32_t cpu_ldub_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldub_data_ra(env, ptr, 0); -} - -int cpu_ldsb_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldsb_data_ra(env, ptr, 0); -} - -uint32_t cpu_lduw_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_lduw_be_data_ra(env, ptr, 0); -} - -int cpu_ldsw_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldsw_be_data_ra(env, ptr, 0); -} - -uint32_t cpu_ldl_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldl_be_data_ra(env, ptr, 0); -} - -uint64_t cpu_ldq_be_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldq_be_data_ra(env, ptr, 0); -} - -uint32_t cpu_lduw_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_lduw_le_data_ra(env, ptr, 0); -} - -int cpu_ldsw_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldsw_le_data_ra(env, ptr, 0); -} - -uint32_t cpu_ldl_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldl_le_data_ra(env, ptr, 0); -} - -uint64_t cpu_ldq_le_data(CPUArchState *env, target_ulong ptr) -{ - return cpu_ldq_le_data_ra(env, ptr, 0); -} - /* * Store Helpers */ @@ -2495,6 +2399,9 @@ store_memop(void *haddr, uint64_t val, MemOp op) } } +static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr); + static void __attribute__((noinline)) store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, uintptr_t retaddr, size_t size, uintptr_t mmu_idx, @@ -2504,7 +2411,7 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, uintptr_t index, index2; CPUTLBEntry *entry, *entry2; target_ulong page2, tlb_addr, tlb_addr2; - TCGMemOpIdx oi; + MemOpIdx oi; size_t size2; int i; @@ -2561,20 +2468,20 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val, for (i = 0; i < size; ++i) { /* Big-endian extract. 
*/ uint8_t val8 = val >> (((size - 1) * 8) - (i * 8)); - helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + full_stb_mmu(env, addr + i, val8, oi, retaddr); } } else { for (i = 0; i < size; ++i) { /* Little-endian extract. */ uint8_t val8 = val >> (i * 8); - helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr); + full_stb_mmu(env, addr + i, val8, oi, retaddr); } } } static inline void QEMU_ALWAYS_INLINE store_helper(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr, MemOp op) + MemOpIdx oi, uintptr_t retaddr, MemOp op) { uintptr_t mmu_idx = get_mmuidx(oi); uintptr_t index = tlb_index(env, mmu_idx, addr); @@ -2670,46 +2577,83 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val, store_memop(haddr, val, op); } -void __attribute__((noinline)) -helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +static void __attribute__((noinline)) +full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_UB); store_helper(env, addr, val, oi, retaddr, MO_UB); } -void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_stb_mmu(env, addr, val, oi, retaddr); +} + +static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEUW); store_helper(env, addr, val, oi, retaddr, MO_LEUW); } -void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_le_stw_mmu(env, addr, val, oi, retaddr); +} + +static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUW); store_helper(env, addr, val, oi, retaddr, MO_BEUW); } -void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) { + full_be_stw_mmu(env, addr, val, oi, retaddr); +} + +static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + validate_memop(oi, MO_LEUL); store_helper(env, addr, val, oi, retaddr, MO_LEUL); } -void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr) +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_le_stl_mmu(env, addr, val, oi, retaddr); +} + +static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEUL); store_helper(env, addr, val, oi, retaddr, MO_BEUL); } +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) +{ + full_be_stl_mmu(env, addr, val, oi, retaddr); +} + void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_LEQ); store_helper(env, addr, val, oi, retaddr, MO_LEQ); } void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, 
uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { + validate_memop(oi, MO_BEQ); store_helper(env, addr, val, oi, retaddr, MO_BEQ); } @@ -2727,177 +2671,102 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, #define log_instr_store_int(env, addr, val, op) ((void)0) #endif -static inline void QEMU_ALWAYS_INLINE -cpu_store_helper_no_log(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr, MemOp op) -{ - TCGMemOpIdx oi; - uint16_t meminfo; - - meminfo = trace_mem_get_info(op, mmu_idx, true); - trace_guest_mem_before_exec(env_cpu(env), addr, meminfo); - - oi = make_memop_idx(op, mmu_idx); - store_helper(env, addr, val, oi, retaddr, op); - - qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, meminfo); -} - -static inline void QEMU_ALWAYS_INLINE -cpu_store_helper(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr, MemOp op) -{ - cpu_store_helper_no_log(env, addr, val, mmu_idx, retaddr, op); - log_instr_store_int(env, addr, val, op); -} - -void cpu_stb_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) -{ - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_UB); -} - -void cpu_stw_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) -{ - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUW); -} +typedef void FullStoreHelper(CPUArchState *env, target_ulong addr, + uint64_t val, MemOpIdx oi, uintptr_t retaddr); -void cpu_stl_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +static inline void cpu_store_helper_no_log(CPUArchState *env, target_ulong addr, + uint64_t val, MemOpIdx oi, + uintptr_t ra, + FullStoreHelper *full_store) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEUL); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + full_store(env, addr, val, oi, ra); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stq_be_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +static inline void cpu_store_helper(CPUArchState *env, target_ulong addr, + uint64_t val, MemOpIdx oi, uintptr_t ra, + FullStoreHelper *full_store) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_BEQ); + cpu_store_helper_no_log(env, addr, val, oi, ra, full_store); + log_instr_store_int(env, addr, val, oi); } -void cpu_stw_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUW); + cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu); } -void cpu_stl_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint32_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEUL); + cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu); } -void cpu_stq_le_mmuidx_ra(CPUArchState *env, target_ulong addr, uint64_t val, - int mmu_idx, uintptr_t retaddr) +void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_store_helper(env, addr, val, mmu_idx, retaddr, MO_LEQ); + cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu); } -void cpu_stb_data_ra(CPUArchState *env, target_ulong 
ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stb_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu); } -void cpu_stw_be_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stw_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu); } -void cpu_stl_be_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stl_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu); } -void cpu_stq_be_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr) { - cpu_stq_be_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); -} - -void cpu_stw_le_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) -{ - cpu_stw_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); -} - -void cpu_stl_le_data_ra(CPUArchState *env, target_ulong ptr, - uint32_t val, uintptr_t retaddr) -{ - cpu_stl_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); -} - -void cpu_stq_le_data_ra(CPUArchState *env, target_ulong ptr, - uint64_t val, uintptr_t retaddr) -{ - cpu_stq_le_mmuidx_ra(env, ptr, val, cpu_mmu_index(env, false), retaddr); + cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu); } #ifdef TARGET_CHERI /* - * TODO(am2419): Ugly hack to avoid logging memory accesses that store capability + * Hack to avoid logging memory accesses that store capability * components as normal memory accesses. The caller is responsible for logging. */ void cpu_st_cap_word_ra(CPUArchState *env, target_ulong ptr, target_ulong val, uintptr_t retaddr) { - MemOp op; + FullStoreHelper *full_store; + MemOpIdx oi; #if TARGET_LONG_BITS == 32 - op = MO_TEUW; + oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, false)); + full_store = MO_TE == MO_LE ? full_le_stw_mmu : full_be_stw_mmu; #elif TARGET_LONG_BITS == 64 - op = MO_TEQ; + oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, false)); + full_store = MO_TE == MO_LE ? 
helper_le_stq_mmu : helper_be_stq_mmu; #else #error "Unhandled target long width" #endif - cpu_store_helper_no_log(env, ptr, val, cpu_mmu_index(env, false), retaddr, op); + cpu_store_helper_no_log(env, ptr, val, oi, retaddr, full_store); } #endif -void cpu_stb_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stb_data_ra(env, ptr, val, 0); -} - -void cpu_stw_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stw_be_data_ra(env, ptr, val, 0); -} - -void cpu_stl_be_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stl_be_data_ra(env, ptr, val, 0); -} - -void cpu_stq_be_data(CPUArchState *env, target_ulong ptr, uint64_t val) -{ - cpu_stq_be_data_ra(env, ptr, val, 0); -} - -void cpu_stw_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stw_le_data_ra(env, ptr, val, 0); -} - -void cpu_stl_le_data(CPUArchState *env, target_ulong ptr, uint32_t val) -{ - cpu_stl_le_data_ra(env, ptr, val, 0); -} - -void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) -{ - cpu_stq_le_data_ra(env, ptr, val, 0); -} +#include "ldst_common.c.inc" -/* First set of helpers allows passing in of OI and RETADDR. This makes - them callable from other helpers. */ +/* + * First set of functions passes in OI and RETADDR. + * This makes them callable from other helpers. + */ -#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr #define ATOMIC_NAME(X) \ - HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_DECLS -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) + glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) + #define ATOMIC_MMU_CLEANUP #define ATOMIC_MMU_IDX get_mmuidx(oi) @@ -2922,76 +2791,52 @@ void cpu_stq_le_data(CPUArchState *env, target_ulong ptr, uint64_t val) #include "atomic_template.h" #endif -/* Second set of helpers are directly callable from TCG as helpers. */ - -#undef EXTRA_ARGS -#undef ATOMIC_NAME -#undef ATOMIC_MMU_LOOKUP -#define EXTRA_ARGS , TCGMemOpIdx oi -#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) - -#define DATA_SIZE 1 -#include "atomic_template.h" - -#define DATA_SIZE 2 -#include "atomic_template.h" - -#define DATA_SIZE 4 -#include "atomic_template.h" - -#ifdef CONFIG_ATOMIC64 -#define DATA_SIZE 8 -#include "atomic_template.h" -#endif -#undef ATOMIC_MMU_IDX - /* Code access functions. 
*/ static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code); } uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true)); return full_ldub_code(env, addr, oi, 0); } static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code); } uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true)); return full_lduw_code(env, addr, oi, 0); } static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code); } uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true)); return full_ldl_code(env, addr, oi, 0); } static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr) + MemOpIdx oi, uintptr_t retaddr) { return load_helper(env, addr, oi, retaddr, MO_TEQ, true, full_ldq_code); } uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) { - TCGMemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); + MemOpIdx oi = make_memop_idx(MO_TEQ, cpu_mmu_index(env, true)); return full_ldq_code(env, addr, oi, 0); } diff --git a/accel/tcg/hmp.c b/accel/tcg/hmp.c new file mode 100644 index 00000000000..d2ea3526553 --- /dev/null +++ b/accel/tcg/hmp.c @@ -0,0 +1,15 @@ +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "exec/exec-all.h" +#include "monitor/monitor.h" +#include "sysemu/tcg.h" + +static void hmp_tcg_register(void) +{ + monitor_register_hmp_info_hrt("jit", qmp_x_query_jit); + monitor_register_hmp_info_hrt("opcount", qmp_x_query_opcount); +} + +type_init(hmp_tcg_register); diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 301d9682ee3..d1c4e3ac330 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -12,9 +12,12 @@ #include "exec/exec-all.h" TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, int cflags); + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, int cflags); void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr); +void page_init(void); +void tb_htable_init(void); #endif /* ACCEL_TCG_INTERNAL_H */ diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc new file mode 100644 index 00000000000..bfefb275e7e --- /dev/null +++ b/accel/tcg/ldst_common.c.inc @@ -0,0 +1,307 @@ +/* + * Routines common to user and system emulation of load/store. + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + return cpu_ldb_mmu(env, addr, oi, ra); +} + +int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int8_t)cpu_ldub_mmuidx_ra(env, addr, mmu_idx, ra); +} + +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); + return cpu_ldw_be_mmu(env, addr, oi, ra); +} + +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_lduw_be_mmuidx_ra(env, addr, mmu_idx, ra); +} + +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); + return cpu_ldl_be_mmu(env, addr, oi, ra); +} + +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEQ | MO_UNALN, mmu_idx); + return cpu_ldq_be_mmu(env, addr, oi, ra); +} + +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); + return cpu_ldw_le_mmu(env, addr, oi, ra); +} + +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + return (int16_t)cpu_lduw_le_mmuidx_ra(env, addr, mmu_idx, ra); +} + +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); + return cpu_ldl_le_mmu(env, addr, oi, ra); +} + +uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEQ | MO_UNALN, mmu_idx); + return cpu_ldq_le_mmu(env, addr, oi, ra); +} + +void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + cpu_stb_mmu(env, addr, val, oi, ra); +} + +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx); + cpu_stw_be_mmu(env, addr, val, oi, ra); +} + +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx); + cpu_stl_be_mmu(env, addr, val, oi, ra); +} + +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_BEQ | MO_UNALN, mmu_idx); + cpu_stq_be_mmu(env, addr, val, oi, ra); +} + +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx); + cpu_stw_le_mmu(env, addr, val, oi, ra); +} + +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx); + cpu_stl_le_mmu(env, addr, val, oi, ra); +} + +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, + int mmu_idx, uintptr_t ra) +{ + MemOpIdx oi = make_memop_idx(MO_LEQ | MO_UNALN, mmu_idx); + cpu_stq_le_mmu(env, addr, val, oi, ra); +} + +/*--------------------------*/ + +uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldub_mmuidx_ra(env, addr, 
cpu_mmu_index(env, false), ra); +} + +int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return (int8_t)cpu_ldub_data_ra(env, addr, ra); +} + +uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_lduw_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return (int16_t)cpu_lduw_be_data_ra(env, addr, ra); +} + +uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldl_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldq_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_lduw_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return (int16_t)cpu_lduw_le_data_ra(env, addr, ra); +} + +uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldl_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) +{ + return cpu_ldq_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra); +} + +void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stb_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stw_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stl_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr, + uint64_t val, uintptr_t ra) +{ + cpu_stq_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stw_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr, + uint32_t val, uintptr_t ra) +{ + cpu_stl_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr, + uint64_t val, uintptr_t ra) +{ + cpu_stq_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra); +} + +/*--------------------------*/ + +uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldub_data_ra(env, addr, 0); +} + +int cpu_ldsb_data(CPUArchState *env, abi_ptr addr) +{ + return (int8_t)cpu_ldub_data(env, addr); +} + +uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_lduw_be_data_ra(env, addr, 0); +} + +int cpu_ldsw_be_data(CPUArchState *env, abi_ptr addr) +{ + return (int16_t)cpu_lduw_be_data(env, addr); +} + +uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldl_be_data_ra(env, addr, 0); +} + +uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldq_be_data_ra(env, addr, 0); +} + +uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_lduw_le_data_ra(env, addr, 0); +} + +int cpu_ldsw_le_data(CPUArchState *env, abi_ptr addr) +{ + return (int16_t)cpu_lduw_le_data(env, addr); +} + +uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldl_le_data_ra(env, addr, 0); 
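The layering in this new file is mechanical: each *_data helper defers to *_data_ra with ra = 0, which defers to *_mmuidx_ra with the current MMU index, which builds a MemOpIdx and calls the cpu_*_mmu core. A hypothetical caller, shown only to make that equivalence concrete:

/* Illustration only: for a normal RAM mapping these three calls perform
 * the same little-endian 32-bit load, differing only in how much of the
 * MemOpIdx and return address the caller supplies explicitly. */
static uint32_t example_equivalent_loads(CPUArchState *env, abi_ptr addr,
                                         uintptr_t ra)
{
    int mmu_idx = cpu_mmu_index(env, false);
    MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);

    uint32_t a = cpu_ldl_le_mmu(env, addr, oi, ra);
    uint32_t b = cpu_ldl_le_mmuidx_ra(env, addr, mmu_idx, ra);
    uint32_t c = cpu_ldl_le_data_ra(env, addr, ra);

    g_assert(a == b && b == c);
    return a;
}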
+} + +uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr addr) +{ + return cpu_ldq_le_data_ra(env, addr, 0); +} + +void cpu_stb_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stb_data_ra(env, addr, val, 0); +} + +void cpu_stw_be_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stw_be_data_ra(env, addr, val, 0); +} + +void cpu_stl_be_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stl_be_data_ra(env, addr, val, 0); +} + +void cpu_stq_be_data(CPUArchState *env, abi_ptr addr, uint64_t val) +{ + cpu_stq_be_data_ra(env, addr, val, 0); +} + +void cpu_stw_le_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stw_le_data_ra(env, addr, val, 0); +} + +void cpu_stl_le_data(CPUArchState *env, abi_ptr addr, uint32_t val) +{ + cpu_stl_le_data_ra(env, addr, val, 0); +} + +void cpu_stq_le_data(CPUArchState *env, abi_ptr addr, uint64_t val) +{ + cpu_stq_le_data_ra(env, addr, val, 0); +} diff --git a/accel/tcg/log_instr.c b/accel/tcg/log_instr.c index edffa5d87a0..445505612d2 100644 --- a/accel/tcg/log_instr.c +++ b/accel/tcg/log_instr.c @@ -992,7 +992,7 @@ void qemu_log_instr_cap_int(CPUArchState *env, const char *reg_name, #endif static inline void qemu_log_instr_mem_int(CPUArchState *env, target_ulong addr, - int flags, TCGMemOpIdx oi, + int flags, MemOpIdx oi, target_ulong value) { cpu_log_instr_info_t *iinfo = get_cpu_log_instr_info(env); @@ -1005,13 +1005,13 @@ static inline void qemu_log_instr_mem_int(CPUArchState *env, target_ulong addr, g_array_append_val(iinfo->mem, m); } -void qemu_log_instr_ld_int(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, +void qemu_log_instr_ld_int(CPUArchState *env, target_ulong addr, MemOpIdx oi, target_ulong value) { qemu_log_instr_mem_int(env, addr, LMI_LD, oi, value); } -void qemu_log_instr_st_int(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, +void qemu_log_instr_st_int(CPUArchState *env, target_ulong addr, MemOpIdx oi, target_ulong value) { qemu_log_instr_mem_int(env, addr, LMI_ST, oi, value); @@ -1616,28 +1616,28 @@ void helper_qemu_log_instr_commit(CPUArchState *env) } void helper_qemu_log_instr_load64(CPUArchState *env, target_ulong addr, - uint64_t value, TCGMemOpIdx oi) + uint64_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_LD, oi, value); } void helper_qemu_log_instr_store64(CPUArchState *env, target_ulong addr, - uint64_t value, TCGMemOpIdx oi) + uint64_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_ST, oi, value); } void helper_qemu_log_instr_load32(CPUArchState *env, target_ulong addr, - uint32_t value, TCGMemOpIdx oi) + uint32_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_LD, oi, (uint64_t)value); } void helper_qemu_log_instr_store32(CPUArchState *env, target_ulong addr, - uint32_t value, TCGMemOpIdx oi) + uint32_t value, MemOpIdx oi) { if (qemu_log_instr_enabled(env)) qemu_log_instr_mem_int(env, addr, LMI_ST, oi, (uint64_t)value); diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 7b1ba57a7b1..3e2f52c939a 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -10,14 +10,18 @@ tcg_ss.add(files( )) tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c')) -tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl]) +tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')]) specific_ss.add_all(when: 'CONFIG_TCG', if_true: 
tcg_ss) specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( 'cputlb.c', + 'hmp.c', +)) + +tcg_module_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( 'tcg-accel-ops.c', 'tcg-accel-ops-mttcg.c', 'tcg-accel-ops-icount.c', - 'tcg-accel-ops-rr.c' + 'tcg-accel-ops-rr.c', )) specific_ss.add(when: ['CONFIG_TCG_LOG_INSTR', 'CONFIG_TCG'], if_true: files('log_instr.c')) diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c index c3dc3effe7e..22d95fe1c35 100644 --- a/accel/tcg/plugin-gen.c +++ b/accel/tcg/plugin-gen.c @@ -43,10 +43,8 @@ * CPU's index into a TCG temp, since the first callback did it already. */ #include "qemu/osdep.h" -#include "cpu.h" #include "tcg/tcg.h" #include "tcg/tcg-op.h" -#include "trace/mem.h" #include "exec/exec-all.h" #include "exec/plugin-gen.h" #include "exec/translator.h" @@ -161,15 +159,10 @@ static void gen_empty_mem_helper(void) tcg_temp_free_ptr(ptr); } -static inline -void gen_plugin_cb_start(enum plugin_gen_from from, - enum plugin_gen_cb type, unsigned wr) +static void gen_plugin_cb_start(enum plugin_gen_from from, + enum plugin_gen_cb type, unsigned wr) { - TCGOp *op; - tcg_gen_plugin_cb_start(from, type, wr); - op = tcg_last_op(); - QSIMPLEQ_INSERT_TAIL(&tcg_ctx->plugin_ops, op, plugin_link); } static void gen_wrapped(enum plugin_gen_from from, @@ -180,7 +173,7 @@ static void gen_wrapped(enum plugin_gen_from from, tcg_gen_plugin_cb_end(); } -static inline void plugin_gen_empty_callback(enum plugin_gen_from from) +static void plugin_gen_empty_callback(enum plugin_gen_from from) { switch (from) { case PLUGIN_GEN_AFTER_INSN: @@ -213,9 +206,9 @@ static void gen_mem_wrapped(enum plugin_gen_cb type, const union mem_gen_fn *f, TCGv addr, uint32_t info, bool is_mem) { - int wr = !!(info & TRACE_MEM_ST); + enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info); - gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, wr); + gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw); if (is_mem) { f->mem_fn(addr, info); } else { @@ -386,7 +379,7 @@ static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) } static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, - void *func, unsigned tcg_flags, int *cb_idx) + void *func, int *cb_idx) { /* copy all ops until the call */ do { @@ -413,7 +406,7 @@ static TCGOp *copy_call(TCGOp **begin_op, TCGOp *op, void *empty_func, tcg_debug_assert(i < MAX_OPC_PARAM_ARGS); } op->args[*cb_idx] = (uintptr_t)func; - op->args[*cb_idx + 1] = tcg_flags; + op->args[*cb_idx + 1] = (*begin_op)->args[*cb_idx + 1]; return op; } @@ -440,7 +433,7 @@ static TCGOp *append_udata_cb(const struct qemu_plugin_dyn_cb *cb, /* call */ op = copy_call(&begin_op, op, HELPER(plugin_vcpu_udata_cb), - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + cb->f.vcpu_udata, cb_idx); return op; } @@ -491,7 +484,7 @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb, if (type == PLUGIN_GEN_CB_MEM) { /* call */ op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb), - cb->f.vcpu_udata, cb->tcg_flags, cb_idx); + cb->f.vcpu_udata, cb_idx); } return op; @@ -514,9 +507,8 @@ static bool op_rw(const TCGOp *op, const struct qemu_plugin_dyn_cb *cb) return !!(cb->rw & (w + 1)); } -static inline -void inject_cb_type(const GArray *cbs, TCGOp *begin_op, inject_fn inject, - op_ok_fn ok) +static void inject_cb_type(const GArray *cbs, TCGOp *begin_op, + inject_fn inject, op_ok_fn ok) { TCGOp *end_op; TCGOp *op; @@ -710,62 +702,6 @@ static void plugin_gen_disable_mem_helper(const struct qemu_plugin_tb *ptb, inject_mem_disable_helper(insn, 
begin_op); } -static void plugin_inject_cb(const struct qemu_plugin_tb *ptb, TCGOp *begin_op, - int insn_idx) -{ - enum plugin_gen_from from = begin_op->args[0]; - enum plugin_gen_cb type = begin_op->args[1]; - - switch (from) { - case PLUGIN_GEN_FROM_TB: - switch (type) { - case PLUGIN_GEN_CB_UDATA: - plugin_gen_tb_udata(ptb, begin_op); - return; - case PLUGIN_GEN_CB_INLINE: - plugin_gen_tb_inline(ptb, begin_op); - return; - default: - g_assert_not_reached(); - } - case PLUGIN_GEN_FROM_INSN: - switch (type) { - case PLUGIN_GEN_CB_UDATA: - plugin_gen_insn_udata(ptb, begin_op, insn_idx); - return; - case PLUGIN_GEN_CB_INLINE: - plugin_gen_insn_inline(ptb, begin_op, insn_idx); - return; - case PLUGIN_GEN_ENABLE_MEM_HELPER: - plugin_gen_enable_mem_helper(ptb, begin_op, insn_idx); - return; - default: - g_assert_not_reached(); - } - case PLUGIN_GEN_FROM_MEM: - switch (type) { - case PLUGIN_GEN_CB_MEM: - plugin_gen_mem_regular(ptb, begin_op, insn_idx); - return; - case PLUGIN_GEN_CB_INLINE: - plugin_gen_mem_inline(ptb, begin_op, insn_idx); - return; - default: - g_assert_not_reached(); - } - case PLUGIN_GEN_AFTER_INSN: - switch (type) { - case PLUGIN_GEN_DISABLE_MEM_HELPER: - plugin_gen_disable_mem_helper(ptb, begin_op, insn_idx); - return; - default: - g_assert_not_reached(); - } - default: - g_assert_not_reached(); - } -} - /* #define DEBUG_PLUGIN_GEN_OPS */ static void pr_ops(void) { @@ -823,21 +759,95 @@ static void pr_ops(void) static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb) { TCGOp *op; - int insn_idx; + int insn_idx = -1; pr_ops(); - insn_idx = -1; - QSIMPLEQ_FOREACH(op, &tcg_ctx->plugin_ops, plugin_link) { - enum plugin_gen_from from = op->args[0]; - enum plugin_gen_cb type = op->args[1]; - - tcg_debug_assert(op->opc == INDEX_op_plugin_cb_start); - /* ENABLE_MEM_HELPER is the first callback of an instruction */ - if (from == PLUGIN_GEN_FROM_INSN && - type == PLUGIN_GEN_ENABLE_MEM_HELPER) { + + QTAILQ_FOREACH(op, &tcg_ctx->ops, link) { + switch (op->opc) { + case INDEX_op_insn_start: insn_idx++; + break; + case INDEX_op_plugin_cb_start: + { + enum plugin_gen_from from = op->args[0]; + enum plugin_gen_cb type = op->args[1]; + + switch (from) { + case PLUGIN_GEN_FROM_TB: + { + g_assert(insn_idx == -1); + + switch (type) { + case PLUGIN_GEN_CB_UDATA: + plugin_gen_tb_udata(plugin_tb, op); + break; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_tb_inline(plugin_tb, op); + break; + default: + g_assert_not_reached(); + } + break; + } + case PLUGIN_GEN_FROM_INSN: + { + g_assert(insn_idx >= 0); + + switch (type) { + case PLUGIN_GEN_CB_UDATA: + plugin_gen_insn_udata(plugin_tb, op, insn_idx); + break; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_insn_inline(plugin_tb, op, insn_idx); + break; + case PLUGIN_GEN_ENABLE_MEM_HELPER: + plugin_gen_enable_mem_helper(plugin_tb, op, insn_idx); + break; + default: + g_assert_not_reached(); + } + break; + } + case PLUGIN_GEN_FROM_MEM: + { + g_assert(insn_idx >= 0); + + switch (type) { + case PLUGIN_GEN_CB_MEM: + plugin_gen_mem_regular(plugin_tb, op, insn_idx); + break; + case PLUGIN_GEN_CB_INLINE: + plugin_gen_mem_inline(plugin_tb, op, insn_idx); + break; + default: + g_assert_not_reached(); + } + + break; + } + case PLUGIN_GEN_AFTER_INSN: + { + g_assert(insn_idx >= 0); + + switch (type) { + case PLUGIN_GEN_DISABLE_MEM_HELPER: + plugin_gen_disable_mem_helper(plugin_tb, op, insn_idx); + break; + default: + g_assert_not_reached(); + } + break; + } + default: + g_assert_not_reached(); + } + break; + } + default: + /* plugins don't care about any 
other ops */ + break; } - plugin_inject_cb(plugin_tb, op, insn_idx); } pr_ops(); } @@ -850,7 +860,6 @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_mask)) { ret = true; - QSIMPLEQ_INIT(&tcg_ctx->plugin_ops); ptb->vaddr = tb->pc; ptb->vaddr2 = -1; get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1); @@ -867,9 +876,8 @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db) struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; struct qemu_plugin_insn *pinsn; - pinsn = qemu_plugin_tb_insn_get(ptb); + pinsn = qemu_plugin_tb_insn_get(ptb, db->pc_next); tcg_ctx->plugin_insn = pinsn; - pinsn->vaddr = db->pc_next; plugin_gen_empty_callback(PLUGIN_GEN_FROM_INSN); /* diff --git a/accel/tcg/plugin-helpers.h b/accel/tcg/plugin-helpers.h index 1916ee79206..9829abe4a95 100644 --- a/accel/tcg/plugin-helpers.h +++ b/accel/tcg/plugin-helpers.h @@ -1,5 +1,4 @@ #ifdef CONFIG_PLUGIN -/* Note: no TCG flags because those are overwritten later */ -DEF_HELPER_2(plugin_vcpu_udata_cb, void, i32, ptr) -DEF_HELPER_4(plugin_vcpu_mem_cb, void, i32, i32, i64, ptr) +DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG, void, i32, ptr) +DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG, void, i32, i32, i64, ptr) #endif diff --git a/include/exec/tb-context.h b/accel/tcg/tb-context.h similarity index 96% rename from include/exec/tb-context.h rename to accel/tcg/tb-context.h index cc339791138..cac62d97491 100644 --- a/include/exec/tb-context.h +++ b/accel/tcg/tb-context.h @@ -34,6 +34,7 @@ struct TBContext { /* statistics */ unsigned tb_flush_count; + unsigned tb_phys_invalidate_count; }; extern TBContext tb_ctx; diff --git a/include/exec/tb-hash.h b/accel/tcg/tb-hash.h similarity index 100% rename from include/exec/tb-hash.h rename to accel/tcg/tb-hash.h diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 13b8fbeb699..ea42d1d51b1 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -30,7 +30,6 @@ #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" -#include "hw/boards.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-icount.h" diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 847d2079d21..29632bd4c0a 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -28,6 +28,7 @@ #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "qemu/main-loop.h" +#include "qemu/notify.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" #include "hw/boards.h" @@ -35,6 +36,26 @@ #include "tcg-accel-ops.h" #include "tcg-accel-ops-mttcg.h" +typedef struct MttcgForceRcuNotifier { + Notifier notifier; + CPUState *cpu; +} MttcgForceRcuNotifier; + +static void do_nothing(CPUState *cpu, run_on_cpu_data d) +{ +} + +static void mttcg_force_rcu(Notifier *notify, void *data) +{ + CPUState *cpu = container_of(notify, MttcgForceRcuNotifier, notifier)->cpu; + + /* + * Called with rcu_registry_lock held, using async_run_on_cpu() ensures + * that there are no deadlocks. + */ + async_run_on_cpu(cpu, do_nothing, RUN_ON_CPU_NULL); +} + /* * In the multi-threaded case each vCPU has its own thread. 
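
Note: the tcg-accel-ops-mttcg.c hunk above registers a per-vCPU force_rcu notifier so that the RCU machinery can nudge a vCPU that is sitting in its execution loop and holding up a grace period; mttcg_force_rcu() performs the nudge by queuing a no-op with async_run_on_cpu(). The following is a minimal standalone sketch of the notifier-list pattern this relies on; the types and function names are illustrative, not QEMU's actual qemu/notify.h implementation.

#include <stdio.h>

/* Illustrative single-linked notifier list, roughly the shape of the
 * interface used by rcu_add_force_rcu_notifier() in the hunk above. */
typedef struct Notifier Notifier;
struct Notifier {
    void (*notify)(Notifier *notifier, void *data);
    Notifier *next;
};

typedef struct {
    Notifier *head;
} NotifierList;

static void notifier_list_add(NotifierList *list, Notifier *n)
{
    n->next = list->head;
    list->head = n;
}

static void notifier_list_notify(NotifierList *list, void *data)
{
    for (Notifier *n = list->head; n; n = n->next) {
        n->notify(n, data);
    }
}

/* Stand-in for mttcg_force_rcu(): QEMU queues async work on the vCPU so it
 * leaves its RCU read-side critical section and the grace period can end. */
static void force_rcu_cb(Notifier *n, void *data)
{
    (void)n;
    (void)data;
    printf("force_rcu notifier fired\n");
}

int main(void)
{
    NotifierList force_rcu_notifiers = { NULL };
    Notifier n = { .notify = force_rcu_cb };

    notifier_list_add(&force_rcu_notifiers, &n);
    /* The RCU thread would call this when a grace period stalls. */
    notifier_list_notify(&force_rcu_notifiers, NULL);
    return 0;
}
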
The TLS * variable current_cpu can be used deep in the code to find the @@ -43,12 +64,16 @@ static void *mttcg_cpu_thread_fn(void *arg) { + MttcgForceRcuNotifier force_rcu; CPUState *cpu = arg; assert(tcg_enabled()); g_assert(!icount_enabled()); rcu_register_thread(); + force_rcu.notifier.notify = mttcg_force_rcu; + force_rcu.cpu = cpu; + rcu_add_force_rcu_notifier(&force_rcu.notifier); tcg_register_thread(); qemu_mutex_lock_iothread(); @@ -100,6 +125,7 @@ static void *mttcg_cpu_thread_fn(void *arg) tcg_cpus_destroy(cpu); qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); rcu_unregister_thread(); return NULL; } diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 018b54c508f..bf59f53dbc2 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -28,9 +28,9 @@ #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "qemu/main-loop.h" +#include "qemu/notify.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" -#include "hw/boards.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-rr.h" @@ -61,8 +61,6 @@ void rr_kick_vcpu_thread(CPUState *unused) static QEMUTimer *rr_kick_vcpu_timer; static CPUState *rr_current_cpu; -#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) - static inline int64_t rr_next_kick_time(void) { return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; @@ -136,6 +134,11 @@ static void rr_deal_with_unplugged_cpus(void) } } +static void rr_force_rcu(Notifier *notify, void *data) +{ + rr_kick_next_cpu(); +} + /* * In the single-threaded case each vCPU is simulated in turn. If * there is more than a single vCPU we create a simple timer to kick @@ -146,10 +149,13 @@ static void rr_deal_with_unplugged_cpus(void) static void *rr_cpu_thread_fn(void *arg) { + Notifier force_rcu; CPUState *cpu = arg; assert(tcg_enabled()); rcu_register_thread(); + force_rcu.notify = rr_force_rcu; + rcu_add_force_rcu_notifier(&force_rcu); tcg_register_thread(); qemu_mutex_lock_iothread(); @@ -258,6 +264,7 @@ static void *rr_cpu_thread_fn(void *arg) rr_deal_with_unplugged_cpus(); } + rcu_remove_force_rcu_notifier(&force_rcu); rcu_unregister_thread(); return NULL; } diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index fdea83e4fe7..0e3f29884bd 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -32,7 +32,6 @@ #include "qemu/main-loop.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" -#include "hw/boards.h" #include "tcg-accel-ops.h" #include "tcg-accel-ops-mttcg.h" @@ -129,6 +128,7 @@ static const TypeInfo tcg_accel_ops_type = { .class_init = tcg_accel_ops_class_init, .abstract = true, }; +module_obj(ACCEL_OPS_NAME("tcg")); static void tcg_accel_ops_register_types(void) { diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index e378c2db73f..d6336a9c966 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -32,6 +32,11 @@ #include "qemu/error-report.h" #include "qemu/accel.h" #include "qapi/qapi-builtin-visit.h" +#include "qemu/units.h" +#if !defined(CONFIG_USER_ONLY) +#include "hw/boards.h" +#endif +#include "internal.h" struct TCGState { AccelState parent_obj; @@ -105,22 +110,29 @@ static void tcg_accel_instance_init(Object *obj) bool mttcg_enabled; -static int tcg_init(MachineState *ms) +static int tcg_init_machine(MachineState *ms) { TCGState *s = TCG_STATE(current_accel()); +#ifdef CONFIG_USER_ONLY + unsigned max_cpus = 1; +#else + unsigned max_cpus = ms->smp.max_cpus; +#endif - tcg_exec_init(s->tb_size * 1024 * 1024, 
s->splitwx_enabled); + tcg_allowed = true; mttcg_enabled = s->mttcg_enabled; + page_init(); + tb_htable_init(); + tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus); + +#if defined(CONFIG_SOFTMMU) /* - * Initialize TCG regions only for softmmu. - * - * This needs to be done later for user mode, because the prologue - * generation needs to be delayed so that GUEST_BASE is already set. + * There's no guest base to take into account, so go ahead and + * initialize the prologue now. */ -#ifndef CONFIG_USER_ONLY - tcg_region_init(); -#endif /* !CONFIG_USER_ONLY */ + tcg_prologue_init(tcg_ctx); +#endif return 0; } @@ -200,7 +212,7 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data) { AccelClass *ac = ACCEL_CLASS(oc); ac->name = "tcg"; - ac->init_machine = tcg_init; + ac->init_machine = tcg_init_machine; ac->allowed = &tcg_allowed; object_class_property_add_str(oc, "thread", @@ -226,6 +238,7 @@ static const TypeInfo tcg_accel_type = { .class_init = tcg_accel_class_init, .instance_size = sizeof(TCGState), }; +module_obj(TYPE_TCG_ACCEL); static void register_accel_types(void) { diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c index 521da4a8137..ac7d28c251e 100644 --- a/accel/tcg/tcg-runtime-gvec.c +++ b/accel/tcg/tcg-runtime-gvec.c @@ -1073,9 +1073,8 @@ void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int32_t)) { int32_t ai = *(int32_t *)(a + i); int32_t bi = *(int32_t *)(b + i); - int32_t di = ai + bi; - if (((di ^ ai) &~ (ai ^ bi)) < 0) { - /* Signed overflow. */ + int32_t di; + if (sadd32_overflow(ai, bi, &di)) { di = (di < 0 ? INT32_MAX : INT32_MIN); } *(int32_t *)(d + i) = di; @@ -1091,9 +1090,8 @@ void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int64_t)) { int64_t ai = *(int64_t *)(a + i); int64_t bi = *(int64_t *)(b + i); - int64_t di = ai + bi; - if (((di ^ ai) &~ (ai ^ bi)) < 0) { - /* Signed overflow. */ + int64_t di; + if (sadd64_overflow(ai, bi, &di)) { di = (di < 0 ? INT64_MAX : INT64_MIN); } *(int64_t *)(d + i) = di; @@ -1143,9 +1141,8 @@ void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int32_t)) { int32_t ai = *(int32_t *)(a + i); int32_t bi = *(int32_t *)(b + i); - int32_t di = ai - bi; - if (((di ^ ai) & (ai ^ bi)) < 0) { - /* Signed overflow. */ + int32_t di; + if (ssub32_overflow(ai, bi, &di)) { di = (di < 0 ? INT32_MAX : INT32_MIN); } *(int32_t *)(d + i) = di; @@ -1161,9 +1158,8 @@ void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(int64_t)) { int64_t ai = *(int64_t *)(a + i); int64_t bi = *(int64_t *)(b + i); - int64_t di = ai - bi; - if (((di ^ ai) & (ai ^ bi)) < 0) { - /* Signed overflow. */ + int64_t di; + if (ssub64_overflow(ai, bi, &di)) { di = (di < 0 ? 
INT64_MAX : INT64_MIN); } *(int64_t *)(d + i) = di; @@ -1209,8 +1205,8 @@ void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint32_t)) { uint32_t ai = *(uint32_t *)(a + i); uint32_t bi = *(uint32_t *)(b + i); - uint32_t di = ai + bi; - if (di < ai) { + uint32_t di; + if (uadd32_overflow(ai, bi, &di)) { di = UINT32_MAX; } *(uint32_t *)(d + i) = di; @@ -1226,8 +1222,8 @@ void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint64_t)) { uint64_t ai = *(uint64_t *)(a + i); uint64_t bi = *(uint64_t *)(b + i); - uint64_t di = ai + bi; - if (di < ai) { + uint64_t di; + if (uadd64_overflow(ai, bi, &di)) { di = UINT64_MAX; } *(uint64_t *)(d + i) = di; @@ -1273,8 +1269,8 @@ void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint32_t)) { uint32_t ai = *(uint32_t *)(a + i); uint32_t bi = *(uint32_t *)(b + i); - uint32_t di = ai - bi; - if (ai < bi) { + uint32_t di; + if (usub32_overflow(ai, bi, &di)) { di = 0; } *(uint32_t *)(d + i) = di; @@ -1290,8 +1286,8 @@ void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc) for (i = 0; i < oprsz; i += sizeof(uint64_t)) { uint64_t ai = *(uint64_t *)(a + i); uint64_t bi = *(uint64_t *)(b + i); - uint64_t di = ai - bi; - if (ai < bi) { + uint64_t di; + if (usub64_overflow(ai, bi, &di)) { di = 0; } *(uint64_t *)(d + i) = di; diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 12ba4585619..e4e030043fb 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -27,15 +27,9 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" -#ifdef TARGET_CHERI -#include "cheri-helper-utils.h" -#endif #include "disas/disas.h" #include "exec/log.h" #include "tcg/tcg.h" -#include "exec/tb-lookup.h" -#include "exec/log_instr.h" - /* 32-bit helpers */ @@ -150,28 +144,6 @@ uint64_t HELPER(ctpop_i64)(uint64_t arg) return ctpop64(arg); } -const void *HELPER(lookup_tb_ptr)(CPUArchState *env) -{ - CPUState *cpu = env_cpu(env); - TranslationBlock *tb; - target_ulong cs_base, cs_top = 0, pc; - uint32_t cheri_flags = 0; - uint32_t flags; - - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); - - tb = tb_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, curr_cflags(cpu)); - if (tb == NULL) { - return tcg_code_gen_epilogue; - } - qemu_log_mask_and_addr(CPU_LOG_EXEC, pc, - "Chain %d: %p [" TARGET_FMT_lx "/" TARGET_FMT_lx - "/" TARGET_FMT_lx "/%#x/%#x] %s\n", - cpu->cpu_index, tb->tc.ptr, cs_base, pc, cs_top, - cheri_flags, flags, lookup_symbol(pc)); - return tb->tc.ptr; -} - void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(env_cpu(env), GETPC()); diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index d76d98d8e18..a12e1a6d4e7 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -39,8 +39,6 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr) #endif /* IN_HELPER_PROTO */ -#ifdef CONFIG_SOFTMMU - DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32, i32) DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG, @@ -88,50 +86,6 @@ DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG, TCG_CALL_NO_WG, i32, env, tl, i32, i32) #endif /* CONFIG_ATOMIC64 */ -#else - -DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, 
i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) -#ifdef CONFIG_ATOMIC64 -DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64) -DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64) -#endif - -#ifdef CONFIG_ATOMIC64 -#define GEN_ATOMIC_HELPERS(NAME) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le), \ - TCG_CALL_NO_WG, i64, env, tl, i64) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be), \ - TCG_CALL_NO_WG, i64, env, tl, i64) -#else -#define GEN_ATOMIC_HELPERS(NAME) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ - TCG_CALL_NO_WG, i32, env, tl, i32) \ - DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ - TCG_CALL_NO_WG, i32, env, tl, i32) -#endif /* CONFIG_ATOMIC64 */ - -#endif /* CONFIG_SOFTMMU */ - GEN_ATOMIC_HELPERS(fetch_add) GEN_ATOMIC_HELPERS(fetch_and) GEN_ATOMIC_HELPERS(fetch_or) diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events index 6eefb37f5d0..59eab96f264 100644 --- a/accel/tcg/trace-events +++ b/accel/tcg/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # TCG related tracing # cpu-exec.c diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 2a254563802..dd3e38b01d5 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -18,11 +18,9 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" #include "qemu-common.h" #define NO_CPU_IO_DEFS -#include "cpu.h" #include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" @@ -48,10 +46,8 @@ #endif #include "exec/cputlb.h" -#include "exec/tb-hash.h" #include "exec/translate-all.h" #include "qemu/bitmap.h" -#include "qemu/error-report.h" #include "qemu/qemu-print.h" #include "qemu/timer.h" #include "qemu/main-loop.h" @@ -61,6 +57,8 @@ #include "sysemu/tcg.h" #include "qapi/error.h" #include "hw/core/tcg-cpu-ops.h" +#include "tb-hash.h" +#include "tb-context.h" #include "internal.h" /* #define DEBUG_TB_INVALIDATE */ @@ -220,9 +218,6 @@ static int v_l2_levels; static void *l1_map[V_L1_MAX_SIZE]; -/* code generation context */ -TCGContext tcg_init_ctx; -__thread TCGContext *tcg_ctx; TBContext tb_ctx; static void page_table_config_init(void) @@ -245,11 +240,6 @@ static void page_table_config_init(void) assert(v_l2_levels >= 0); } -static void cpu_gen_init(void) -{ - tcg_context_init(&tcg_init_ctx); -} - /* Encode VAL as a signed leb128 sequence at P. Return P incremented past the encoded value. 
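
Note: the comment ending above describes encode_sleb128(), which compresses the per-instruction (host PC, guest state) deltas appended to each translated block. A standalone sketch of signed LEB128 encoding and decoding in that spirit follows; it mirrors the idea rather than QEMU's exact code.

#include <stdint.h>
#include <stdio.h>

/* Signed LEB128: 7 payload bits per byte, high bit set on all but the last
 * byte, bit 0x40 of the last byte carries the sign.  Relies on arithmetic
 * right shift of negative values, as the QEMU code does. */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    int more;
    do {
        uint8_t byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && !(byte & 0x40)) ||
                 (val == -1 && (byte & 0x40)));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);
    return p;
}

static const uint8_t *decode_sleb128(const uint8_t *p, int64_t *val)
{
    int64_t result = 0;
    int shift = 0;
    uint8_t byte;

    do {
        byte = *p++;
        result |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < 64 && (byte & 0x40)) {
        result |= -((int64_t)1 << shift);   /* sign-extend */
    }
    *val = result;
    return p;
}

int main(void)
{
    uint8_t buf[16];
    int64_t out;

    encode_sleb128(buf, -129);
    decode_sleb128(buf, &out);
    printf("%lld\n", (long long)out);       /* prints -129 */
    return 0;
}
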
*/ static uint8_t *encode_sleb128(uint8_t *p, target_long val) @@ -388,11 +378,6 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, return 0; } -void tb_destroy(TranslationBlock *tb) -{ - qemu_spin_destroy(&tb->jmp_lock); -} - bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) { /* @@ -415,7 +400,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) return false; } -static void page_init(void) +void page_init(void) { page_size_init(); page_table_config_init(); @@ -900,408 +885,6 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, } } -/* Minimum size of the code gen buffer. This number is randomly chosen, - but not so small that we can't have a fair number of TB's live. */ -#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB) - -/* Maximum size of the code gen buffer we'd like to use. Unless otherwise - indicated, this is constrained by the range of direct branches on the - host cpu, as used by the TCG implementation of goto_tb. */ -#if defined(__x86_64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__sparc__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__powerpc64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__powerpc__) -# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) -#elif defined(__aarch64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) -#elif defined(__s390x__) - /* We have a +- 4GB range on the branches; leave some slop. */ -# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB) -#elif defined(__mips__) - /* We have a 256MB branch region, but leave room to make sure the - main executable is also within that region. */ -# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB) -#else -# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) -#endif - -#if TCG_TARGET_REG_BITS == 32 -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB) -#ifdef CONFIG_USER_ONLY -/* - * For user mode on smaller 32 bit systems we may run into trouble - * allocating big chunks of data in the right place. On these systems - * we utilise a static code generation buffer directly in the binary. - */ -#define USE_STATIC_CODE_GEN_BUFFER -#endif -#else /* TCG_TARGET_REG_BITS == 64 */ -#ifdef CONFIG_USER_ONLY -/* - * As user-mode emulation typically means running multiple instances - * of the translator don't go too nuts with our default code gen - * buffer lest we make things too hard for the OS. - */ -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB) -#else -/* - * We expect most system emulation to run one or two guests per host. - * Users running large scale system emulation may want to tweak their - * runtime setup via the tb-size control on the command line. - */ -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB) -#endif -#endif - -#define DEFAULT_CODE_GEN_BUFFER_SIZE \ - (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ - ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) - -static size_t size_code_gen_buffer(size_t tb_size) -{ - /* Size the buffer. */ - if (tb_size == 0) { - size_t phys_mem = qemu_get_host_physmem(); - if (phys_mem == 0) { - tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; - } else { - tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8); - } - } - if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { - tb_size = MIN_CODE_GEN_BUFFER_SIZE; - } - if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { - tb_size = MAX_CODE_GEN_BUFFER_SIZE; - } - return tb_size; -} - -#ifdef __mips__ -/* In order to use J and JAL within the code_gen_buffer, we require - that the buffer not cross a 256MB boundary. 
*/ -static inline bool cross_256mb(void *addr, size_t size) -{ - return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful; -} - -/* We weren't able to allocate a buffer without crossing that boundary, - so make do with the larger portion of the buffer that doesn't cross. - Returns the new base of the buffer, and adjusts code_gen_buffer_size. */ -static inline void *split_cross_256mb(void *buf1, size_t size1) -{ - void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful); - size_t size2 = buf1 + size1 - buf2; - - size1 = buf2 - buf1; - if (size1 < size2) { - size1 = size2; - buf1 = buf2; - } - - tcg_ctx->code_gen_buffer_size = size1; - return buf1; -} -#endif - -#ifdef USE_STATIC_CODE_GEN_BUFFER -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] - __attribute__((aligned(CODE_GEN_ALIGN))); - -static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp) -{ - void *buf, *end; - size_t size; - - if (splitwx > 0) { - error_setg(errp, "jit split-wx not supported"); - return false; - } - - /* page-align the beginning and end of the buffer */ - buf = static_code_gen_buffer; - end = static_code_gen_buffer + sizeof(static_code_gen_buffer); - buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size); - end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size); - - size = end - buf; - - /* Honor a command-line option limiting the size of the buffer. */ - if (size > tb_size) { - size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size); - } - tcg_ctx->code_gen_buffer_size = size; - -#ifdef __mips__ - if (cross_256mb(buf, size)) { - buf = split_cross_256mb(buf, size); - size = tcg_ctx->code_gen_buffer_size; - } -#endif - - if (qemu_mprotect_rwx(buf, size)) { - error_setg_errno(errp, errno, "mprotect of jit buffer"); - return false; - } - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - - tcg_ctx->code_gen_buffer = buf; - return true; -} -#elif defined(_WIN32) -static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) -{ - void *buf; - - if (splitwx > 0) { - error_setg(errp, "jit split-wx not supported"); - return false; - } - - buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, - PAGE_EXECUTE_READWRITE); - if (buf == NULL) { - error_setg_win32(errp, GetLastError(), - "allocate %zu bytes for jit buffer", size); - return false; - } - - tcg_ctx->code_gen_buffer = buf; - tcg_ctx->code_gen_buffer_size = size; - return true; -} -#else -static bool alloc_code_gen_buffer_anon(size_t size, int prot, - int flags, Error **errp) -{ - void *buf; - - buf = mmap(NULL, size, prot, flags, -1, 0); - if (buf == MAP_FAILED) { - error_setg_errno(errp, errno, - "allocate %zu bytes for jit buffer", size); - return false; - } - tcg_ctx->code_gen_buffer_size = size; - -#ifdef __mips__ - if (cross_256mb(buf, size)) { - /* - * Try again, with the original still mapped, to avoid re-acquiring - * the same 256mb crossing. - */ - size_t size2; - void *buf2 = mmap(NULL, size, prot, flags, -1, 0); - switch ((int)(buf2 != MAP_FAILED)) { - case 1: - if (!cross_256mb(buf2, size)) { - /* Success! Use the new buffer. */ - munmap(buf, size); - break; - } - /* Failure. Work with what we had. */ - munmap(buf2, size); - /* fallthru */ - default: - /* Split the original buffer. Free the smaller half. */ - buf2 = split_cross_256mb(buf, size); - size2 = tcg_ctx->code_gen_buffer_size; - if (buf == buf2) { - munmap(buf + size2, size - size2); - } else { - munmap(buf, size - size2); - } - size = size2; - break; - } - buf = buf2; - } -#endif - - /* Request large pages for the buffer. 
*/ - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE); - - tcg_ctx->code_gen_buffer = buf; - return true; -} - -#ifndef CONFIG_TCG_INTERPRETER -#ifdef CONFIG_POSIX -#include "qemu/memfd.h" - -static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp) -{ - void *buf_rw = NULL, *buf_rx = MAP_FAILED; - int fd = -1; - -#ifdef __mips__ - /* Find space for the RX mapping, vs the 256MiB regions. */ - if (!alloc_code_gen_buffer_anon(size, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS | - MAP_NORESERVE, errp)) { - return false; - } - /* The size of the mapping may have been adjusted. */ - size = tcg_ctx->code_gen_buffer_size; - buf_rx = tcg_ctx->code_gen_buffer; -#endif - - buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp); - if (buf_rw == NULL) { - goto fail; - } - -#ifdef __mips__ - void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC, - MAP_SHARED | MAP_FIXED, fd, 0); - if (tmp != buf_rx) { - goto fail_rx; - } -#else - buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0); - if (buf_rx == MAP_FAILED) { - goto fail_rx; - } -#endif - - close(fd); - tcg_ctx->code_gen_buffer = buf_rw; - tcg_ctx->code_gen_buffer_size = size; - tcg_splitwx_diff = buf_rx - buf_rw; - - /* Request large pages for the buffer and the splitwx. */ - qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE); - qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE); - return true; - - fail_rx: - error_setg_errno(errp, errno, "failed to map shared memory for execute"); - fail: - if (buf_rx != MAP_FAILED) { - munmap(buf_rx, size); - } - if (buf_rw) { - munmap(buf_rw, size); - } - if (fd >= 0) { - close(fd); - } - return false; -} -#endif /* CONFIG_POSIX */ - -#ifdef CONFIG_DARWIN -#include - -extern kern_return_t mach_vm_remap(vm_map_t target_task, - mach_vm_address_t *target_address, - mach_vm_size_t size, - mach_vm_offset_t mask, - int flags, - vm_map_t src_task, - mach_vm_address_t src_address, - boolean_t copy, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance); - -static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) -{ - kern_return_t ret; - mach_vm_address_t buf_rw, buf_rx; - vm_prot_t cur_prot, max_prot; - - /* Map the read-write portion via normal anon memory. */ - if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, errp)) { - return false; - } - - buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer; - buf_rx = 0; - ret = mach_vm_remap(mach_task_self(), - &buf_rx, - size, - 0, - VM_FLAGS_ANYWHERE, - mach_task_self(), - buf_rw, - false, - &cur_prot, - &max_prot, - VM_INHERIT_NONE); - if (ret != KERN_SUCCESS) { - /* TODO: Convert "ret" to a human readable error message. 
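
Note: alloc_code_gen_buffer_splitwx_memfd(), shown above as it is dropped from translate-all.c, implements the split-W^X JIT buffer: a single memfd backs two mappings, a writable one used while emitting code and an executable one used when running it, with tcg_splitwx_diff holding the constant offset between the two. A minimal Linux-only sketch of the same idea; it assumes memfd_create() is exposed by the C library (glibc 2.27 or newer).

/* Sketch of a split read-write / read-execute JIT buffer on Linux. */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
    size_t size = 64 * 1024;
    int fd = memfd_create("jit-demo", 0);

    if (fd < 0 || ftruncate(fd, size) < 0) {
        perror("memfd");
        return 1;
    }

    void *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    void *rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    close(fd);
    if (rw == MAP_FAILED || rx == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    /* Code is emitted through the writable alias ... */
    memset(rw, 0xc3, 16);                  /* x86 'ret' bytes, purely a demo */

    /* ... and executed through the executable alias; this delta plays the
     * role of tcg_splitwx_diff in the code above. */
    ptrdiff_t splitwx_diff = (uint8_t *)rx - (uint8_t *)rw;
    printf("rw=%p rx=%p diff=%td\n", rw, rx, splitwx_diff);
    return 0;
}
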
*/ - error_setg(errp, "vm_remap for jit splitwx failed"); - munmap((void *)buf_rw, size); - return false; - } - - if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) { - error_setg_errno(errp, errno, "mprotect for jit splitwx"); - munmap((void *)buf_rx, size); - munmap((void *)buf_rw, size); - return false; - } - - tcg_splitwx_diff = buf_rx - buf_rw; - return true; -} -#endif /* CONFIG_DARWIN */ -#endif /* CONFIG_TCG_INTERPRETER */ - -static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp) -{ -#ifndef CONFIG_TCG_INTERPRETER -# ifdef CONFIG_DARWIN - return alloc_code_gen_buffer_splitwx_vmremap(size, errp); -# endif -# ifdef CONFIG_POSIX - return alloc_code_gen_buffer_splitwx_memfd(size, errp); -# endif -#endif - error_setg(errp, "jit split-wx not supported"); - return false; -} - -static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp) -{ - ERRP_GUARD(); - int prot, flags; - - if (splitwx) { - if (alloc_code_gen_buffer_splitwx(size, errp)) { - return true; - } - /* - * If splitwx force-on (1), fail; - * if splitwx default-on (-1), fall through to splitwx off. - */ - if (splitwx > 0) { - return false; - } - error_free_or_abort(errp); - } - - prot = PROT_READ | PROT_WRITE | PROT_EXEC; - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#ifdef CONFIG_TCG_INTERPRETER - /* The tcg interpreter does not need execute permission. */ - prot = PROT_READ | PROT_WRITE; -#elif defined(CONFIG_DARWIN) - /* Applicable to both iOS and macOS (Apple Silicon). */ - if (!splitwx) { - flags |= MAP_JIT; - } -#endif - - return alloc_code_gen_buffer_anon(size, prot, flags, errp); -} -#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */ - static bool tb_cmp(const void *ap, const void *bp) { const TranslationBlock *a = ap; @@ -1309,7 +892,8 @@ static bool tb_cmp(const void *ap, const void *bp) return a->pc == b->pc && a->cs_base == b->cs_base && - a->cs_top == b->cs_top && + a->pcc_base == b->pcc_base && + a->pcc_top == b->pcc_top && a->cheri_flags == b->cheri_flags && a->flags == b->flags && (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) && @@ -1318,36 +902,13 @@ static bool tb_cmp(const void *ap, const void *bp) a->page_addr[1] == b->page_addr[1]; } -static void tb_htable_init(void) +void tb_htable_init(void) { unsigned int mode = QHT_MODE_AUTO_RESIZE; qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode); } -/* Must be called before using the QEMU cpus. 'tb_size' is the size - (in bytes) allocated to the translation buffer. Zero means default - size. */ -void tcg_exec_init(unsigned long tb_size, int splitwx) -{ - bool ok; - - tcg_allowed = true; - cpu_gen_init(); - page_init(); - tb_htable_init(); - - ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size), - splitwx, &error_fatal); - assert(ok); - -#if defined(CONFIG_SOFTMMU) - /* There's no guest base to take into account, so go ahead and - initialize the prologue now. 
*/ - tcg_prologue_init(tcg_ctx); -#endif -} - /* call with @p->lock held */ static inline void invalidate_page_bitmap(PageDesc *p) { @@ -1661,8 +1222,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* suppress any remaining jumps to this TB */ tb_jmp_unlink(tb); - qatomic_set(&tcg_ctx->tb_phys_invalidate_count, - tcg_ctx->tb_phys_invalidate_count + 1); + qatomic_set(&tb_ctx.tb_phys_invalidate_count, + tb_ctx.tb_phys_invalidate_count + 1); } static void tb_phys_invalidate__locked(TranslationBlock *tb) @@ -1739,31 +1300,8 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, invalidate_page_bitmap(p); #if defined(CONFIG_USER_ONLY) - if (p->flags & PAGE_WRITE) { - target_ulong addr; - PageDesc *p2; - int prot; - - /* force the host page as non writable (writes will have a - page fault + mprotect overhead) */ - page_addr &= qemu_host_page_mask; - prot = 0; - for (addr = page_addr; addr < page_addr + qemu_host_page_size; - addr += TARGET_PAGE_SIZE) { - - p2 = page_find(addr >> TARGET_PAGE_BITS); - if (!p2) { - continue; - } - prot |= p2->flags; - p2->flags &= ~PAGE_WRITE; - } - mprotect(g2h_untagged(page_addr), qemu_host_page_size, - (prot & PAGE_BITS) & ~PAGE_WRITE); - if (DEBUG_TB_INVALIDATE_GATE) { - printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); - } - } + /* translator_loop() must have made all TB pages non-writable */ + assert(!(p->flags & PAGE_WRITE)); #else /* if some code is already present, then the pages are already protected. So we handle the case where only the first TB is @@ -1844,8 +1382,9 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, /* Called with mmap_lock held for user mode emulation. */ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, int cflags) + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, int cflags) { CPUArchState *env = cpu->env_ptr; TranslationBlock *tb, *existing_tb; @@ -1870,14 +1409,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, max_insns = cflags & CF_COUNT_MASK; if (max_insns == 0) { - max_insns = CF_COUNT_MASK; - } - if (max_insns > TCG_MAX_INSNS) { max_insns = TCG_MAX_INSNS; } - if (cpu->singlestep_enabled || singlestep) { - max_insns = 1; - } + QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); buffer_overflow: tb = tcg_tb_alloc(tcg_ctx); @@ -1894,7 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); tb->pc = pc; tb->cs_base = cs_base; - tb->cs_top = cs_top; + tb->pcc_base = pcc_base; + tb->pcc_top = pcc_top; tb->cheri_flags = cheri_flags; tb->flags = flags; tb->cflags = cflags; @@ -1917,6 +1452,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, tcg_ctx->cpu = env_cpu(env); gen_intermediate_code(cpu, tb, max_insns); + assert(tb->size != 0); tcg_ctx->cpu = NULL; max_insns = tb->icount; @@ -2047,8 +1583,15 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, int i; qemu_log(" data: [size=%d]\n", data_size); for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { - qemu_log("0x%08" PRIxPTR ": .quad 0x%" TCG_PRIlx "\n", - (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); + if (sizeof(tcg_target_ulong) == 8) { + qemu_log("0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); + } else if (sizeof(tcg_target_ulong) == 4) { + qemu_log("0x%08" PRIxPTR 
": .long 0x%08" TCG_PRIlx "\n", + (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); + } else { + qemu_build_not_reached(); + } } } qemu_log("\n"); @@ -2088,6 +1631,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, return tb; } + /* + * Insert TB into the corresponding region tree before publishing it + * through QHT. Otherwise rewinding happened in the TB might fail to + * lookup itself using host PC. + */ + tcg_tb_insert(tb); + /* check next page if needed */ virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; phys_page2 = -1; @@ -2105,10 +1655,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); - tb_destroy(tb); + tcg_tb_remove(tb); return existing_tb; } - tcg_tb_insert(tb); return tb; } @@ -2134,7 +1683,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, TranslationBlock *current_tb = NULL; target_ulong current_pc = 0; target_ulong current_cs_base = 0; - target_ulong current_cs_top = 0; + target_ulong current_pcc_base = 0; + target_ulong current_pcc_top = 0; uint32_t current_cheri_flags = 0; uint32_t current_flags = 0; #endif /* TARGET_HAS_PRECISE_SMC */ @@ -2180,9 +1730,9 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, */ current_tb_modified = true; cpu_restore_state_from_tb(cpu, current_tb, retaddr, true); - cpu_get_tb_cpu_state_6(env, ¤t_pc, ¤t_cs_base, - ¤t_cs_top, ¤t_cheri_flags, - ¤t_flags); + cpu_get_tb_cpu_state_ext(env, ¤t_pc, ¤t_cs_base, + ¤t_pcc_base, ¤t_pcc_top, + ¤t_cheri_flags, ¤t_flags); } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate__locked(tb); @@ -2199,7 +1749,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (current_tb_modified) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); mmap_unlock(); cpu_loop_exit_noexc(cpu); } @@ -2325,8 +1875,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) int current_tb_modified = 0; target_ulong current_pc = 0; target_ulong current_cs_base = 0; - target_ulong current_cs_top = 0; - target_ulong current_ds_base = 0; + target_ulong current_pcc_base = 0; + target_ulong current_pcc_top = 0; uint32_t current_cheri_flags = 0; uint32_t current_flags = 0; #endif @@ -2360,9 +1910,9 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) current_tb_modified = 1; cpu_restore_state_from_tb(cpu, current_tb, pc, true); - cpu_get_tb_cpu_state_6(env, ¤t_pc, ¤t_cs_base, - ¤t_cs_top, ¤t_cheri_flags, - ¤t_flags); + cpu_get_tb_cpu_state_ext(env, ¤t_pc, ¤t_cs_base, + ¤t_pcc_base, ¤t_pcc_top, + ¤t_cheri_flags, ¤t_flags); } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate(tb, addr); @@ -2371,7 +1921,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); return true; } #endif @@ -2396,12 +1946,13 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) /* The exception probably happened in a helper. The CPU state should have been saved before calling it. Fetch the PC from there. 
*/ CPUArchState *env = cpu->env_ptr; - target_ulong pc, cs_base, cs_top = 0; + target_ulong pc, cs_base, pcc_base = 0, pcc_top = 0; uint32_t cheri_flags = 0; tb_page_addr_t addr; uint32_t flags; - cpu_get_tb_cpu_state_6(env, &pc, &cs_base, &cs_top, &cheri_flags, &flags); + cpu_get_tb_cpu_state_ext(env, &pc, &cs_base, &pcc_base, &pcc_top, + &cheri_flags, &flags); addr = get_page_addr_code(env, pc); if (addr != -1) { tb_invalidate_phys_range(addr, addr + 1); @@ -2457,7 +2008,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_loop_exit_noexc(cpu); } -static void print_qht_statistics(struct qht_stats hst) +static void print_qht_statistics(struct qht_stats hst, GString *buf) { uint32_t hgram_opts; size_t hgram_bins; @@ -2466,9 +2017,11 @@ static void print_qht_statistics(struct qht_stats hst) if (!hst.head_buckets) { return; } - qemu_printf("TB hash buckets %zu/%zu (%0.2f%% head buckets used)\n", - hst.used_head_buckets, hst.head_buckets, - (double)hst.used_head_buckets / hst.head_buckets * 100); + g_string_append_printf(buf, "TB hash buckets %zu/%zu " + "(%0.2f%% head buckets used)\n", + hst.used_head_buckets, hst.head_buckets, + (double)hst.used_head_buckets / + hst.head_buckets * 100); hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; @@ -2476,8 +2029,9 @@ static void print_qht_statistics(struct qht_stats hst) hgram_opts |= QDIST_PR_NODECIMAL; } hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); - qemu_printf("TB hash occupancy %0.2f%% avg chain occ. Histogram: %s\n", - qdist_avg(&hst.occupancy) * 100, hgram); + g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " + "Histogram: %s\n", + qdist_avg(&hst.occupancy) * 100, hgram); g_free(hgram); hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; @@ -2489,8 +2043,9 @@ static void print_qht_statistics(struct qht_stats hst) hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; } hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); - qemu_printf("TB hash avg chain %0.3f buckets. Histogram: %s\n", - qdist_avg(&hst.chain), hgram); + g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " + "Histogram: %s\n", + qdist_avg(&hst.chain), hgram); g_free(hgram); } @@ -2527,7 +2082,7 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) return false; } -void dump_exec_info(void) +void dump_exec_info(GString *buf) { struct tb_tree_stats tst = {}; struct qht_stats hst; @@ -2536,49 +2091,53 @@ void dump_exec_info(void) tcg_tb_foreach(tb_tree_stats_iter, &tst); nb_tbs = tst.nb_tbs; /* XXX: avoid using doubles ? */ - qemu_printf("Translation buffer state:\n"); + g_string_append_printf(buf, "Translation buffer state:\n"); /* * Report total code size including the padding and TB structs; * otherwise users might think "-accel tcg,tb-size" is not honoured. * For avg host size we use the precise numbers from tb_tree_stats though. */ - qemu_printf("gen code size %zu/%zu\n", - tcg_code_size(), tcg_code_capacity()); - qemu_printf("TB count %zu\n", nb_tbs); - qemu_printf("TB avg target size %zu max=%zu bytes\n", - nb_tbs ? tst.target_size / nb_tbs : 0, - tst.max_target_size); - qemu_printf("TB avg host size %zu bytes (expansion ratio: %0.1f)\n", - nb_tbs ? tst.host_size / nb_tbs : 0, - tst.target_size ? (double)tst.host_size / tst.target_size : 0); - qemu_printf("cross page TB count %zu (%zu%%)\n", tst.cross_page, - nb_tbs ? 
(tst.cross_page * 100) / nb_tbs : 0); - qemu_printf("direct jump count %zu (%zu%%) (2 jumps=%zu %zu%%)\n", - tst.direct_jmp_count, - nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, - tst.direct_jmp2_count, - nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); + g_string_append_printf(buf, "gen code size %zu/%zu\n", + tcg_code_size(), tcg_code_capacity()); + g_string_append_printf(buf, "TB count %zu\n", nb_tbs); + g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", + nb_tbs ? tst.target_size / nb_tbs : 0, + tst.max_target_size); + g_string_append_printf(buf, "TB avg host size %zu bytes " + "(expansion ratio: %0.1f)\n", + nb_tbs ? tst.host_size / nb_tbs : 0, + tst.target_size ? + (double)tst.host_size / tst.target_size : 0); + g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", + tst.cross_page, + nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); + g_string_append_printf(buf, "direct jump count %zu (%zu%%) " + "(2 jumps=%zu %zu%%)\n", + tst.direct_jmp_count, + nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, + tst.direct_jmp2_count, + nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); qht_statistics_init(&tb_ctx.htable, &hst); - print_qht_statistics(hst); + print_qht_statistics(hst, buf); qht_statistics_destroy(&hst); - qemu_printf("\nStatistics:\n"); - qemu_printf("TB flush count %u\n", - qatomic_read(&tb_ctx.tb_flush_count)); - qemu_printf("TB invalidate count %zu\n", - tcg_tb_phys_invalidate_count()); + g_string_append_printf(buf, "\nStatistics:\n"); + g_string_append_printf(buf, "TB flush count %u\n", + qatomic_read(&tb_ctx.tb_flush_count)); + g_string_append_printf(buf, "TB invalidate count %u\n", + qatomic_read(&tb_ctx.tb_phys_invalidate_count)); tlb_flush_counts(&flush_full, &flush_part, &flush_elide); - qemu_printf("TLB full flushes %zu\n", flush_full); - qemu_printf("TLB partial flushes %zu\n", flush_part); - qemu_printf("TLB elided flushes %zu\n", flush_elide); - tcg_dump_info(); + g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); + g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); + g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); + tcg_dump_info(buf); } -void dump_opcount_info(void) +void dump_opcount_info(GString *buf) { - tcg_dump_op_count(); + tcg_dump_op_count(buf); } #else /* CONFIG_USER_ONLY */ @@ -2837,6 +2396,38 @@ int page_check_range(target_ulong start, target_ulong len, int flags) return 0; } +void page_protect(tb_page_addr_t page_addr) +{ + target_ulong addr; + PageDesc *p; + int prot; + + p = page_find(page_addr >> TARGET_PAGE_BITS); + if (p && (p->flags & PAGE_WRITE)) { + /* + * Force the host page as non writable (writes will have a page fault + + * mprotect overhead). + */ + page_addr &= qemu_host_page_mask; + prot = 0; + for (addr = page_addr; addr < page_addr + qemu_host_page_size; + addr += TARGET_PAGE_SIZE) { + + p = page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + continue; + } + prot |= p->flags; + p->flags &= ~PAGE_WRITE; + } + mprotect(g2h_untagged(page_addr), qemu_host_page_size, + (prot & PAGE_BITS) & ~PAGE_WRITE); + if (DEBUG_TB_INVALIDATE_GATE) { + printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr); + } + } +} + /* called from signal handler: invalidate the code and unprotect the * page. 
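
Note: print_qht_statistics() and dump_exec_info() above now append their report to a caller-supplied GString instead of printing directly with qemu_printf(), so different consumers can route the same text. A tiny GLib sketch of the pattern (build against glib-2.0 via pkg-config); the statistic names are just examples.

#include <glib.h>
#include <stdio.h>

/* Same shape as the converted dump_exec_info(): append into a GString. */
static void dump_stats(GString *buf, unsigned tb_count, unsigned flushes)
{
    g_string_append_printf(buf, "TB count             %u\n", tb_count);
    g_string_append_printf(buf, "TB flush count       %u\n", flushes);
}

int main(void)
{
    GString *buf = g_string_new("");

    dump_stats(buf, 1234, 5);
    fputs(buf->str, stdout);      /* a monitor/QMP caller would wrap this */
    g_string_free(buf, TRUE);
    return 0;
}
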
Return 0 if the fault was not handled, 1 if it was handled, * and 2 if it was handled but the caller must cause the TB to be diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 4fd503b9ff2..60f136de60e 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -9,7 +9,6 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "cpu.h" #include "tcg/tcg.h" #include "tcg/tcg-op.h" #include "exec/exec-all.h" @@ -35,10 +34,30 @@ void translator_loop_temp_check(DisasContextBase *db) } } +bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest) +{ + /* Suppress goto_tb if requested. */ + if (tb_cflags(db->tb) & CF_NO_GOTO_TB) { + return false; + } + + /* Check for the dest on the same page as the start of the TB. */ + return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0; +} + +static inline void translator_page_protect(DisasContextBase *dcbase, + target_ulong pc) +{ +#ifdef CONFIG_USER_ONLY + dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK; + page_protect(pc); +#endif +} + void translator_loop(const TranslatorOps *ops, DisasContextBase *db, CPUState *cpu, TranslationBlock *tb, int max_insns) { - int bp_insn = 0; + uint32_t cflags = tb_cflags(tb); bool plugin_enabled; #ifdef CONFIG_TCG_LOG_INSTR /* @@ -56,10 +75,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, db->is_jmp = DISAS_NEXT; db->num_insns = 0; db->max_insns = max_insns; - db->singlestep_enabled = cpu->singlestep_enabled; + db->singlestep_enabled = cflags & CF_SINGLE_STEP; #ifdef TARGET_CHERI - db->pcc_base = tb->cs_base; - db->pcc_top = tb->cs_top; + db->pcc_base = tb->pcc_base; + db->pcc_top = tb->pcc_top; cheri_debug_assert(db->pcc_base == cap_get_base(cheri_get_recent_pcc(cpu->env_ptr))); cheri_debug_assert(db->pcc_top == @@ -68,6 +87,8 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, disas_capreg_reset_all(db); // TODO: verify cheri_flags are correct? #endif + translator_page_protect(db, db->pc_next); + ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ #ifdef CONFIG_TCG_LOG_INSTR @@ -110,8 +131,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, #endif tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, tb, - tb_cflags(db->tb) & CF_MEMI_ONLY); + plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY); while (true) { db->num_insns++; @@ -128,39 +148,17 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, plugin_gen_insn_start(cpu, db); } - /* Pass breakpoint hits to target for further processing */ - if (!db->singlestep_enabled - && unlikely(!QTAILQ_EMPTY(&cpu->breakpoints))) { - CPUBreakpoint *bp; - QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { - if (bp->pc == db->pc_next) { - if (ops->breakpoint_check(db, cpu, bp)) { - bp_insn = 1; - break; - } - } - } - /* The breakpoint_check hook may use DISAS_TOO_MANY to indicate - that only one more instruction is to be executed. Otherwise - it should use DISAS_NORETURN when generating an exception, - but may use a DISAS_TARGET_* value for Something Else. */ - if (db->is_jmp > DISAS_TOO_MANY) { - break; - } - } - /* Disassemble one instruction. The translate_insn hook should update db->pc_next and db->is_jmp to indicate what should be done next -- either exiting this loop or locate the start of the next instruction. 
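
Note: translator_use_goto_tb(), added above, centralises the decision of whether a translated block may chain directly to its destination: never when CF_NO_GOTO_TB is set, and otherwise only when the destination lies on the same guest page as the start of the TB. A standalone illustration of the page test follows; the TARGET_PAGE_BITS value here is only an example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TARGET_PAGE_BITS 12                        /* example: 4 KiB pages */
#define TARGET_PAGE_MASK ((uint64_t)-1 << TARGET_PAGE_BITS)

/* XOR keeps only the differing address bits, so masking with the page mask
 * yields zero exactly when both addresses fall on the same guest page. */
static bool same_guest_page(uint64_t pc_first, uint64_t dest)
{
    return ((pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
}

int main(void)
{
    printf("%d\n", same_guest_page(0x1000, 0x1ffc));   /* 1: same page     */
    printf("%d\n", same_guest_page(0x1ffc, 0x2000));   /* 0: page crossed  */
    return 0;
}
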
*/ - if (db->num_insns == db->max_insns - && (tb_cflags(db->tb) & CF_LAST_IO)) { + if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) { /* Accept I/O on the last instruction. */ gen_io_start(); ops->translate_insn(db, cpu); } else { /* we should only see CF_MEMI_ONLY for io_recompile */ - tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY)); + tcg_debug_assert(!(cflags & CF_MEMI_ONLY)); ops->translate_insn(db, cpu); } @@ -209,7 +207,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, /* Emit code to exit the TB, as indicated by db->is_jmp. */ ops->tb_stop(db, cpu); - gen_tb_end(db->tb, db->num_insns - bp_insn); + gen_tb_end(db->tb, db->num_insns); if (plugin_enabled) { plugin_gen_tb_end(cpu); @@ -230,3 +228,32 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, } #endif } + +static inline void translator_maybe_page_protect(DisasContextBase *dcbase, + target_ulong pc, size_t len) +{ +#ifdef CONFIG_USER_ONLY + target_ulong end = pc + len - 1; + + if (end > dcbase->page_protect_end) { + translator_page_protect(dcbase, end); + } +#endif +} + +#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ + type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ + abi_ptr pc, bool do_swap) \ + { \ + translator_maybe_page_protect(dcbase, pc, sizeof(type)); \ + type ret = load_fn(env, pc); \ + if (do_swap) { \ + ret = swap_fn(ret); \ + } \ + plugin_insn_append(pc, &ret, sizeof(ret)); \ + return ret; \ + } + +FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) + +#undef GEN_TRANSLATOR_LD diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c index b876f5c1e45..968cd3ca60d 100644 --- a/accel/tcg/user-exec-stub.c +++ b/accel/tcg/user-exec-stub.c @@ -1,7 +1,6 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" #include "sysemu/replay.h" -#include "sysemu/sysemu.h" bool enable_cpu_pm = false; diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index f96fd31c61b..8569a431cac 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -17,7 +17,6 @@ * License along with this library; if not, see . */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "disas/disas.h" #include "exec/exec-all.h" @@ -28,48 +27,18 @@ #include "exec/helper-proto.h" #include "qemu/atomic128.h" #include "trace/trace-root.h" -#include "trace/mem.h" - -#undef EAX -#undef ECX -#undef EDX -#undef EBX -#undef ESP -#undef EBP -#undef ESI -#undef EDI -#undef EIP -#ifdef __linux__ -#include -#endif +#include "tcg/tcg-ldst.h" +#include "internal.h" __thread uintptr_t helper_retaddr; //#define DEBUG_SIGNAL -/* exit the current TB from a signal handler. The host registers are - restored in a state compatible with the CPU emulator +/* + * Adjust the pc to pass to cpu_restore_state; return the memop type. */ -static void QEMU_NORETURN cpu_exit_tb_from_sighandler(CPUState *cpu, - sigset_t *old_set) -{ - /* XXX: use siglongjmp ? */ - sigprocmask(SIG_SETMASK, old_set, NULL); - cpu_loop_exit_noexc(cpu); -} - -/* 'pc' is the host PC at which the exception was raised. 'address' is - the effective address of the memory exception. 'is_write' is 1 if a - write caused the exception and otherwise 0'. 
'old_set' is the - signal set which should be restored */ -static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, - int is_write, sigset_t *old_set) +MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write) { - CPUState *cpu = current_cpu; - CPUClass *cc; - unsigned long address = (unsigned long)info->si_addr; - MMUAccessType access_type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; - switch (helper_retaddr) { default: /* @@ -78,7 +47,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * pointer into the generated code that will unwind to the * correct guest pc. */ - pc = helper_retaddr; + *pc = helper_retaddr; break; case 0: @@ -98,7 +67,7 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * Therefore, adjust to compensate for what will be done later * by cpu_restore_state_from_tb. */ - pc += GETPC_ADJ; + *pc += GETPC_ADJ; break; case 1: @@ -114,118 +83,97 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, * * Like tb_gen_code, release the memory lock before cpu_loop_exit. */ - pc = 0; - access_type = MMU_INST_FETCH; mmap_unlock(); - break; + *pc = 0; + return MMU_INST_FETCH; } - /* For synchronous signals we expect to be coming from the vCPU - * thread (so current_cpu should be valid) and either from running - * code or during translation which can fault as we cross pages. - * - * If neither is true then something has gone wrong and we should - * abort rather than try and restart the vCPU execution. - */ - if (!cpu || !cpu->running) { - printf("qemu:%s received signal outside vCPU context @ pc=0x%" - PRIxPTR "\n", __func__, pc); - abort(); - } + return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD; +} -#if defined(DEBUG_SIGNAL) - printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n", - pc, address, is_write, *(unsigned long *)old_set); -#endif - /* XXX: locking issue */ - /* Note that it is important that we don't call page_unprotect() unless - * this is really a "write to nonwriteable page" fault, because - * page_unprotect() assumes that if it is called for an access to - * a page that's writeable this means we had two threads racing and - * another thread got there first and already made the page writeable; - * so we will retry the access. If we were to call page_unprotect() - * for some other kind of fault that should really be passed to the - * guest, we'd end up in an infinite loop of retrying the faulting - * access. - */ - if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR && - h2g_valid(address)) { - switch (page_unprotect(h2g(address), pc)) { - case 0: - /* Fault not caused by a page marked unwritable to protect - * cached translations, must be the guest binary's problem. - */ - break; - case 1: - /* Fault caused by protection of cached translation; TBs - * invalidated, so resume execution. Retain helper_retaddr - * for a possible second fault. - */ - return 1; - case 2: - /* Fault caused by protection of cached translation, and the - * currently executing TB was modified and must be exited - * immediately. Clear helper_retaddr for next execution. 
- */ - clear_helper_retaddr(); - cpu_exit_tb_from_sighandler(cpu, old_set); - /* NORETURN */ - - default: - g_assert_not_reached(); - } +/** + * handle_sigsegv_accerr_write: + * @cpu: the cpu context + * @old_set: the sigset_t from the signal ucontext_t + * @host_pc: the host pc, adjusted for the signal + * @guest_addr: the guest address of the fault + * + * Return true if the write fault has been handled, and should be re-tried. + * + * Note that it is important that we don't call page_unprotect() unless + * this is really a "write to nonwriteable page" fault, because + * page_unprotect() assumes that if it is called for an access to + * a page that's writeable this means we had two threads racing and + * another thread got there first and already made the page writeable; + * so we will retry the access. If we were to call page_unprotect() + * for some other kind of fault that should really be passed to the + * guest, we'd end up in an infinite loop of retrying the faulting access. + */ +bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, + uintptr_t host_pc, abi_ptr guest_addr) +{ + switch (page_unprotect(guest_addr, host_pc)) { + case 0: + /* + * Fault not caused by a page marked unwritable to protect + * cached translations, must be the guest binary's problem. + */ + return false; + case 1: + /* + * Fault caused by protection of cached translation; TBs + * invalidated, so resume execution. + */ + return true; + case 2: + /* + * Fault caused by protection of cached translation, and the + * currently executing TB was modified and must be exited immediately. + */ + sigprocmask(SIG_SETMASK, old_set, NULL); + cpu_loop_exit_noexc(cpu); + /* NORETURN */ + default: + g_assert_not_reached(); } - - /* Convert forcefully to guest address space, invalid addresses - are still valid segv ones */ - address = h2g_nocheck(address); - - /* - * There is no way the target can handle this other than raising - * an exception. Undo signal and retaddr state prior to longjmp. 
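
Note: the probe_access_internal() rewrite in this hunk maps the access type to a required page flag, consults the user-mode page table, and distinguishes an unmapped page (reported as SEGV_MAPERR) from a mapped page with insufficient permissions before raising the guest fault via cpu_loop_exit_sigsegv(). A standalone sketch of that decision logic; the flag names and values are illustrative rather than QEMU's.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative page-flag bits and access types (not QEMU's definitions). */
enum { PAGE_VALID = 1, PAGE_READ = 2, PAGE_WRITE_ORG = 4, PAGE_EXEC = 8 };
typedef enum { ACC_LOAD, ACC_STORE, ACC_FETCH } AccessType;

typedef struct {
    bool fault;     /* would raise a guest SIGSEGV            */
    bool maperr;    /* true: SEGV_MAPERR, false: SEGV_ACCERR  */
} ProbeResult;

static ProbeResult probe(int page_flags, AccessType type)
{
    int need = (type == ACC_STORE) ? PAGE_WRITE_ORG
             : (type == ACC_FETCH) ? PAGE_EXEC
                                   : PAGE_READ;

    if (page_flags & need) {
        return (ProbeResult){ .fault = false };      /* access allowed */
    }
    return (ProbeResult){ .fault = true,
                          .maperr = !(page_flags & PAGE_VALID) };
}

int main(void)
{
    /* Mapped read-only page, attempted store: ACCERR, not MAPERR. */
    ProbeResult r = probe(PAGE_VALID | PAGE_READ, ACC_STORE);
    printf("fault=%d maperr=%d\n", r.fault, r.maperr);
    return 0;
}
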
- */ - sigprocmask(SIG_SETMASK, old_set, NULL); - clear_helper_retaddr(); - - cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->tlb_fill(cpu, address, 0, access_type, - MMU_USER_IDX, false, pc); - g_assert_not_reached(); } static QEMU_ALWAYS_INLINE int probe_access_internal(CPUArchState *env, target_ulong addr, int fault_size, MMUAccessType access_type, bool nonfault, uintptr_t ra) { - int flags; + int acc_flag; + bool maperr; switch (access_type) { case MMU_DATA_STORE: - flags = PAGE_WRITE; + acc_flag = PAGE_WRITE_ORG; break; case MMU_DATA_LOAD: - flags = PAGE_READ; + acc_flag = PAGE_READ; break; case MMU_INST_FETCH: - flags = PAGE_EXEC; + acc_flag = PAGE_EXEC; break; default: g_assert_not_reached(); } - if (!guest_addr_valid_untagged(addr) || - page_check_range(addr, 1, flags) < 0) { - if (nonfault) { - return TLB_INVALID_MASK; - } else { - CPUState *cpu = env_cpu(env); - CPUClass *cc = CPU_GET_CLASS(cpu); - cc->tcg_ops->tlb_fill(cpu, addr, fault_size, access_type, - MMU_USER_IDX, false, ra); - g_assert_not_reached(); + if (guest_addr_valid_untagged(addr)) { + int page_flags = page_get_flags(addr); + if (page_flags & acc_flag) { + return 0; /* success */ } + maperr = !(page_flags & PAGE_VALID); + } else { + maperr = true; } - return 0; + + if (nonfault) { + return TLB_INVALID_MASK; + } + + cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra); } int probe_access_flags(CPUArchState *env, target_ulong addr, @@ -254,919 +202,244 @@ probe_access_inlined(CPUArchState *env, target_ulong addr, int size, #include "probe-access.inc.c" -#if defined(__i386__) - -#if defined(__NetBSD__) -#include - -#define EIP_sig(context) ((context)->uc_mcontext.__gregs[_REG_EIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__FreeBSD__) || defined(__DragonFly__) -#include - -#define EIP_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_eip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__OpenBSD__) -#define EIP_sig(context) ((context)->sc_eip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#else -#define EIP_sig(context) ((context)->uc_mcontext.gregs[REG_EIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - int trapno; - -#ifndef REG_EIP -/* for glibc 2.1 */ -#define REG_EIP EIP -#define REG_ERR ERR -#define REG_TRAPNO TRAPNO -#endif - pc = EIP_sig(uc); - trapno = TRAP_sig(uc); - return handle_cpu_signal(pc, info, - trapno == 0xe ? 
(ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc)); -} - -#elif defined(__x86_64__) - -#ifdef __NetBSD__ -#define PC_sig(context) _UC_MACHINE_PC(context) -#define TRAP_sig(context) ((context)->uc_mcontext.__gregs[_REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.__gregs[_REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#elif defined(__OpenBSD__) -#define PC_sig(context) ((context)->sc_rip) -#define TRAP_sig(context) ((context)->sc_trapno) -#define ERROR_sig(context) ((context)->sc_err) -#define MASK_sig(context) ((context)->sc_mask) -#elif defined(__FreeBSD__) || defined(__DragonFly__) -#include - -#define PC_sig(context) (*((unsigned long *)&(context)->uc_mcontext.mc_rip)) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_trapno) -#define ERROR_sig(context) ((context)->uc_mcontext.mc_err) -#define MASK_sig(context) ((context)->uc_sigmask) -#else -#define PC_sig(context) ((context)->uc_mcontext.gregs[REG_RIP]) -#define TRAP_sig(context) ((context)->uc_mcontext.gregs[REG_TRAPNO]) -#define ERROR_sig(context) ((context)->uc_mcontext.gregs[REG_ERR]) -#define MASK_sig(context) ((context)->uc_sigmask) -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - unsigned long pc; -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - ucontext_t *uc = puc; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; -#else - ucontext_t *uc = puc; -#endif - - pc = PC_sig(uc); - return handle_cpu_signal(pc, info, - TRAP_sig(uc) == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0, - &MASK_sig(uc)); -} - -#elif defined(_ARCH_PPC) +/* The softmmu versions of these helpers are in cputlb.c. */ -/*********************************************************************** - * signal context platform-specific definitions - * From Wine +/* + * Verify that we have passed the correct MemOp to the correct function. + * + * We could present one function to target code, and dispatch based on + * the MemOp, but so far we have worked hard to avoid an indirect function + * call along the memory path. 
*/ -#ifdef linux -/* All Registers access - only for local access */ -#define REG_sig(reg_name, context) \ - ((context)->uc_mcontext.regs->reg_name) -/* Gpr Registers access */ -#define GPR_sig(reg_num, context) REG_sig(gpr[reg_num], context) -/* Program counter */ -#define IAR_sig(context) REG_sig(nip, context) -/* Machine State Register (Supervisor) */ -#define MSR_sig(context) REG_sig(msr, context) -/* Count register */ -#define CTR_sig(context) REG_sig(ctr, context) -/* User's integer exception register */ -#define XER_sig(context) REG_sig(xer, context) -/* Link register */ -#define LR_sig(context) REG_sig(link, context) -/* Condition register */ -#define CR_sig(context) REG_sig(ccr, context) - -/* Float Registers access */ -#define FLOAT_sig(reg_num, context) \ - (((double *)((char *)((context)->uc_mcontext.regs + 48 * 4)))[reg_num]) -#define FPSCR_sig(context) \ - (*(int *)((char *)((context)->uc_mcontext.regs + (48 + 32 * 2) * 4))) -/* Exception Registers access */ -#define DAR_sig(context) REG_sig(dar, context) -#define DSISR_sig(context) REG_sig(dsisr, context) -#define TRAP_sig(context) REG_sig(trap, context) -#endif /* linux */ - -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include -#define IAR_sig(context) ((context)->uc_mcontext.mc_srr0) -#define MSR_sig(context) ((context)->uc_mcontext.mc_srr1) -#define CTR_sig(context) ((context)->uc_mcontext.mc_ctr) -#define XER_sig(context) ((context)->uc_mcontext.mc_xer) -#define LR_sig(context) ((context)->uc_mcontext.mc_lr) -#define CR_sig(context) ((context)->uc_mcontext.mc_cr) -/* Exception Registers access */ -#define DAR_sig(context) ((context)->uc_mcontext.mc_dar) -#define DSISR_sig(context) ((context)->uc_mcontext.mc_dsisr) -#define TRAP_sig(context) ((context)->uc_mcontext.mc_exc) -#endif /* __FreeBSD__|| __FreeBSD_kernel__ */ - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +static void validate_memop(MemOpIdx oi, MemOp expected) { - siginfo_t *info = pinfo; -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) - ucontext_t *uc = puc; -#else - ucontext_t *uc = puc; +#ifdef CONFIG_DEBUG_TCG + MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP); + assert(have == expected); #endif - unsigned long pc; - int is_write; - - pc = IAR_sig(uc); - is_write = 0; -#if 0 - /* ppc 4xx case */ - if (DSISR_sig(uc) & 0x00800000) { - is_write = 1; - } -#else - if (TRAP_sig(uc) != 0x400 && (DSISR_sig(uc) & 0x02000000)) { - is_write = 1; - } -#endif - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } -#elif defined(__alpha__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +void helper_unaligned_ld(CPUArchState *env, target_ulong addr) { - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uint32_t *pc = uc->uc_mcontext.sc_pc; - uint32_t insn = *pc; - int is_write = 0; - - /* XXX: need kernel patch to get write flag faster */ - switch (insn >> 26) { - case 0x0d: /* stw */ - case 0x0e: /* stb */ - case 0x0f: /* stq_u */ - case 0x24: /* stf */ - case 0x25: /* stg */ - case 0x26: /* sts */ - case 0x27: /* stt */ - case 0x2c: /* stl */ - case 0x2d: /* stq */ - case 0x2e: /* stl_c */ - case 0x2f: /* stq_c */ - is_write = 1; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); + cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC()); } -#elif defined(__sparc__) -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +void helper_unaligned_st(CPUArchState *env, target_ulong addr) { - siginfo_t *info = pinfo; - int is_write; - uint32_t insn; -#if 
!defined(__arch64__) || defined(CONFIG_SOLARIS) - uint32_t *regs = (uint32_t *)(info + 1); - void *sigmask = (regs + 20); - /* XXX: is there a standard glibc define ? */ - unsigned long pc = regs[1]; -#else -#ifdef __linux__ - struct sigcontext *sc = puc; - unsigned long pc = sc->sigc_regs.tpc; - void *sigmask = (void *)sc->sigc_mask; -#elif defined(__OpenBSD__) - struct sigcontext *uc = puc; - unsigned long pc = uc->sc_pc; - void *sigmask = (void *)(long)uc->sc_mask; -#elif defined(__NetBSD__) - ucontext_t *uc = puc; - unsigned long pc = _UC_MACHINE_PC(uc); - void *sigmask = (void *)&uc->uc_sigmask; -#endif -#endif - - /* XXX: need kernel patch to get write flag faster */ - is_write = 0; - insn = *(uint32_t *)pc; - if ((insn >> 30) == 3) { - switch ((insn >> 19) & 0x3f) { - case 0x05: /* stb */ - case 0x15: /* stba */ - case 0x06: /* sth */ - case 0x16: /* stha */ - case 0x04: /* st */ - case 0x14: /* sta */ - case 0x07: /* std */ - case 0x17: /* stda */ - case 0x0e: /* stx */ - case 0x1e: /* stxa */ - case 0x24: /* stf */ - case 0x34: /* stfa */ - case 0x27: /* stdf */ - case 0x37: /* stdfa */ - case 0x26: /* stqf */ - case 0x36: /* stqfa */ - case 0x25: /* stfsr */ - case 0x3c: /* casa */ - case 0x3e: /* casxa */ - is_write = 1; - break; - } - } - return handle_cpu_signal(pc, info, is_write, sigmask); + cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC()); } -#elif defined(__arm__) - -#if defined(__NetBSD__) -#include -#include -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) +static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t ra, MMUAccessType type) { - siginfo_t *info = pinfo; -#if defined(__NetBSD__) - ucontext_t *uc = puc; - siginfo_t *si = pinfo; -#else - ucontext_t *uc = puc; -#endif - unsigned long pc; - uint32_t fsr; - int is_write; - -#if defined(__NetBSD__) - pc = uc->uc_mcontext.__gregs[_REG_R15]; -#elif defined(__GLIBC__) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) - pc = uc->uc_mcontext.gregs[R15]; -#else - pc = uc->uc_mcontext.arm_pc; -#endif - -#ifdef __NetBSD__ - fsr = si->si_trap; -#else - fsr = uc->uc_mcontext.error_code; -#endif - /* - * In the FSR, bit 11 is WnR, assuming a v6 or - * later processor. On v5 we will always report - * this as a read, which will fail later. 
- */ - is_write = extract32(fsr, 11, 1); - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} + MemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + void *ret; -#elif defined(__aarch64__) - -#if defined(__NetBSD__) - -#include -#include - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - ucontext_t *uc = puc; - siginfo_t *si = pinfo; - unsigned long pc; - int is_write; - uint32_t esr; - - pc = uc->uc_mcontext.__gregs[_REG_PC]; - esr = si->si_trap; - - /* - * siginfo_t::si_trap is the ESR value, for data aborts ESR.EC - * is 0b10010x: then bit 6 is the WnR bit - */ - is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; - return handle_cpu_signal(pc, si, is_write, &uc->uc_sigmask); -} - -#else - -#ifndef ESR_MAGIC -/* Pre-3.16 kernel headers don't have these, so provide fallback definitions */ -#define ESR_MAGIC 0x45535201 -struct esr_context { - struct _aarch64_ctx head; - uint64_t esr; -}; -#endif - -static inline struct _aarch64_ctx *first_ctx(ucontext_t *uc) -{ - return (struct _aarch64_ctx *)&uc->uc_mcontext.__reserved; -} - -static inline struct _aarch64_ctx *next_ctx(struct _aarch64_ctx *hdr) -{ - return (struct _aarch64_ctx *)((char *)hdr + hdr->size); -} - -int cpu_signal_handler(int host_signum, void *pinfo, void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uintptr_t pc = uc->uc_mcontext.pc; - bool is_write; - struct _aarch64_ctx *hdr; - struct esr_context const *esrctx = NULL; - - /* Find the esr_context, which has the WnR bit in it */ - for (hdr = first_ctx(uc); hdr->magic; hdr = next_ctx(hdr)) { - if (hdr->magic == ESR_MAGIC) { - esrctx = (struct esr_context const *)hdr; - break; - } + /* Enforce guest required alignment. */ + if (unlikely(addr & ((1 << a_bits) - 1))) { + cpu_loop_exit_sigbus(env_cpu(env), addr, type, ra); } - if (esrctx) { - /* For data aborts ESR.EC is 0b10010x: then bit 6 is the WnR bit */ - uint64_t esr = esrctx->esr; - is_write = extract32(esr, 27, 5) == 0x12 && extract32(esr, 6, 1) == 1; - } else { - /* - * Fall back to parsing instructions; will only be needed - * for really ancient (pre-3.16) kernels. - */ - uint32_t insn = *(uint32_t *)pc; - - is_write = ((insn & 0xbfff0000) == 0x0c000000 /* C3.3.1 */ - || (insn & 0xbfe00000) == 0x0c800000 /* C3.3.2 */ - || (insn & 0xbfdf0000) == 0x0d000000 /* C3.3.3 */ - || (insn & 0xbfc00000) == 0x0d800000 /* C3.3.4 */ - || (insn & 0x3f400000) == 0x08000000 /* C3.3.6 */ - || (insn & 0x3bc00000) == 0x39000000 /* C3.3.13 */ - || (insn & 0x3fc00000) == 0x3d800000 /* ... 128bit */ - /* Ignore bits 10, 11 & 21, controlling indexing. */ - || (insn & 0x3bc00000) == 0x38000000 /* C3.3.8-12 */ - || (insn & 0x3fe00000) == 0x3c800000 /* ... 128bit */ - /* Ignore bits 23 & 24, controlling indexing. */ - || (insn & 0x3a400000) == 0x28000000); /* C3.3.7,14-16 */ - } - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} -#endif - -#elif defined(__s390__) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - unsigned long pc; - uint16_t *pinsn; - int is_write = 0; - - pc = uc->uc_mcontext.psw.addr; - - /* ??? On linux, the non-rt signal handler has 4 (!) arguments instead - of the normal 2 arguments. The 3rd argument contains the "int_code" - from the hardware which does in fact contain the is_write value. - The rt signal handler, as far as I can tell, does not give this value - at all. Not that we could get to it from here even if it were. */ - /* ??? 
This is not even close to complete, since it ignores all - of the read-modify-write instructions. */ - pinsn = (uint16_t *)pc; - switch (pinsn[0] >> 8) { - case 0x50: /* ST */ - case 0x42: /* STC */ - case 0x40: /* STH */ - is_write = 1; - break; - case 0xc4: /* RIL format insns */ - switch (pinsn[0] & 0xf) { - case 0xf: /* STRL */ - case 0xb: /* STGRL */ - case 0x7: /* STHRL */ - is_write = 1; - } - break; - case 0xe3: /* RXY format insns */ - switch (pinsn[2] & 0xff) { - case 0x50: /* STY */ - case 0x24: /* STG */ - case 0x72: /* STCY */ - case 0x70: /* STHY */ - case 0x8e: /* STPQ */ - case 0x3f: /* STRVH */ - case 0x3e: /* STRV */ - case 0x2f: /* STRVG */ - is_write = 1; - } - break; - } - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__mips__) - -#if defined(__misp16) || defined(__mips_micromips) -#error "Unsupported encoding" -#endif - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - uintptr_t pc = uc->uc_mcontext.pc; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* Detect all store instructions at program counter. */ - switch((insn >> 26) & 077) { - case 050: /* SB */ - case 051: /* SH */ - case 052: /* SWL */ - case 053: /* SW */ - case 054: /* SDL */ - case 055: /* SDR */ - case 056: /* SWR */ - case 070: /* SC */ - case 071: /* SWC1 */ - case 074: /* SCD */ - case 075: /* SDC1 */ - case 077: /* SD */ -#if !defined(__mips_isa_rev) || __mips_isa_rev < 6 - case 072: /* SWC2 */ - case 076: /* SDC2 */ -#endif - is_write = 1; - break; - case 023: /* COP1X */ - /* Required in all versions of MIPS64 since - MIPS64r1 and subsequent versions of MIPS32r2. */ - switch (insn & 077) { - case 010: /* SWXC1 */ - case 011: /* SDXC1 */ - case 015: /* SUXC1 */ - is_write = 1; - } - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#elif defined(__riscv) - -int cpu_signal_handler(int host_signum, void *pinfo, - void *puc) -{ - siginfo_t *info = pinfo; - ucontext_t *uc = puc; - greg_t pc = uc->uc_mcontext.__gregs[REG_PC]; - uint32_t insn = *(uint32_t *)pc; - int is_write = 0; - - /* Detect store by reading the instruction at the program - counter. Note: we currently only generate 32-bit - instructions so we thus only detect 32-bit stores */ - switch (((insn >> 0) & 0b11)) { - case 3: - switch (((insn >> 2) & 0b11111)) { - case 8: - switch (((insn >> 12) & 0b111)) { - case 0: /* sb */ - case 1: /* sh */ - case 2: /* sw */ - case 3: /* sd */ - case 4: /* sq */ - is_write = 1; - break; - default: - break; - } - break; - case 9: - switch (((insn >> 12) & 0b111)) { - case 2: /* fsw */ - case 3: /* fsd */ - case 4: /* fsq */ - is_write = 1; - break; - default: - break; - } - break; - default: - break; - } - } - - /* Check for compressed instructions */ - switch (((insn >> 13) & 0b111)) { - case 7: - switch (insn & 0b11) { - case 0: /*c.sd */ - case 2: /* c.sdsp */ - is_write = 1; - break; - default: - break; - } - break; - case 6: - switch (insn & 0b11) { - case 0: /* c.sw */ - case 3: /* c.swsp */ - is_write = 1; - break; - default: - break; - } - break; - default: - break; - } - - return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); -} - -#else - -#error host CPU specific signal handler needed - -#endif - -/* The softmmu versions of these helpers are in cputlb.c. 
*/ - -uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_UB, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldub_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr) -{ - int ret; - uint16_t meminfo = trace_mem_get_info(MO_SB, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldsb_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = lduw_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr) -{ - int ret; - uint16_t meminfo = trace_mem_get_info(MO_BESW, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldsw_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldl_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr) -{ - uint64_t ret; - uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldq_be_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = lduw_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + ret = g2h(env_cpu(env), addr); + set_helper_retaddr(ra); return ret; } -int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr) +uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - int ret; - uint16_t meminfo = trace_mem_get_info(MO_LESW, MMU_USER_IDX, false); + void *haddr; + uint8_t ret; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldsw_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr) -{ - uint32_t ret; - uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldl_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr) -{ - uint64_t ret; - uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, false); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - ret = ldq_le_p(g2h(env_cpu(env), ptr)); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); - return ret; -} - -uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - uint32_t ret; - - set_helper_retaddr(retaddr); - ret = cpu_ldub_data(env, ptr); + 
validate_memop(oi, MO_UB); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldub_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - int ret; + void *haddr; + uint16_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldsb_data(env, ptr); + validate_memop(oi, MO_BEUW); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = lduw_be_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint32_t ret; - set_helper_retaddr(retaddr); - ret = cpu_lduw_be_data(env, ptr); + validate_memop(oi, MO_BEUL); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldl_be_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - int ret; - - set_helper_retaddr(retaddr); - ret = cpu_ldsw_be_data(env, ptr); - clear_helper_retaddr(); - return ret; -} - -uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - uint32_t ret; - - set_helper_retaddr(retaddr); - ret = cpu_ldl_be_data(env, ptr); - clear_helper_retaddr(); - return ret; -} - -uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint64_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldq_be_data(env, ptr); - clear_helper_retaddr(); - return ret; -} - -uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) -{ - uint32_t ret; - - set_helper_retaddr(retaddr); - ret = cpu_lduw_le_data(env, ptr); + validate_memop(oi, MO_BEQ); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldq_be_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { - int ret; + void *haddr; + uint16_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldsw_le_data(env, ptr); + validate_memop(oi, MO_LEUW); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = lduw_le_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint32_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldl_le_data(env, ptr); + validate_memop(oi, MO_LEUL); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldl_le_p(haddr); clear_helper_retaddr(); + 
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t retaddr) +uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr, + MemOpIdx oi, uintptr_t ra) { + void *haddr; uint64_t ret; - set_helper_retaddr(retaddr); - ret = cpu_ldq_le_data(env, ptr); + validate_memop(oi, MO_LEQ); + trace_guest_ld_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD); + ret = ldq_le_p(haddr); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R); return ret; } -void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val) +void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_UB, MMU_USER_IDX, true); + void *haddr; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stb_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} - -void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val) -{ - uint16_t meminfo = trace_mem_get_info(MO_BEUW, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stw_be_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} - -void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val) -{ - uint16_t meminfo = trace_mem_get_info(MO_BEUL, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stl_be_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} - -void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val) -{ - uint16_t meminfo = trace_mem_get_info(MO_BEQ, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stq_be_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + validate_memop(oi, MO_UB); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stb_p(haddr, val); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val) +void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_LEUW, MMU_USER_IDX, true); + void *haddr; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stw_le_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + validate_memop(oi, MO_BEUW); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stw_be_p(haddr, val); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val) +void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_LEUL, MMU_USER_IDX, true); + void *haddr; - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stl_le_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); + validate_memop(oi, MO_BEUL); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stl_be_p(haddr, val); + clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void 
cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val) +void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, + MemOpIdx oi, uintptr_t ra) { - uint16_t meminfo = trace_mem_get_info(MO_LEQ, MMU_USER_IDX, true); - - trace_guest_mem_before_exec(env_cpu(env), ptr, meminfo); - stq_le_p(g2h(env_cpu(env), ptr), val); - qemu_plugin_vcpu_mem_cb(env_cpu(env), ptr, meminfo); -} + void *haddr; -void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stb_data(env, ptr, val); + validate_memop(oi, MO_BEQ); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stq_be_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val, + MemOpIdx oi, uintptr_t ra) { - set_helper_retaddr(retaddr); - cpu_stw_be_data(env, ptr, val); - clear_helper_retaddr(); -} + void *haddr; -void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stl_be_data(env, ptr, val); + validate_memop(oi, MO_LEUW); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stw_le_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr) +void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val, + MemOpIdx oi, uintptr_t ra) { - set_helper_retaddr(retaddr); - cpu_stq_be_data(env, ptr, val); - clear_helper_retaddr(); -} + void *haddr; -void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stw_le_data(env, ptr, val); + validate_memop(oi, MO_LEUL); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stl_le_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } -void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, - uint32_t val, uintptr_t retaddr) +void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val, + MemOpIdx oi, uintptr_t ra) { - set_helper_retaddr(retaddr); - cpu_stl_le_data(env, ptr, val); - clear_helper_retaddr(); -} + void *haddr; -void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr, - uint64_t val, uintptr_t retaddr) -{ - set_helper_retaddr(retaddr); - cpu_stq_le_data(env, ptr, val); + validate_memop(oi, MO_LEQ); + trace_guest_st_before_exec(env_cpu(env), addr, oi); + haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE); + stq_le_p(haddr, val); clear_helper_retaddr(); + qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W); } uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr) @@ -1209,29 +482,48 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr) return ret; } -/* Do not allow unaligned operations to proceed. Return the host address. */ +#include "ldst_common.c.inc" + +/* + * Do not allow unaligned operations to proceed. Return the host address. + * + * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE. 
+ */ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, - int size, uintptr_t retaddr) + MemOpIdx oi, int size, int prot, + uintptr_t retaddr) { + MemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + void *ret; + + /* Enforce guest required alignment. */ + if (unlikely(addr & ((1 << a_bits) - 1))) { + MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE; + cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr); + } + /* Enforce qemu required alignment. */ if (unlikely(addr & (size - 1))) { cpu_loop_exit_atomic(env_cpu(env), retaddr); } - void *ret = g2h(env_cpu(env), addr); + + ret = g2h(env_cpu(env), addr); set_helper_retaddr(retaddr); return ret; } -/* Macro to call the above, with local variables from the use context. */ -#define ATOMIC_MMU_DECLS do {} while (0) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) -#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) -#define ATOMIC_MMU_IDX MMU_USER_IDX +#include "atomic_common.c.inc" -#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) -#define EXTRA_ARGS +/* + * First set of functions passes in OI and RETADDR. + * This makes them callable from other helpers. + */ -#include "atomic_common.c.inc" +#define ATOMIC_NAME(X) \ + glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu) +#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0) +#define ATOMIC_MMU_IDX MMU_USER_IDX #define DATA_SIZE 1 #include "atomic_template.h" @@ -1247,20 +539,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #include "atomic_template.h" #endif -/* The following is only callable from other helpers, and matches up - with the softmmu version. */ - #if HAVE_ATOMIC128 || HAVE_CMPXCHG128 - -#undef EXTRA_ARGS -#undef ATOMIC_NAME -#undef ATOMIC_MMU_LOOKUP - -#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr -#define ATOMIC_NAME(X) \ - HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) -#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) - #define DATA_SIZE 16 #include "atomic_template.h" #endif diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c index fcc2f62864f..2b9789e6477 100644 --- a/audio/alsaaudio.c +++ b/audio/alsaaudio.c @@ -34,6 +34,8 @@ #define AUDIO_CAP "alsa" #include "audio_int.h" +#define DEBUG_ALSA 0 + struct pollhlp { snd_pcm_t *handle; struct pollfd *pfds; @@ -587,16 +589,12 @@ static int alsa_open(bool in, struct alsa_params_req *req, *handlep = handle; - if (obtfmt != req->fmt || - obt->nchannels != req->nchannels || - obt->freq != req->freq) { + if (DEBUG_ALSA || obtfmt != req->fmt || + obt->nchannels != req->nchannels || obt->freq != req->freq) { dolog ("Audio parameters for %s\n", typ); alsa_dump_info(req, obt, obtfmt, apdo); } -#ifdef DEBUG - alsa_dump_info(req, obt, obtfmt, apdo); -#endif return 0; err: diff --git a/audio/audio.c b/audio/audio.c index 534278edfed..54a153c0ef0 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -32,6 +32,7 @@ #include "qapi/qapi-visit-audio.h" #include "qemu/cutils.h" #include "qemu/module.h" +#include "qemu-common.h" #include "sysemu/replay.h" #include "sysemu/runstate.h" #include "ui/qemu-spice.h" @@ -704,7 +705,7 @@ static size_t audio_pcm_sw_write(SWVoiceOut *sw, void *buf, size_t size) if (live == hwsamples) { #ifdef DEBUG_OUT - dolog ("%s is full %d\n", sw->name, live); + dolog ("%s is full %zu\n", sw->name, live); #endif return 0; } @@ -994,7 +995,7 @@ static size_t audio_get_avail (SWVoiceIn *sw) } ldebug ( - "%s: get_avail live %d ret 
%" PRId64 "\n", + "%s: get_avail live %zu ret %" PRId64 "\n", SW_NAME (sw), live, (((int64_t) live << 32) / sw->ratio) * sw->info.bytes_per_frame ); @@ -1021,7 +1022,7 @@ static size_t audio_get_free(SWVoiceOut *sw) dead = sw->hw->mix_buf->size - live; #ifdef DEBUG_OUT - dolog ("%s: get_free live %d dead %d ret %" PRId64 "\n", + dolog ("%s: get_free live %zu dead %zu ret %" PRId64 "\n", SW_NAME (sw), live, dead, (((int64_t) dead << 32) / sw->ratio) * sw->info.bytes_per_frame); @@ -1621,10 +1622,20 @@ void audio_cleanup(void) } } +static bool vmstate_audio_needed(void *opaque) +{ + /* + * Never needed, this vmstate only exists in case + * an old qemu sends it to us. + */ + return false; +} + static const VMStateDescription vmstate_audio = { .name = "audio", .version_id = 1, .minimum_version_id = 1, + .needed = vmstate_audio_needed, .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() } @@ -2172,6 +2183,14 @@ const char *audio_get_id(QEMUSoundCard *card) } } +const char *audio_application_name(void) +{ + const char *vm_name; + + vm_name = qemu_get_vm_name(); + return vm_name ? vm_name : "qemu"; +} + void audio_rate_start(RateCtl *rate) { memset(rate, 0, sizeof(RateCtl)); diff --git a/audio/audio_int.h b/audio/audio_int.h index 06f0913835b..6d685e24a38 100644 --- a/audio/audio_int.h +++ b/audio/audio_int.h @@ -243,6 +243,8 @@ void *audio_calloc (const char *funcname, int nmemb, size_t size); void audio_run(AudioState *s, const char *msg); +const char *audio_application_name(void); + typedef struct RateCtl { int64_t start_ticks; int64_t bytes_sent; diff --git a/audio/coreaudio.c b/audio/coreaudio.c index 578ec9b8b2e..d8a21d3e507 100644 --- a/audio/coreaudio.c +++ b/audio/coreaudio.c @@ -26,6 +26,7 @@ #include #include /* pthread_X */ +#include "qemu/main-loop.h" #include "qemu/module.h" #include "audio.h" @@ -34,12 +35,11 @@ typedef struct coreaudioVoiceOut { HWVoiceOut hw; - pthread_mutex_t mutex; + pthread_mutex_t buf_mutex; AudioDeviceID outputDeviceID; int frameSizeSetting; uint32_t bufferCount; UInt32 audioDevicePropertyBufferFrameSize; - AudioStreamBasicDescription outputStreamBasicDescription; AudioDeviceIOProcID ioprocid; bool enabled; } coreaudioVoiceOut; @@ -114,24 +114,6 @@ static OSStatus coreaudio_set_framesize(AudioDeviceID id, UInt32 *framesize) framesize); } -static OSStatus coreaudio_get_streamformat(AudioDeviceID id, - AudioStreamBasicDescription *d) -{ - UInt32 size = sizeof(*d); - AudioObjectPropertyAddress addr = { - kAudioDevicePropertyStreamFormat, - kAudioDevicePropertyScopeOutput, - kAudioObjectPropertyElementMaster - }; - - return AudioObjectGetPropertyData(id, - &addr, - 0, - NULL, - &size, - d); -} - static OSStatus coreaudio_set_streamformat(AudioDeviceID id, AudioStreamBasicDescription *d) { @@ -260,11 +242,11 @@ static void GCC_FMT_ATTR (3, 4) coreaudio_logerr2 ( #define coreaudio_playback_logerr(status, ...) 
\ coreaudio_logerr2(status, "playback", __VA_ARGS__) -static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name) +static int coreaudio_buf_lock (coreaudioVoiceOut *core, const char *fn_name) { int err; - err = pthread_mutex_lock (&core->mutex); + err = pthread_mutex_lock (&core->buf_mutex); if (err) { dolog ("Could not lock voice for %s\nReason: %s\n", fn_name, strerror (err)); @@ -273,11 +255,11 @@ static int coreaudio_lock (coreaudioVoiceOut *core, const char *fn_name) return 0; } -static int coreaudio_unlock (coreaudioVoiceOut *core, const char *fn_name) +static int coreaudio_buf_unlock (coreaudioVoiceOut *core, const char *fn_name) { int err; - err = pthread_mutex_unlock (&core->mutex); + err = pthread_mutex_unlock (&core->buf_mutex); if (err) { dolog ("Could not unlock voice for %s\nReason: %s\n", fn_name, strerror (err)); @@ -292,13 +274,13 @@ static int coreaudio_unlock (coreaudioVoiceOut *core, const char *fn_name) coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw; \ ret_type ret; \ \ - if (coreaudio_lock(core, "coreaudio_" #name)) { \ + if (coreaudio_buf_lock(core, "coreaudio_" #name)) { \ return 0; \ } \ \ ret = glue(audio_generic_, name)args; \ \ - coreaudio_unlock(core, "coreaudio_" #name); \ + coreaudio_buf_unlock(core, "coreaudio_" #name); \ return ret; \ } COREAUDIO_WRAPPER_FUNC(get_buffer_out, void *, (HWVoiceOut *hw, size_t *size), @@ -310,7 +292,10 @@ COREAUDIO_WRAPPER_FUNC(write, size_t, (HWVoiceOut *hw, void *buf, size_t size), (hw, buf, size)) #undef COREAUDIO_WRAPPER_FUNC -/* callback to feed audiooutput buffer */ +/* + * callback to feed audiooutput buffer. called without iothread lock. + * allowed to lock "buf_mutex", but disallowed to have any other locks. + */ static OSStatus audioDeviceIOProc( AudioDeviceID inDevice, const AudioTimeStamp *inNow, @@ -326,13 +311,13 @@ static OSStatus audioDeviceIOProc( coreaudioVoiceOut *core = (coreaudioVoiceOut *) hwptr; size_t len; - if (coreaudio_lock (core, "audioDeviceIOProc")) { + if (coreaudio_buf_lock (core, "audioDeviceIOProc")) { inInputTime = 0; return 0; } if (inDevice != core->outputDeviceID) { - coreaudio_unlock (core, "audioDeviceIOProc(old device)"); + coreaudio_buf_unlock (core, "audioDeviceIOProc(old device)"); return 0; } @@ -342,7 +327,7 @@ static OSStatus audioDeviceIOProc( /* if there are not enough samples, set signal and return */ if (pending_frames < frameCount) { inInputTime = 0; - coreaudio_unlock (core, "audioDeviceIOProc(empty)"); + coreaudio_buf_unlock (core, "audioDeviceIOProc(empty)"); return 0; } @@ -364,7 +349,7 @@ static OSStatus audioDeviceIOProc( out += write_len; } - coreaudio_unlock (core, "audioDeviceIOProc"); + coreaudio_buf_unlock (core, "audioDeviceIOProc"); return 0; } @@ -373,6 +358,17 @@ static OSStatus init_out_device(coreaudioVoiceOut *core) OSStatus status; AudioValueRange frameRange; + AudioStreamBasicDescription streamBasicDescription = { + .mBitsPerChannel = core->hw.info.bits, + .mBytesPerFrame = core->hw.info.bytes_per_frame, + .mBytesPerPacket = core->hw.info.bytes_per_frame, + .mChannelsPerFrame = core->hw.info.nchannels, + .mFormatFlags = kLinearPCMFormatFlagIsFloat, + .mFormatID = kAudioFormatLinearPCM, + .mFramesPerPacket = 1, + .mSampleRate = core->hw.info.freq + }; + status = coreaudio_get_voice(&core->outputDeviceID); if (status != kAudioHardwareNoError) { coreaudio_playback_logerr (status, @@ -432,34 +428,30 @@ static OSStatus init_out_device(coreaudioVoiceOut *core) } core->hw.samples = core->bufferCount * core->audioDevicePropertyBufferFrameSize; - /* 
get StreamFormat */ - status = coreaudio_get_streamformat(core->outputDeviceID, - &core->outputStreamBasicDescription); - if (status == kAudioHardwareBadObjectError) { - return 0; - } - if (status != kAudioHardwareNoError) { - coreaudio_playback_logerr (status, - "Could not get Device Stream properties\n"); - core->outputDeviceID = kAudioDeviceUnknown; - return status; - } - /* set Samplerate */ status = coreaudio_set_streamformat(core->outputDeviceID, - &core->outputStreamBasicDescription); + &streamBasicDescription); if (status == kAudioHardwareBadObjectError) { return 0; } if (status != kAudioHardwareNoError) { coreaudio_playback_logerr (status, "Could not set samplerate %lf\n", - core->outputStreamBasicDescription.mSampleRate); + streamBasicDescription.mSampleRate); core->outputDeviceID = kAudioDeviceUnknown; return status; } - /* set Callback */ + /* + * set Callback. + * + * On macOS 11.3.1, Core Audio calls AudioDeviceIOProc after calling an + * internal function named HALB_Mutex::Lock(), which locks a mutex in + * HALB_IOThread::Entry(void*). HALB_Mutex::Lock() is also called in + * AudioObjectGetPropertyData, which is called by coreaudio driver. + * Therefore, the specified callback must be designed to avoid a deadlock + * with the callers of AudioObjectGetPropertyData. + */ core->ioprocid = NULL; status = AudioDeviceCreateIOProcID(core->outputDeviceID, audioDeviceIOProc, @@ -542,6 +534,7 @@ static void update_device_playback_state(coreaudioVoiceOut *core) } } +/* called without iothread lock. */ static OSStatus handle_voice_change( AudioObjectID in_object_id, UInt32 in_number_addresses, @@ -551,9 +544,7 @@ static OSStatus handle_voice_change( OSStatus status; coreaudioVoiceOut *core = in_client_data; - if (coreaudio_lock(core, __func__)) { - abort(); - } + qemu_mutex_lock_iothread(); if (core->outputDeviceID) { fini_out_device(core); @@ -564,7 +555,7 @@ static OSStatus handle_voice_change( update_device_playback_state(core); } - coreaudio_unlock (core, __func__); + qemu_mutex_unlock_iothread(); return status; } @@ -579,14 +570,10 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, struct audsettings obt_as; /* create mutex */ - err = pthread_mutex_init(&core->mutex, NULL); + err = pthread_mutex_init(&core->buf_mutex, NULL); if (err) { dolog("Could not create mutex\nReason: %s\n", strerror (err)); - goto mutex_error; - } - - if (coreaudio_lock(core, __func__)) { - goto lock_error; + return -1; } obt_as = *as; @@ -598,7 +585,6 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, qapi_AudiodevCoreaudioPerDirectionOptions_base(cpdo), as, 11610); core->bufferCount = cpdo->has_buffer_count ? 
cpdo->buffer_count : 4; - core->outputStreamBasicDescription.mSampleRate = (Float64) as->freq; status = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &voice_addr, handle_voice_change, @@ -606,37 +592,21 @@ static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as, if (status != kAudioHardwareNoError) { coreaudio_playback_logerr (status, "Could not listen to voice property change\n"); - goto listener_error; + return -1; } if (init_out_device(core)) { - goto device_error; + status = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, + &voice_addr, + handle_voice_change, + core); + if (status != kAudioHardwareNoError) { + coreaudio_playback_logerr(status, + "Could not remove voice property change listener\n"); + } } - coreaudio_unlock(core, __func__); return 0; - -device_error: - status = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, - &voice_addr, - handle_voice_change, - core); - if (status != kAudioHardwareNoError) { - coreaudio_playback_logerr(status, - "Could not remove voice property change listener\n"); - } - -listener_error: - coreaudio_unlock(core, __func__); - -lock_error: - err = pthread_mutex_destroy(&core->mutex); - if (err) { - dolog("Could not destroy mutex\nReason: %s\n", strerror (err)); - } - -mutex_error: - return -1; } static void coreaudio_fini_out (HWVoiceOut *hw) @@ -645,10 +615,6 @@ static void coreaudio_fini_out (HWVoiceOut *hw) int err; coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw; - if (coreaudio_lock(core, __func__)) { - abort(); - } - status = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &voice_addr, handle_voice_change, @@ -659,10 +625,8 @@ static void coreaudio_fini_out (HWVoiceOut *hw) fini_out_device(core); - coreaudio_unlock(core, __func__); - /* destroy mutex */ - err = pthread_mutex_destroy(&core->mutex); + err = pthread_mutex_destroy(&core->buf_mutex); if (err) { dolog("Could not destroy mutex\nReason: %s\n", strerror (err)); } @@ -672,14 +636,8 @@ static void coreaudio_enable_out(HWVoiceOut *hw, bool enable) { coreaudioVoiceOut *core = (coreaudioVoiceOut *) hw; - if (coreaudio_lock(core, __func__)) { - abort(); - } - core->enabled = enable; update_device_playback_state(core); - - coreaudio_unlock(core, __func__); } static void *coreaudio_audio_init(Audiodev *dev) diff --git a/audio/jackaudio.c b/audio/jackaudio.c index 3031c4e29bd..e7de6d5433e 100644 --- a/audio/jackaudio.c +++ b/audio/jackaudio.c @@ -26,7 +26,6 @@ #include "qemu/module.h" #include "qemu/atomic.h" #include "qemu/main-loop.h" -#include "qemu-common.h" #include "audio.h" #define AUDIO_CAP "jack" @@ -412,7 +411,7 @@ static int qjack_client_init(QJackClient *c) snprintf(client_name, sizeof(client_name), "%s-%s", c->out ? "out" : "in", - c->opt->client_name ? c->opt->client_name : qemu_get_vm_name()); + c->opt->client_name ? 
c->opt->client_name : audio_application_name()); if (c->opt->exact_name) { options |= JackUseExactName; diff --git a/audio/meson.build b/audio/meson.build index 7d53b0f920f..462533bb8c2 100644 --- a/audio/meson.build +++ b/audio/meson.build @@ -7,23 +7,22 @@ softmmu_ss.add(files( 'wavcapture.c', )) -softmmu_ss.add(when: [coreaudio, 'CONFIG_AUDIO_COREAUDIO'], if_true: files('coreaudio.c')) -softmmu_ss.add(when: [dsound, 'CONFIG_AUDIO_DSOUND'], if_true: files('dsoundaudio.c')) -softmmu_ss.add(when: ['CONFIG_AUDIO_WIN_INT'], if_true: files('audio_win_int.c')) +softmmu_ss.add(when: coreaudio, if_true: files('coreaudio.c')) +softmmu_ss.add(when: dsound, if_true: files('dsoundaudio.c', 'audio_win_int.c')) audio_modules = {} foreach m : [ - ['CONFIG_AUDIO_ALSA', 'alsa', alsa, 'alsaaudio.c'], - ['CONFIG_AUDIO_OSS', 'oss', oss, 'ossaudio.c'], - ['CONFIG_AUDIO_PA', 'pa', pulse, 'paaudio.c'], - ['CONFIG_AUDIO_SDL', 'sdl', sdl, 'sdlaudio.c'], - ['CONFIG_AUDIO_JACK', 'jack', jack, 'jackaudio.c'], - ['CONFIG_SPICE', 'spice', spice, 'spiceaudio.c'] + ['alsa', alsa, files('alsaaudio.c')], + ['oss', oss, files('ossaudio.c')], + ['pa', pulse, files('paaudio.c')], + ['sdl', sdl, files('sdlaudio.c')], + ['jack', jack, files('jackaudio.c')], + ['spice', spice, files('spiceaudio.c')] ] - if config_host.has_key(m[0]) + if m[1].found() module_ss = ss.source_set() - module_ss.add(when: m[2], if_true: files(m[3])) - audio_modules += {m[1] : module_ss} + module_ss.add(m[1], m[2]) + audio_modules += {m[0] : module_ss} endif endforeach diff --git a/audio/paaudio.c b/audio/paaudio.c index c97b22e970d..75401d53910 100644 --- a/audio/paaudio.c +++ b/audio/paaudio.c @@ -2,7 +2,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "qemu-common.h" #include "audio.h" #include "qapi/opts-visitor.h" @@ -463,10 +462,7 @@ static pa_stream *qpa_simple_new ( pa_stream_set_state_callback(stream, stream_state_cb, c); - flags = - PA_STREAM_INTERPOLATE_TIMING - | PA_STREAM_AUTO_TIMING_UPDATE - | PA_STREAM_EARLY_REQUESTS; + flags = PA_STREAM_EARLY_REQUESTS; if (dev) { /* don't move the stream if the user specified a sink/source */ @@ -756,7 +752,6 @@ static int qpa_validate_per_direction_opts(Audiodev *dev, /* common */ static void *qpa_conn_init(const char *server) { - const char *vm_name; PAConnection *c = g_malloc0(sizeof(PAConnection)); QTAILQ_INSERT_TAIL(&pa_conns, c, list); @@ -765,9 +760,8 @@ static void *qpa_conn_init(const char *server) goto fail; } - vm_name = qemu_get_vm_name(); c->context = pa_context_new(pa_threaded_mainloop_get_api(c->mainloop), - vm_name ? vm_name : "qemu"); + audio_application_name()); if (!c->context) { goto fail; } diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c index 999bfbde47c..a8d370fe6f3 100644 --- a/audio/spiceaudio.c +++ b/audio/spiceaudio.c @@ -317,3 +317,5 @@ static void register_audio_spice(void) audio_driver_register(&spice_audio_driver); } type_init(register_audio_spice); + +module_dep("ui-spice-core"); diff --git a/audio/trace-events b/audio/trace-events index 6aec5357638..957c92337be 100644 --- a/audio/trace-events +++ b/audio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
+# See docs/devel/tracing.rst for syntax documentation.
# alsaaudio.c alsa_revents(int revents) "revents = %d" diff --git a/authz/meson.build b/authz/meson.build index 88fa7769cb1..42a1ec0ff62 100644 --- a/authz/meson.build +++ b/authz/meson.build @@ -6,4 +6,4 @@ authz_ss.add(files( 'simple.c', )) -authz_ss.add(when: ['CONFIG_AUTH_PAM', pam], if_true: files('pamacct.c')) +authz_ss.add(when: pam, if_true: files('pamacct.c')) diff --git a/authz/trace-events b/authz/trace-events index e62ebb36b7e..9c255dafb64 100644 --- a/authz/trace-events +++ b/authz/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # base.c qauthz_is_allowed(void *authz, const char *identity, bool allowed) "AuthZ %p check identity=%s allowed=%d" diff --git a/backends/cryptodev-vhost.c b/backends/cryptodev-vhost.c index 8231e7f1bca..bc13e466b4f 100644 --- a/backends/cryptodev-vhost.c +++ b/backends/cryptodev-vhost.c @@ -52,6 +52,7 @@ cryptodev_vhost_init( { int r; CryptoDevBackendVhost *crypto; + Error *local_err = NULL; crypto = g_new(CryptoDevBackendVhost, 1); crypto->dev.max_queues = 1; @@ -66,8 +67,10 @@ cryptodev_vhost_init( /* vhost-user needs vq_index to initiate a specific queue pair */ crypto->dev.vq_index = crypto->cc->queue_index * crypto->dev.nvqs; - r = vhost_dev_init(&crypto->dev, options->opaque, options->backend_type, 0); + r = vhost_dev_init(&crypto->dev, options->opaque, options->backend_type, 0, + &local_err); if (r < 0) { + error_report_err(local_err); goto fail; } diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c new file mode 100644 index 00000000000..b47f98b6a3a --- /dev/null +++ b/backends/hostmem-epc.c @@ -0,0 +1,82 @@ +/* + * QEMU host SGX EPC memory backend + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qom/object_interfaces.h" +#include "qapi/error.h" +#include "sysemu/hostmem.h" +#include "hw/i386/hostmem-epc.h" + +static void +sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) +{ + uint32_t ram_flags; + char *name; + int fd; + + if (!backend->size) { + error_setg(errp, "can't create backend with size 0"); + return; + } + + fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, + "failed to open /dev/sgx_vepc to alloc SGX EPC"); + return; + } + + name = object_get_canonical_path(OBJECT(backend)); + ram_flags = (backend->share ? 
RAM_SHARED : 0) | RAM_PROTECTED; + memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), + name, backend->size, ram_flags, + fd, 0, errp); + g_free(name); +} + +static void sgx_epc_backend_instance_init(Object *obj) +{ + HostMemoryBackend *m = MEMORY_BACKEND(obj); + + m->share = true; + m->merge = false; + m->dump = false; +} + +static void sgx_epc_backend_class_init(ObjectClass *oc, void *data) +{ + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = sgx_epc_backend_memory_alloc; +} + +static const TypeInfo sgx_epc_backed_info = { + .name = TYPE_MEMORY_BACKEND_EPC, + .parent = TYPE_MEMORY_BACKEND, + .instance_init = sgx_epc_backend_instance_init, + .class_init = sgx_epc_backend_class_init, + .instance_size = sizeof(HostMemoryBackendEpc), +}; + +static void register_types(void) +{ + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd >= 0) { + close(fd); + + type_register_static(&sgx_epc_backed_info); + } +} + +type_init(register_types); diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index b683da9daf8..cd038024fae 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -15,7 +15,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "qom/object_interfaces.h" #include "qom/object.h" @@ -40,6 +39,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) object_get_typename(OBJECT(backend))); #else HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); + uint32_t ram_flags; gchar *name; if (!backend->size) { @@ -52,11 +52,11 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) } name = host_memory_backend_get_name(backend); - memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), - name, - backend->size, fb->align, - (backend->share ? RAM_SHARED : 0) | - (fb->is_pmem ? RAM_PMEM : 0), + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= fb->is_pmem ? RAM_PMEM : 0; + memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, + backend->size, fb->align, ram_flags, fb->mem_path, fb->readonly, errp); g_free(name); #endif diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 69b0ae30bb0..3fc85c3db81 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "qom/object_interfaces.h" #include "qemu/memfd.h" #include "qemu/module.h" @@ -36,6 +35,7 @@ static void memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend); + uint32_t ram_flags; char *name; int fd; @@ -53,9 +53,10 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) } name = host_memory_backend_get_name(backend); - memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), - name, backend->size, - backend->share, fd, 0, errp); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 
0 : RAM_NORESERVE; + memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, fd, 0, errp); g_free(name); } diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c index 5cc53e76c9d..b8e55cdbd0f 100644 --- a/backends/hostmem-ram.c +++ b/backends/hostmem-ram.c @@ -19,6 +19,7 @@ static void ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { + uint32_t ram_flags; char *name; if (!backend->size) { @@ -27,8 +28,10 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) } name = host_memory_backend_get_name(backend); - memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend), name, - backend->size, backend->share, errp); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, errp); g_free(name); } diff --git a/backends/hostmem.c b/backends/hostmem.c index 984560ed343..e031c6208b8 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "sysemu/hostmem.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" @@ -217,6 +216,11 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, Error *local_err = NULL; HostMemoryBackend *backend = MEMORY_BACKEND(obj); + if (!backend->reserve && value) { + error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); + return; + } + if (!host_memory_backend_mr_inited(backend)) { backend->prealloc = value; return; @@ -268,6 +272,7 @@ static void host_memory_backend_init(Object *obj) /* TODO: convert access to globals to compat properties */ backend->merge = machine_mem_merge(machine); backend->dump = machine_dump_guest_core(machine); + backend->reserve = true; backend->prealloc_threads = 1; } @@ -435,6 +440,30 @@ static void host_memory_backend_set_share(Object *o, bool value, Error **errp) backend->share = value; } +#ifdef CONFIG_LINUX +static bool host_memory_backend_get_reserve(Object *o, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + + return backend->reserve; +} + +static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + + if (host_memory_backend_mr_inited(backend)) { + error_setg(errp, "cannot change property value"); + return; + } + if (backend->prealloc && !value) { + error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible"); + return; + } + backend->reserve = value; +} +#endif /* CONFIG_LINUX */ + static bool host_memory_backend_get_use_canonical_path(Object *obj, Error **errp) { @@ -518,6 +547,12 @@ host_memory_backend_class_init(ObjectClass *oc, void *data) host_memory_backend_get_share, host_memory_backend_set_share); object_class_property_set_description(oc, "share", "Mark the memory as private to QEMU or shared"); +#ifdef CONFIG_LINUX + object_class_property_add_bool(oc, "reserve", + host_memory_backend_get_reserve, host_memory_backend_set_reserve); + object_class_property_set_description(oc, "reserve", + "Reserve swap space (or huge pages) if applicable"); +#endif /* CONFIG_LINUX */ /* * Do not delete/rename option. 
This option must be considered stable * (as if it didn't have the 'x-' prefix including deprecation period) as diff --git a/backends/meson.build b/backends/meson.build index d4221831fc3..6e689455280 100644 --- a/backends/meson.build +++ b/backends/meson.build @@ -16,5 +16,6 @@ softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vho softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) +softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) subdir('tpm') diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index a012adc1934..87d061e9bbd 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -30,6 +30,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qemu/sockets.h" +#include "qemu/lockable.h" #include "io/channel-socket.h" #include "sysemu/tpm_backend.h" #include "sysemu/tpm_util.h" @@ -124,31 +125,26 @@ static int tpm_emulator_ctrlcmd(TPMEmulator *tpm, unsigned long cmd, void *msg, uint32_t cmd_no = cpu_to_be32(cmd); ssize_t n = sizeof(uint32_t) + msg_len_in; uint8_t *buf = NULL; - int ret = -1; - qemu_mutex_lock(&tpm->mutex); + WITH_QEMU_LOCK_GUARD(&tpm->mutex) { + buf = g_alloca(n); + memcpy(buf, &cmd_no, sizeof(cmd_no)); + memcpy(buf + sizeof(cmd_no), msg, msg_len_in); - buf = g_alloca(n); - memcpy(buf, &cmd_no, sizeof(cmd_no)); - memcpy(buf + sizeof(cmd_no), msg, msg_len_in); - - n = qemu_chr_fe_write_all(dev, buf, n); - if (n <= 0) { - goto end; - } - - if (msg_len_out != 0) { - n = qemu_chr_fe_read_all(dev, msg, msg_len_out); + n = qemu_chr_fe_write_all(dev, buf, n); if (n <= 0) { - goto end; + return -1; } - } - ret = 0; + if (msg_len_out != 0) { + n = qemu_chr_fe_read_all(dev, msg, msg_len_out); + if (n <= 0) { + return -1; + } + } + } -end: - qemu_mutex_unlock(&tpm->mutex); - return ret; + return 0; } static int tpm_emulator_unix_tx_bufs(TPMEmulator *tpm_emu, @@ -496,8 +492,7 @@ static int tpm_emulator_block_migration(TPMEmulator *tpm_emu) error_setg(&tpm_emu->migration_blocker, "Migration disabled: TPM emulator does not support " "migration"); - migrate_add_blocker(tpm_emu->migration_blocker, &err); - if (err) { + if (migrate_add_blocker(tpm_emu->migration_blocker, &err) < 0) { error_report_err(err); error_free(tpm_emu->migration_blocker); tpm_emu->migration_blocker = NULL; @@ -628,7 +623,7 @@ static TpmTypeOptions *tpm_emulator_get_tpm_options(TPMBackend *tb) TPMEmulator *tpm_emu = TPM_EMULATOR(tb); TpmTypeOptions *options = g_new0(TpmTypeOptions, 1); - options->type = TPM_TYPE_OPTIONS_KIND_EMULATOR; + options->type = TPM_TYPE_EMULATOR; options->u.emulator.data = QAPI_CLONE(TPMEmulatorOptions, tpm_emu->options); return options; diff --git a/backends/tpm/tpm_passthrough.c b/backends/tpm/tpm_passthrough.c index 21b74591838..d5558fae6cc 100644 --- a/backends/tpm/tpm_passthrough.c +++ b/backends/tpm/tpm_passthrough.c @@ -321,7 +321,7 @@ static TpmTypeOptions *tpm_passthrough_get_tpm_options(TPMBackend *tb) { TpmTypeOptions *options = g_new0(TpmTypeOptions, 1); - options->type = TPM_TYPE_OPTIONS_KIND_PASSTHROUGH; + options->type = TPM_TYPE_PASSTHROUGH; options->u.passthrough.data = QAPI_CLONE(TPMPassthroughOptions, TPM_PASSTHROUGH(tb)->options); diff --git a/backends/tpm/trace-events b/backends/tpm/trace-events index 0a2591fb2d6..3298766dd79 100644 --- 
a/backends/tpm/trace-events +++ b/backends/tpm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # tpm_passthrough.c tpm_passthrough_handle_request(void *cmd) "processing command %p" diff --git a/backends/trace-events b/backends/trace-events index 59058f76303..652eb76a572 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # dbus-vmstate.c dbus_vmstate_pre_save(void) diff --git a/backends/vhost-user.c b/backends/vhost-user.c index b366610e16e..10b39992d21 100644 --- a/backends/vhost-user.c +++ b/backends/vhost-user.c @@ -48,9 +48,9 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice *vdev, b->dev.nvqs = nvqs; b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs); - ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0, + errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost initialization failed"); return -1; } diff --git a/block.c b/block.c index c5b887cec19..0ac5b163d2a 100644 --- a/block.c +++ b/block.c @@ -2,6 +2,7 @@ * QEMU System Emulator block driver * * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2020 Virtuozzo International GmbH. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -41,7 +42,6 @@ #include "qapi/qobject-output-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "sysemu/block-backend.h" -#include "sysemu/sysemu.h" #include "qemu/notify.h" #include "qemu/option.h" #include "qemu/coroutine.h" @@ -49,12 +49,14 @@ #include "qemu/timer.h" #include "qemu/cutils.h" #include "qemu/id.h" +#include "qemu/range.h" +#include "qemu/rcu.h" #include "block/coroutines.h" #ifdef CONFIG_BSD #include <sys/ioctl.h> #include <sys/queue.h> -#ifndef __DragonFly__ +#if defined(HAVE_SYS_DISK_H) #include <sys/disk.h> #endif #endif @@ -82,6 +84,25 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, BdrvChildRole child_role, Error **errp); +static bool bdrv_recurse_has_child(BlockDriverState *bs, + BlockDriverState *child); + +static void bdrv_child_free(BdrvChild *child); +static void bdrv_replace_child_noperm(BdrvChild **child, + BlockDriverState *new_bs, + bool free_empty_child); +static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, + BdrvChild *child, + Transaction *tran); +static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, + Transaction *tran); + +static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, + Transaction *change_child_tran, Error **errp); +static void bdrv_reopen_commit(BDRVReopenState *reopen_state); +static void bdrv_reopen_abort(BDRVReopenState *reopen_state); + /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -246,7 +267,7 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, * image is inactivated.
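/*
 * Minimal sketch of the WITH_QEMU_LOCK_GUARD pattern that the
 * tpm_emulator_ctrlcmd() conversion above switches to (this function and its
 * "fail" parameter are hypothetical; only the TPMEmulator mutex comes from
 * the patch): early returns inside the guarded block drop the mutex
 * automatically, so the old goto-based unlock path becomes unnecessary.
 */
static int tpm_emulator_do_locked(TPMEmulator *tpm, bool fail)
{
    WITH_QEMU_LOCK_GUARD(&tpm->mutex) {
        if (fail) {
            return -1;          /* lock is released on this return */
        }
        /* ... work that must hold tpm->mutex ... */
    }

    return 0;                   /* lock already released here */
}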
*/ bool bdrv_is_read_only(BlockDriverState *bs) { - return bs->read_only; + return !(bs->open_flags & BDRV_O_RDWR); } int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, @@ -298,7 +319,6 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, goto fail; } - bs->read_only = true; bs->open_flags &= ~BDRV_O_RDWR; return 0; @@ -381,7 +401,6 @@ BlockDriverState *bdrv_new(void) for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { QLIST_INIT(&bs->op_blockers[i]); } - notifier_with_return_list_init(&bs->before_write_notifiers); qemu_co_mutex_init(&bs->reqs_lock); qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; @@ -389,6 +408,9 @@ BlockDriverState *bdrv_new(void) qemu_co_queue_init(&bs->flush_queue); + qemu_co_mutex_init(&bs->bsc_modify_lock); + bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1); + for (i = 0; i < bdrv_drain_all_count; i++) { bdrv_drained_begin(bs); } @@ -1140,7 +1162,7 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) static char *bdrv_child_get_parent_desc(BdrvChild *c) { BlockDriverState *parent = c->opaque; - return g_strdup(bdrv_get_device_or_node_name(parent)); + return g_strdup_printf("node '%s'", bdrv_get_node_name(parent)); } static void bdrv_child_cb_drained_begin(BdrvChild *child) @@ -1367,6 +1389,8 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + QLIST_INSERT_HEAD(&bs->children, child, next); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } @@ -1383,6 +1407,8 @@ static void bdrv_child_cb_detach(BdrvChild *child) } bdrv_unapply_subtree_drain(child, bs); + + QLIST_REMOVE(child, next); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -1394,6 +1420,13 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, return 0; } +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c) +{ + BlockDriverState *bs = c->opaque; + + return bdrv_get_aio_context(bs); +} + const BdrvChildClass child_of_bds = { .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, @@ -1407,8 +1440,14 @@ const BdrvChildClass child_of_bds = { .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, .set_aio_ctx = bdrv_child_cb_set_aio_ctx, .update_filename = bdrv_child_cb_update_filename, + .get_parent_aio_context = child_of_bds_get_parent_aio_context, }; +AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c) +{ + return c->klass->get_parent_aio_context(c); +} + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags; @@ -1518,7 +1557,6 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, } bs->drv = drv; - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); bs->opaque = g_malloc0(drv->instance_size); if (drv->bdrv_file_open) { @@ -1547,7 +1585,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, return ret; } - bdrv_refresh_limits(bs, &local_err); + bdrv_refresh_limits(bs, NULL, &local_err); if (local_err) { error_propagate(errp, local_err); return -EINVAL; @@ -1575,16 +1613,26 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, return ret; } -BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, - int flags, Error **errp) +/* + * Create and open a block node. + * + * @options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. 
The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + */ +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp) { BlockDriverState *bs; int ret; bs = bdrv_new(); bs->open_flags = flags; - bs->explicit_options = qdict_new(); - bs->options = qdict_new(); + bs->options = options ?: qdict_new(); + bs->explicit_options = qdict_clone_shallow(bs->options); bs->opaque = NULL; update_options_from_flags(bs->options, flags); @@ -1602,6 +1650,13 @@ BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, return bs; } +/* Create and open a block node. */ +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp) +{ + return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp); +} + QemuOptsList bdrv_runtime_opts = { .name = "bdrv_common", .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), @@ -1689,6 +1744,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; + bool ro; assert(bs->file == NULL); assert(options != NULL && bs->options != options); @@ -1739,17 +1795,17 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, drv->format_name); - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + ro = bdrv_is_read_only(bs); - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { - if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) { + if (!ro && bdrv_is_whitelisted(drv, true)) { ret = bdrv_apply_auto_read_only(bs, NULL, NULL); } else { ret = -ENOTSUP; } if (ret < 0) { error_setg(errp, - !bs->read_only && bdrv_is_whitelisted(drv, true) + !ro && bdrv_is_whitelisted(drv, true) ? "Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); @@ -1761,7 +1817,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, assert(qatomic_read(&bs->copy_on_read) == 0); if (bs->open_flags & BDRV_O_COPY_ON_READ) { - if (!bs->read_only) { + if (!ro) { bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); @@ -1955,12 +2011,6 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp); -static void bdrv_child_abort_perm_update(BdrvChild *c); -static void bdrv_child_set_perm(BdrvChild *c); - typedef struct BlockReopenQueueEntry { bool prepared; bool perms_checked; @@ -2008,6 +2058,68 @@ bool bdrv_is_writable(BlockDriverState *bs) return bdrv_is_writable_after_reopen(bs, NULL); } +static char *bdrv_child_user_desc(BdrvChild *c) +{ + return c->klass->get_parent_desc(c); +} + +/* + * Check that @a allows everything that @b needs. @a and @b must reference same + * child node. 
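/*
 * Hypothetical caller sketch for bdrv_new_open_driver_opts() above: the
 * function takes over the reference to @options even on failure, so a caller
 * that wants to keep using the dictionary must grab its own reference first.
 */
static BlockDriverState *open_node_keep_opts(BlockDriver *drv,
                                             const char *node_name,
                                             QDict *opts, Error **errp)
{
    /* the callee owns one reference from now on; keep ours */
    qobject_ref(opts);

    return bdrv_new_open_driver_opts(drv, node_name, opts, BDRV_O_RDWR, errp);
}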
+ */ +static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) +{ + const char *child_bs_name; + g_autofree char *a_user = NULL; + g_autofree char *b_user = NULL; + g_autofree char *perms = NULL; + + assert(a->bs); + assert(a->bs == b->bs); + + if ((b->perm & a->shared_perm) == b->perm) { + return true; + } + + child_bs_name = bdrv_get_node_name(b->bs); + a_user = bdrv_child_user_desc(a); + b_user = bdrv_child_user_desc(b); + perms = bdrv_perm_names(b->perm & ~a->shared_perm); + + error_setg(errp, "Permission conflict on node '%s': permissions '%s' are " + "both required by %s (uses node '%s' as '%s' child) and " + "unshared by %s (uses node '%s' as '%s' child).", + child_bs_name, perms, + b_user, child_bs_name, b->name, + a_user, child_bs_name, a->name); + + return false; +} + +static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) +{ + BdrvChild *a, *b; + + /* + * During the loop we'll look at each pair twice. That's correct because + * bdrv_a_allow_b() is asymmetric and we should check each pair in both + * directions. + */ + QLIST_FOREACH(a, &bs->parents, next_parent) { + QLIST_FOREACH(b, &bs->parents, next_parent) { + if (a == b) { + continue; + } + + if (!bdrv_a_allow_b(a, b, errp)) { + return true; + } + } + } + + return false; +} + static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, BdrvChild *c, BdrvChildRole role, BlockReopenQueue *reopen_queue, @@ -2025,22 +2137,243 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, } /* - * Check whether permissions on this node can be changed in a way that - * @cumulative_perms and @cumulative_shared_perms are the new cumulative - * permissions of all its parents. This involves checking whether all necessary - * permission changes to child nodes can be performed. + * Adds the whole subtree of @bs (including @bs itself) to the @list (except for + * nodes that are already in the @list, of course) so that final list is + * topologically sorted. Return the result (GSList @list object is updated, so + * don't use old reference after function call). + * + * On function start @list must be already topologically sorted and for any node + * in the @list the whole subtree of the node must be in the @list as well. The + * simplest way to satisfy this criteria: use only result of + * bdrv_topological_dfs() or NULL as @list parameter. 
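/*
 * Usage sketch for bdrv_topological_dfs() (mirrors what bdrv_refresh_perms()
 * does further down; the walker function itself is hypothetical): pass NULL
 * for both @list and @found to start a fresh walk.  The returned list puts
 * every node before its own children and only borrows the nodes, so
 * g_autoptr(GSList) / g_slist_free() is enough to clean it up.
 */
static void walk_subtree_example(BlockDriverState *bs)
{
    g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
    GSList *l;

    for (l = list; l; l = l->next) {
        BlockDriverState *node = l->data;

        /* parents are always visited before their children here */
        (void)node;
    }
}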
+ */ +static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, + BlockDriverState *bs) +{ + BdrvChild *child; + g_autoptr(GHashTable) local_found = NULL; + + if (!found) { + assert(!list); + found = local_found = g_hash_table_new(NULL, NULL); + } + + if (g_hash_table_contains(found, bs)) { + return list; + } + g_hash_table_add(found, bs); + + QLIST_FOREACH(child, &bs->children, next) { + list = bdrv_topological_dfs(list, found, child->bs); + } + + return g_slist_prepend(list, bs); +} + +typedef struct BdrvChildSetPermState { + BdrvChild *child; + uint64_t old_perm; + uint64_t old_shared_perm; +} BdrvChildSetPermState; + +static void bdrv_child_set_perm_abort(void *opaque) +{ + BdrvChildSetPermState *s = opaque; + + s->child->perm = s->old_perm; + s->child->shared_perm = s->old_shared_perm; +} + +static TransactionActionDrv bdrv_child_set_pem_drv = { + .abort = bdrv_child_set_perm_abort, + .clean = g_free, +}; + +static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, + uint64_t shared, Transaction *tran) +{ + BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1); + + *s = (BdrvChildSetPermState) { + .child = c, + .old_perm = c->perm, + .old_shared_perm = c->shared_perm, + }; + + c->perm = perm; + c->shared_perm = shared; + + tran_add(tran, &bdrv_child_set_pem_drv, s); +} + +static void bdrv_drv_set_perm_commit(void *opaque) +{ + BlockDriverState *bs = opaque; + uint64_t cumulative_perms, cumulative_shared_perms; + + if (bs->drv->bdrv_set_perm) { + bdrv_get_cumulative_perm(bs, &cumulative_perms, + &cumulative_shared_perms); + bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } +} + +static void bdrv_drv_set_perm_abort(void *opaque) +{ + BlockDriverState *bs = opaque; + + if (bs->drv->bdrv_abort_perm_update) { + bs->drv->bdrv_abort_perm_update(bs); + } +} + +TransactionActionDrv bdrv_drv_set_perm_drv = { + .abort = bdrv_drv_set_perm_abort, + .commit = bdrv_drv_set_perm_commit, +}; + +static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Transaction *tran, + Error **errp) +{ + if (!bs->drv) { + return 0; + } + + if (bs->drv->bdrv_check_perm) { + int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + if (tran) { + tran_add(tran, &bdrv_drv_set_perm_drv, bs); + } + + return 0; +} + +typedef struct BdrvReplaceChildState { + BdrvChild *child; + BdrvChild **childp; + BlockDriverState *old_bs; + bool free_empty_child; +} BdrvReplaceChildState; + +static void bdrv_replace_child_commit(void *opaque) +{ + BdrvReplaceChildState *s = opaque; + + if (s->free_empty_child && !s->child->bs) { + bdrv_child_free(s->child); + } + bdrv_unref(s->old_bs); +} + +static void bdrv_replace_child_abort(void *opaque) +{ + BdrvReplaceChildState *s = opaque; + BlockDriverState *new_bs = s->child->bs; + + /* + * old_bs reference is transparently moved from @s to s->child. + * + * Pass &s->child here instead of s->childp, because: + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it + * will not modify s->child. From that perspective, it does not matter + * whether we pass s->childp or &s->child. + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use + * it here. + * (3) If new_bs is NULL, *s->childp will have been NULLed by + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we + * must not pass a NULL *s->childp here. 
+ * + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in + * any case, there is no reason to pass it anyway. + */ + bdrv_replace_child_noperm(&s->child, s->old_bs, true); + /* + * The child was pre-existing, so s->old_bs must be non-NULL, and + * s->child thus must not have been freed + */ + assert(s->child != NULL); + if (!new_bs) { + /* As described above, *s->childp was cleared, so restore it */ + assert(s->childp != NULL); + *s->childp = s->child; + } + bdrv_unref(new_bs); +} + +static TransactionActionDrv bdrv_replace_child_drv = { + .commit = bdrv_replace_child_commit, + .abort = bdrv_replace_child_abort, + .clean = g_free, +}; + +/* + * bdrv_replace_child_tran + * + * Note: real unref of old_bs is done only on commit. + * + * The function doesn't update permissions, caller is responsible for this. + * + * (*childp)->bs must not be NULL. * - * A call to this function must always be followed by a call to bdrv_set_perm() - * or bdrv_abort_perm_update(). + * Note that if new_bs == NULL, @childp is stored in a state object attached + * to @tran, so that the old child can be reinstated in the abort handler. + * Therefore, if @new_bs can be NULL, @childp must stay valid until the + * transaction is committed or aborted. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed (on commit). @free_empty_child should only be false if the + * caller will free the BDrvChild themselves (which may be important + * if this is in turn called in another transactional context). + */ +static void bdrv_replace_child_tran(BdrvChild **childp, + BlockDriverState *new_bs, + Transaction *tran, + bool free_empty_child) +{ + BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); + *s = (BdrvReplaceChildState) { + .child = *childp, + .childp = new_bs == NULL ? childp : NULL, + .old_bs = (*childp)->bs, + .free_empty_child = free_empty_child, + }; + tran_add(tran, &bdrv_replace_child_drv, s); + + /* The abort handler relies on this */ + assert(s->old_bs != NULL); + + if (new_bs) { + bdrv_ref(new_bs); + } + /* + * Pass free_empty_child=false, we will free the child (if + * necessary) in bdrv_replace_child_commit() (if our + * @free_empty_child parameter was true). + */ + bdrv_replace_child_noperm(childp, new_bs, false); + /* old_bs reference is transparently moved from *childp to @s */ +} + +/* + * Refresh permissions in @bs subtree. The function is intended to be called + * after some graph modification that was done without permission update. 
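/*
 * Minimal sketch of the Transaction pattern this patch introduces (the names
 * below are hypothetical; only Transaction, TransactionActionDrv, tran_new(),
 * tran_add() and tran_finalize() come from the patch, and the shape mirrors
 * bdrv_child_set_perm()/bdrv_replace_child_tran() above): record the old
 * state, register an action whose .abort handler restores it, and let the
 * caller decide the outcome with a single tran_finalize() call.
 */
typedef struct SetIntState {
    int *field;
    int old_val;
} SetIntState;

static void set_int_abort(void *opaque)
{
    SetIntState *s = opaque;

    *s->field = s->old_val;     /* roll the change back */
}

static TransactionActionDrv set_int_drv = {
    .abort = set_int_abort,
    .clean = g_free,            /* state is freed on commit and on abort */
};

static void set_int_tran(int *field, int val, Transaction *tran)
{
    SetIntState *s = g_new(SetIntState, 1);

    *s = (SetIntState) { .field = field, .old_val = *field };
    *field = val;
    tran_add(tran, &set_int_drv, s);
}

static int set_int_checked(int *field, int val)
{
    Transaction *tran = tran_new();
    int ret = (val >= 0) ? 0 : -EINVAL;  /* stand-in for a real check */

    set_int_tran(field, val, tran);
    tran_finalize(tran, ret);            /* commit on 0, abort on error */
    return ret;
}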
*/ -static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, - uint64_t cumulative_perms, - uint64_t cumulative_shared_perms, - GSList *ignore_children, Error **errp) +static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, + Transaction *tran, Error **errp) { BlockDriver *drv = bs->drv; BdrvChild *c; int ret; + uint64_t cumulative_perms, cumulative_shared_perms; + + bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); /* Write permissions never work with read-only images */ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && @@ -2049,15 +2382,8 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, if (!bdrv_is_writable_after_reopen(bs, NULL)) { error_setg(errp, "Block node is read-only"); } else { - uint64_t current_perms, current_shared; - bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared); - if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { - error_setg(errp, "Cannot make block node read-only, there is " - "a writer on it"); - } else { - error_setg(errp, "Cannot make block node read-only and create " - "a writer on it"); - } + error_setg(errp, "Read-only block node '%s' cannot support " + "read-write users", bdrv_get_node_name(bs)); } return -EPERM; @@ -2084,12 +2410,10 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, return 0; } - if (drv->bdrv_check_perm) { - ret = drv->bdrv_check_perm(bs, cumulative_perms, - cumulative_shared_perms, errp); - if (ret < 0) { - return ret; - } + ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran, + errp); + if (ret < 0) { + return ret; } /* Drivers that never have children can omit .bdrv_child_perm() */ @@ -2105,68 +2429,32 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, bdrv_child_perm(bs, c->bs, c, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); - ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, - errp); - if (ret < 0) { - return ret; - } + bdrv_child_set_perm(c, cur_perm, cur_shared, tran); } return 0; } -/* - * Notifies drivers that after a previous bdrv_check_perm() call, the - * permission update is not performed and any preparations made for it (e.g. - * taken file locks) need to be undone. - * - * This function recursively notifies all child nodes. 
- */ -static void bdrv_abort_perm_update(BlockDriverState *bs) -{ - BlockDriver *drv = bs->drv; - BdrvChild *c; - - if (!drv) { - return; - } - - if (drv->bdrv_abort_perm_update) { - drv->bdrv_abort_perm_update(bs); - } - - QLIST_FOREACH(c, &bs->children, next) { - bdrv_child_abort_perm_update(c); - } -} - -static void bdrv_set_perm(BlockDriverState *bs) +static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) { - uint64_t cumulative_perms, cumulative_shared_perms; - BlockDriver *drv = bs->drv; - BdrvChild *c; - - if (!drv) { - return; - } + int ret; + BlockDriverState *bs; - bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms); + for ( ; list; list = list->next) { + bs = list->data; - /* Update this node */ - if (drv->bdrv_set_perm) { - drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); - } + if (bdrv_parent_perms_conflict(bs, errp)) { + return -EINVAL; + } - /* Drivers that never have children can omit .bdrv_child_perm() */ - if (!drv->bdrv_child_perm) { - assert(QLIST_EMPTY(&bs->children)); - return; + ret = bdrv_node_refresh_perm(bs, q, tran, errp); + if (ret < 0) { + return ret; + } } - /* Update all children */ - QLIST_FOREACH(c, &bs->children, next) { - bdrv_child_set_perm(c); - } + return 0; } void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, @@ -2185,15 +2473,6 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, *shared_perm = cumulative_shared_perms; } -static char *bdrv_child_user_desc(BdrvChild *c) -{ - if (c->klass->get_parent_desc) { - return c->klass->get_parent_desc(c); - } - - return g_strdup("another user"); -} - char *bdrv_perm_names(uint64_t perm) { struct perm_name { @@ -2223,162 +2502,52 @@ char *bdrv_perm_names(uint64_t perm) return g_string_free(result, FALSE); } -/* - * Checks whether a new reference to @bs can be added if the new user requires - * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is - * set, the BdrvChild objects in this list are ignored in the calculations; - * this allows checking permission updates for an existing reference. - * - * Needs to be followed by a call to either bdrv_set_perm() or - * bdrv_abort_perm_update(). 
*/ -static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, - uint64_t new_used_perm, - uint64_t new_shared_perm, - GSList *ignore_children, - Error **errp) + +static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) { - BdrvChild *c; - uint64_t cumulative_perms = new_used_perm; - uint64_t cumulative_shared_perms = new_shared_perm; + int ret; + Transaction *tran = tran_new(); + g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); + ret = bdrv_list_refresh_perms(list, NULL, tran, errp); + tran_finalize(tran, ret); - /* There is no reason why anyone couldn't tolerate write_unchanged */ - assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + return ret; +} - QLIST_FOREACH(c, &bs->parents, next_parent) { - if (g_slist_find(ignore_children, c)) { - continue; - } +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + Error *local_err = NULL; + Transaction *tran = tran_new(); + int ret; - if ((new_used_perm & c->shared_perm) != new_used_perm) { - char *user = bdrv_child_user_desc(c); - char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + bdrv_child_set_perm(c, perm, shared, tran); - error_setg(errp, "Conflicts with use by %s as '%s', which does not " - "allow '%s' on %s", - user, c->name, perm_names, bdrv_get_node_name(c->bs)); - g_free(user); - g_free(perm_names); - return -EPERM; - } + ret = bdrv_refresh_perms(c->bs, &local_err); - if ((c->perm & new_shared_perm) != c->perm) { - char *user = bdrv_child_user_desc(c); - char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + tran_finalize(tran, ret); - error_setg(errp, "Conflicts with use by %s as '%s', which uses " - "'%s' on %s", - user, c->name, perm_names, bdrv_get_node_name(c->bs)); - g_free(user); - g_free(perm_names); - return -EPERM; + if (ret < 0) { + if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { + /* tighten permissions */ + error_propagate(errp, local_err); + } else { + /* + * Our caller may intend to only loosen restrictions and + * does not expect this function to fail. Errors are not + * fatal in such a case, so we can just hide them from our + * caller. + */ + error_free(local_err); + ret = 0; } - - cumulative_perms |= c->perm; - cumulative_shared_perms &= c->shared_perm; } - return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, - ignore_children, errp); + return ret; } -/* Needs to be followed by a call to either bdrv_child_set_perm() or - * bdrv_child_abort_perm_update(). 
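/*
 * Composition sketch (the shape used by bdrv_set_backing_hd() and the new
 * attach functions further down; the "..._noperm" call is only a
 * placeholder): graph manipulations are done first and registered on one
 * Transaction, permissions are then refreshed once with the new
 * bdrv_refresh_perms(), and tran_finalize() either commits everything or
 * rolls it all back.
 */
static int change_graph_and_refresh(BlockDriverState *bs, Error **errp)
{
    Transaction *tran = tran_new();
    int ret;

    /* ret = some_graph_change_noperm(bs, ..., tran, errp); */
    ret = 0;

    if (ret == 0) {
        ret = bdrv_refresh_perms(bs, errp);
    }

    tran_finalize(tran, ret);   /* commit on success, abort on failure */
    return ret;
}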
*/ -static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - uint64_t perm, uint64_t shared, - GSList *ignore_children, Error **errp) -{ - int ret; - - ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); - ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); - g_slist_free(ignore_children); - - if (ret < 0) { - return ret; - } - - if (!c->has_backup_perm) { - c->has_backup_perm = true; - c->backup_perm = c->perm; - c->backup_shared_perm = c->shared_perm; - } - /* - * Note: it's OK if c->has_backup_perm was already set, as we can find the - * same child twice during check_perm procedure - */ - - c->perm = perm; - c->shared_perm = shared; - - return 0; -} - -static void bdrv_child_set_perm(BdrvChild *c) -{ - c->has_backup_perm = false; - - bdrv_set_perm(c->bs); -} - -static void bdrv_child_abort_perm_update(BdrvChild *c) -{ - if (c->has_backup_perm) { - c->perm = c->backup_perm; - c->shared_perm = c->backup_shared_perm; - c->has_backup_perm = false; - } - - bdrv_abort_perm_update(c->bs); -} - -static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) -{ - int ret; - uint64_t perm, shared_perm; - - bdrv_get_cumulative_perm(bs, &perm, &shared_perm); - ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, errp); - if (ret < 0) { - bdrv_abort_perm_update(bs); - return ret; - } - bdrv_set_perm(bs); - - return 0; -} - -int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, - Error **errp) -{ - Error *local_err = NULL; - int ret; - - ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, &local_err); - if (ret < 0) { - bdrv_child_abort_perm_update(c); - if ((perm & ~c->perm) || (c->shared_perm & ~shared)) { - /* tighten permissions */ - error_propagate(errp, local_err); - } else { - /* - * Our caller may intend to only loosen restrictions and - * does not expect this function to fail. Errors are not - * fatal in such a case, so we can just hide them from our - * caller. - */ - error_free(local_err); - ret = 0; - } - return ret; - } - - bdrv_child_set_perm(c); - - return 0; -} - -int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) +int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) { uint64_t parent_perms, parent_shared; uint64_t perms, shared; @@ -2560,14 +2729,30 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) +/** + * Replace (*childp)->bs by @new_bs. + * + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents + * generally cannot handle a BdrvChild with .bs == NULL, so clearing + * BdrvChild.bs should generally immediately be followed by the + * BdrvChild pointer being cleared as well. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed. @free_empty_child should only be false if the caller will + * free the BdrvChild themselves (this may be important in a + * transactional context, where it may only be freed on commit). 
+ */ +static void bdrv_replace_child_noperm(BdrvChild **childp, + BlockDriverState *new_bs, + bool free_empty_child) { + BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; assert(!child->frozen); + assert(old_bs != new_bs); if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); @@ -2596,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } child->bs = new_bs; + if (!new_bs) { + *childp = NULL; + } if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2625,40 +2813,208 @@ static void bdrv_replace_child_noperm(BdrvChild *child, bdrv_parent_drained_end_single(child); drain_saldo++; } + + if (free_empty_child && !child->bs) { + bdrv_child_free(child); + } } +/** + * Free the given @child. + * + * The child must be empty (i.e. `child->bs == NULL`) and it must be + * unused (i.e. not in a children list). + */ +static void bdrv_child_free(BdrvChild *child) +{ + assert(!child->bs); + assert(!child->next.le_prev); /* not in children list */ + + g_free(child->name); + g_free(child); +} + +typedef struct BdrvAttachChildCommonState { + BdrvChild **child; + AioContext *old_parent_ctx; + AioContext *old_child_ctx; +} BdrvAttachChildCommonState; + +static void bdrv_attach_child_common_abort(void *opaque) +{ + BdrvAttachChildCommonState *s = opaque; + BdrvChild *child = *s->child; + BlockDriverState *bs = child->bs; + + /* + * Pass free_empty_child=false, because we still need the child + * for the AioContext operations on the parent below; those + * BdrvChildClass methods all work on a BdrvChild object, so we + * need to keep it as an empty shell (after this function, it will + * not be attached to any parent, and it will not have a .bs). + */ + bdrv_replace_child_noperm(s->child, NULL, false); + + if (bdrv_get_aio_context(bs) != s->old_child_ctx) { + bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); + } + + if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { + GSList *ignore; + + /* No need to ignore `child`, because it has been detached already */ + ignore = NULL; + child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, + &error_abort); + g_slist_free(ignore); + + ignore = NULL; + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); + g_slist_free(ignore); + } + + bdrv_unref(bs); + bdrv_child_free(child); +} + +static TransactionActionDrv bdrv_attach_child_common_drv = { + .abort = bdrv_attach_child_common_abort, + .clean = g_free, +}; + /* - * Updates @child to change its reference to point to @new_bs, including - * checking and applying the necessary permission updates both to the old node - * and to @new_bs. + * Common part of attaching bdrv child to bs or to blk or to job * - * NULL is passed as @new_bs for removing the reference before freeing @child. + * Resulting new child is returned through @child. + * At start *@child must be NULL. + * @child is saved to a new entry of @tran, so that *@child could be reverted to + * NULL on abort(). So referenced variable must live at least until transaction + * end. * - * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this - * function uses bdrv_set_perm() to update the permissions according to the new - * reference that @new_bs gets. + * Function doesn't update permissions, caller is responsible for this. 
+ */ +static int bdrv_attach_child_common(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + uint64_t perm, uint64_t shared_perm, + void *opaque, BdrvChild **child, + Transaction *tran, Error **errp) +{ + BdrvChild *new_child; + AioContext *parent_ctx; + AioContext *child_ctx = bdrv_get_aio_context(child_bs); + + assert(child); + assert(*child == NULL); + assert(child_class->get_parent_desc); + + new_child = g_new(BdrvChild, 1); + *new_child = (BdrvChild) { + .bs = NULL, + .name = g_strdup(child_name), + .klass = child_class, + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, + }; + + /* + * If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. + */ + parent_ctx = bdrv_child_get_parent_aio_context(new_child); + if (child_ctx != parent_ctx) { + Error *local_err = NULL; + int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err); + + if (ret < 0 && child_class->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, new_child); + if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore, + NULL)) + { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, new_child); + child_class->set_aio_ctx(new_child, child_ctx, &ignore); + } + g_slist_free(ignore); + } + + if (ret < 0) { + error_propagate(errp, local_err); + bdrv_child_free(new_child); + return ret; + } + } + + bdrv_ref(child_bs); + bdrv_replace_child_noperm(&new_child, child_bs, true); + /* child_bs was non-NULL, so new_child must not have been freed */ + assert(new_child != NULL); + + *child = new_child; + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); + *s = (BdrvAttachChildCommonState) { + .child = child, + .old_parent_ctx = parent_ctx, + .old_child_ctx = child_ctx, + }; + tran_add(tran, &bdrv_attach_child_common_drv, s); + + return 0; +} + +/* + * Variable referenced by @child must live at least until transaction end. + * (see bdrv_attach_child_common() doc for details) * - * Callers must ensure that child->frozen is false. + * Function doesn't update permissions, caller is responsible for this. */ -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + BdrvChild **child, + Transaction *tran, + Error **errp) { - BlockDriverState *old_bs = child->bs; + int ret; + uint64_t perm, shared_perm; - /* Asserts that child->frozen == false */ - bdrv_replace_child_noperm(child, new_bs); + assert(parent_bs->drv); - /* - * Start with the new node's permissions. If @new_bs is a (direct - * or indirect) child of @old_bs, we must complete the permission - * update on @new_bs before we loosen the restrictions on @old_bs. - * Otherwise, bdrv_check_perm() on @old_bs would re-initiate - * updating the permissions of @new_bs, and thus not purely loosen - * restrictions. 
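/*
 * Sketch of the can_set_aio_ctx()/set_aio_ctx() dance used by the attach code
 * above (this helper is hypothetical): the ignore list carries the edge that
 * is currently being reconfigured so the walk does not recurse back through
 * it, and it is rebuilt before the second call because the callbacks add the
 * edges they visit to it.
 */
static bool move_parent_to_child_ctx(BdrvChild *child, AioContext *ctx)
{
    GSList *ignore;
    bool ok;

    if (!child->klass->can_set_aio_ctx) {
        return false;           /* this parent cannot be moved at all */
    }

    ignore = g_slist_prepend(NULL, child);
    ok = child->klass->can_set_aio_ctx(child, ctx, &ignore, NULL);
    g_slist_free(ignore);
    if (!ok) {
        return false;
    }

    ignore = g_slist_prepend(NULL, child);
    child->klass->set_aio_ctx(child, ctx, &ignore);
    g_slist_free(ignore);

    return true;
}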
- */ - if (new_bs) { - bdrv_set_perm(new_bs); + if (bdrv_recurse_has_child(child_bs, parent_bs)) { + error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle", + child_bs->node_name, child_name, parent_bs->node_name); + return -EINVAL; } + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, + perm, shared_perm, &perm, &shared_perm); + + ret = bdrv_attach_child_common(child_bs, child_name, child_class, + child_role, perm, shared_perm, parent_bs, + child, tran, errp); + if (ret < 0) { + return ret; + } + + return 0; +} + +static void bdrv_detach_child(BdrvChild **childp) +{ + BlockDriverState *old_bs = (*childp)->bs; + + bdrv_replace_child_noperm(childp, NULL, true); + if (old_bs) { /* * Update permissions for old node. We're just taking a parent away, so @@ -2667,8 +3023,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) */ bdrv_refresh_perms(old_bs, NULL); - /* When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext */ + /* + * When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext + */ bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); } } @@ -2687,61 +3045,28 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildClass *child_class, BdrvChildRole child_role, - AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp) { - BdrvChild *child; - Error *local_err = NULL; int ret; + BdrvChild *child = NULL; + Transaction *tran = tran_new(); - ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); + ret = bdrv_attach_child_common(child_bs, child_name, child_class, + child_role, perm, shared_perm, opaque, + &child, tran, errp); if (ret < 0) { - bdrv_abort_perm_update(child_bs); - bdrv_unref(child_bs); - return NULL; + goto out; } - child = g_new(BdrvChild, 1); - *child = (BdrvChild) { - .bs = NULL, - .name = g_strdup(child_name), - .klass = child_class, - .role = child_role, - .perm = perm, - .shared_perm = shared_perm, - .opaque = opaque, - }; + ret = bdrv_refresh_perms(child_bs, errp); - /* If the AioContexts don't match, first try to move the subtree of - * child_bs into the AioContext of the new parent. If this doesn't work, - * try moving the parent into the AioContext of child_bs instead. */ - if (bdrv_get_aio_context(child_bs) != ctx) { - ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); - if (ret < 0 && child_class->can_set_aio_ctx) { - GSList *ignore = g_slist_prepend(NULL, child); - ctx = bdrv_get_aio_context(child_bs); - if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) { - error_free(local_err); - ret = 0; - g_slist_free(ignore); - ignore = g_slist_prepend(NULL, child); - child_class->set_aio_ctx(child, ctx, &ignore); - } - g_slist_free(ignore); - } - if (ret < 0) { - error_propagate(errp, local_err); - g_free(child); - bdrv_abort_perm_update(child_bs); - bdrv_unref(child_bs); - return NULL; - } - } - - /* This performs the matching bdrv_set_perm() for the above check. 
*/ - bdrv_replace_child(child, child_bs); +out: + tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); + bdrv_unref(child_bs); return child; } @@ -2763,34 +3088,29 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BdrvChildRole child_role, Error **errp) { - BdrvChild *child; - uint64_t perm, shared_perm; - - bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); - - assert(parent_bs->drv); - bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, - perm, shared_perm, &perm, &shared_perm); + int ret; + BdrvChild *child = NULL; + Transaction *tran = tran_new(); - child = bdrv_root_attach_child(child_bs, child_name, child_class, - child_role, bdrv_get_aio_context(parent_bs), - perm, shared_perm, parent_bs, errp); - if (child == NULL) { - return NULL; + ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class, + child_role, &child, tran, errp); + if (ret < 0) { + goto out; } - QLIST_INSERT_HEAD(&parent_bs->children, child, next); - return child; -} + ret = bdrv_refresh_perms(parent_bs, errp); + if (ret < 0) { + goto out; + } -static void bdrv_detach_child(BdrvChild *child) -{ - QLIST_SAFE_REMOVE(child, next); +out: + tran_finalize(tran, ret); + /* child is unset on failure by bdrv_attach_child_common_abort() */ + assert((ret < 0) == !child); - bdrv_replace_child(child, NULL); + bdrv_unref(child_bs); - g_free(child->name); - g_free(child); + return child; } /* Callers must ensure that child->frozen is false. */ @@ -2799,15 +3119,53 @@ void bdrv_root_unref_child(BdrvChild *child) BlockDriverState *child_bs; child_bs = child->bs; - bdrv_detach_child(child); + bdrv_detach_child(&child); bdrv_unref(child_bs); } +typedef struct BdrvSetInheritsFrom { + BlockDriverState *bs; + BlockDriverState *old_inherits_from; +} BdrvSetInheritsFrom; + +static void bdrv_set_inherits_from_abort(void *opaque) +{ + BdrvSetInheritsFrom *s = opaque; + + s->bs->inherits_from = s->old_inherits_from; +} + +static TransactionActionDrv bdrv_set_inherits_from_drv = { + .abort = bdrv_set_inherits_from_abort, + .clean = g_free, +}; + +/* @tran is allowed to be NULL. In this case no rollback is possible */ +static void bdrv_set_inherits_from(BlockDriverState *bs, + BlockDriverState *new_inherits_from, + Transaction *tran) +{ + if (tran) { + BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1); + + *s = (BdrvSetInheritsFrom) { + .bs = bs, + .old_inherits_from = bs->inherits_from, + }; + + tran_add(tran, &bdrv_set_inherits_from_drv, s); + } + + bs->inherits_from = new_inherits_from; +} + /** * Clear all inherits_from pointers from children and grandchildren of * @root that point to @root, where necessary. + * @tran is allowed to be NULL. 
In this case no rollback is possible */ -static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) +static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, + Transaction *tran) { BdrvChild *c; @@ -2822,12 +3180,12 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child) } } if (c == NULL) { - child->bs->inherits_from = NULL; + bdrv_set_inherits_from(child->bs, NULL, tran); } } QLIST_FOREACH(c, &child->bs->children, next) { - bdrv_unset_inherits_from(root, c); + bdrv_unset_inherits_from(root, c, tran); } } @@ -2838,7 +3196,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) return; } - bdrv_unset_inherits_from(parent, child); + bdrv_unset_inherits_from(parent, child, NULL); bdrv_root_unref_child(child); } @@ -2880,50 +3238,118 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) } /* - * Sets the bs->backing link of a BDS. A new reference is created; callers - * which don't need their own reference any more must call bdrv_unref(). + * Sets the bs->backing or bs->file link of a BDS. A new reference is created; + * callers which don't need their own reference any more must call bdrv_unref(). + * + * Function doesn't update permissions, caller is responsible for this. */ -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp) +static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + bool is_backing, + Transaction *tran, Error **errp) { int ret = 0; - bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && - bdrv_inherits_from_recursive(backing_hd, bs); + bool update_inherits_from = + bdrv_inherits_from_recursive(child_bs, parent_bs); + BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file; + BdrvChildRole role; - if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) { + if (!parent_bs->drv) { + /* + * Node without drv is an object without a class :/. TODO: finally fix + * qcow2 driver to never clear bs->drv and implement format corruption + * handling in other way. + */ + error_setg(errp, "Node corrupted"); + return -EINVAL; + } + + if (child && child->frozen) { + error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'", + child->name, parent_bs->node_name, child->bs->node_name); return -EPERM; } - if (backing_hd) { - bdrv_ref(backing_hd); + if (is_backing && !parent_bs->drv->is_filter && + !parent_bs->drv->supports_backing) + { + error_setg(errp, "Driver '%s' of node '%s' does not support backing " + "files", parent_bs->drv->format_name, parent_bs->node_name); + return -EINVAL; } - if (bs->backing) { - /* Cannot be frozen, we checked that above */ - bdrv_unref_child(bs, bs->backing); - bs->backing = NULL; + if (parent_bs->drv->is_filter) { + role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; + } else if (is_backing) { + role = BDRV_CHILD_COW; + } else { + /* + * We only can use same role as it is in existing child. 
We don't have + * infrastructure to determine role of file child in generic way + */ + if (!child) { + error_setg(errp, "Cannot set file child to format node without " + "file child"); + return -EINVAL; + } + role = child->role; } - if (!backing_hd) { - goto out; + if (child) { + bdrv_unset_inherits_from(parent_bs, child, tran); + bdrv_remove_file_or_backing_child(parent_bs, child, tran); } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds, - bdrv_backing_role(bs), errp); - if (!bs->backing) { - ret = -EPERM; + if (!child_bs) { goto out; } - /* If backing_hd was already part of bs's backing chain, and - * inherits_from pointed recursively to bs then let's update it to - * point directly to bs (else it will become NULL). */ + ret = bdrv_attach_child_noperm(parent_bs, child_bs, + is_backing ? "backing" : "file", + &child_of_bds, role, + is_backing ? &parent_bs->backing : + &parent_bs->file, + tran, errp); + if (ret < 0) { + return ret; + } + + + /* + * If inherits_from pointed recursively to bs then let's update it to + * point directly to bs (else it will become NULL). + */ if (update_inherits_from) { - backing_hd->inherits_from = bs; + bdrv_set_inherits_from(child_bs, parent_bs, tran); } out: - bdrv_refresh_limits(bs, NULL); + bdrv_refresh_limits(parent_bs, tran, NULL); + + return 0; +} + +static int bdrv_set_backing_noperm(BlockDriverState *bs, + BlockDriverState *backing_hd, + Transaction *tran, Error **errp) +{ + return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); +} + +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) +{ + int ret; + Transaction *tran = tran_new(); + + ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); + if (ret < 0) { + goto out; + } + + ret = bdrv_refresh_perms(bs, errp); +out: + tran_finalize(tran, ret); return ret; } @@ -3213,11 +3639,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, goto out; } - /* bdrv_append() consumes a strong reference to bs_snapshot - * (i.e. it will call bdrv_unref() on it) even on error, so in - * order to be able to return one, we have to increase - * bs_snapshot's refcount here */ - bdrv_ref(bs_snapshot); ret = bdrv_append(bs_snapshot, bs, errp); if (ret < 0) { bs_snapshot = NULL; @@ -3729,10 +4150,6 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, bs_entry->state.explicit_options = explicit_options; bs_entry->state.flags = flags; - /* This needs to be overwritten in bdrv_reopen_prepare() */ - bs_entry->state.perm = UINT64_MAX; - bs_entry->state.shared_perm = 0; - /* * If keep_old_opts is false then it means that unspecified * options must be reset to their original value. We don't allow @@ -3796,6 +4213,19 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, NULL, 0, keep_old_opts); } +void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) +{ + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); + } + g_free(bs_queue); + } +} + /* * Reopen multiple BlockDriverStates atomically & transactionally. * @@ -3812,43 +4242,68 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, * * All affected nodes must be drained between bdrv_reopen_queue() and * bdrv_reopen_multiple(). + * + * To be called from the main thread, with all other AioContexts unlocked. 
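/*
 * Caller sketch (hypothetical except for the drain/queue/reopen calls, which
 * mirror what bdrv_reopen() and bdrv_reopen_set_read_only() further down do,
 * minus their AioContext juggling): the queue built by bdrv_reopen_queue() is
 * normally consumed and freed by bdrv_reopen_multiple(), so the new
 * bdrv_reopen_queue_free() helper is there for paths that bail out before
 * ever calling it.
 */
static int reopen_read_only_unless(BlockDriverState *bs, bool bail_out,
                                   Error **errp)
{
    QDict *opts = qdict_new();
    BlockReopenQueue *queue;
    int ret;

    qdict_put_bool(opts, BDRV_OPT_READ_ONLY, true);

    bdrv_subtree_drained_begin(bs);
    queue = bdrv_reopen_queue(NULL, bs, opts, true);

    if (bail_out) {
        error_setg(errp, "reopen cancelled before it was started");
        bdrv_reopen_queue_free(queue);
        ret = -ECANCELED;
    } else {
        ret = bdrv_reopen_multiple(queue, errp);
    }

    bdrv_subtree_drained_end(bs);
    return ret;
}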
*/ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) { int ret = -1; BlockReopenQueueEntry *bs_entry, *next; + AioContext *ctx; + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bs_queue != NULL); + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); + ret = bdrv_flush(bs_entry->state.bs); + aio_context_release(ctx); + if (ret < 0) { + error_setg_errno(errp, -ret, "Error flushing drive"); + goto abort; + } + } + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { assert(bs_entry->state.bs->quiesce_counter > 0); - if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) { - goto cleanup; + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); + ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); + aio_context_release(ctx); + if (ret < 0) { + goto abort; } bs_entry->prepared = true; } + found = g_hash_table_new(NULL, NULL); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { BDRVReopenState *state = &bs_entry->state; - ret = bdrv_check_perm(state->bs, bs_queue, state->perm, - state->shared_perm, NULL, errp); - if (ret < 0) { - goto cleanup_perm; - } - /* Check if new_backing_bs would accept the new permissions */ - if (state->replace_backing_bs && state->new_backing_bs) { - uint64_t nperm, nshared; - bdrv_child_perm(state->bs, state->new_backing_bs, - NULL, bdrv_backing_role(state->bs), - bs_queue, state->perm, state->shared_perm, - &nperm, &nshared); - ret = bdrv_check_update_perm(state->new_backing_bs, NULL, - nperm, nshared, NULL, errp); - if (ret < 0) { - goto cleanup_perm; - } + + refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); + if (state->old_backing_bs) { + refresh_list = bdrv_topological_dfs(refresh_list, found, + state->old_backing_bs); + } + if (state->old_file_bs) { + refresh_list = bdrv_topological_dfs(refresh_list, found, + state->old_file_bs); } - bs_entry->perms_checked = true; + } + + /* + * Note that file-posix driver rely on permission update done during reopen + * (even if no permission changed), because it wants "new" permissions for + * reconfiguring the fd and that's why it does it in raw_check_perm(), not + * in raw_reopen_prepare() which is called with "old" permissions. + */ + ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp); + if (ret < 0) { + goto abort; } /* @@ -3861,146 +4316,76 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) * to first element. 
*/ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); bdrv_reopen_commit(&bs_entry->state); + aio_context_release(ctx); } - ret = 0; -cleanup_perm: - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - BDRVReopenState *state = &bs_entry->state; - - if (!bs_entry->perms_checked) { - continue; - } - - if (ret == 0) { - uint64_t perm, shared; + tran_commit(tran); - bdrv_get_cumulative_perm(state->bs, &perm, &shared); - assert(perm == state->perm); - assert(shared == state->shared_perm); + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + BlockDriverState *bs = bs_entry->state.bs; - bdrv_set_perm(state->bs); - } else { - bdrv_abort_perm_update(state->bs); - if (state->replace_backing_bs && state->new_backing_bs) { - bdrv_abort_perm_update(state->new_backing_bs); - } + if (bs->drv->bdrv_reopen_commit_post) { + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + bs->drv->bdrv_reopen_commit_post(&bs_entry->state); + aio_context_release(ctx); } } - if (ret == 0) { - QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { - BlockDriverState *bs = bs_entry->state.bs; + ret = 0; + goto cleanup; - if (bs->drv->bdrv_reopen_commit_post) - bs->drv->bdrv_reopen_commit_post(&bs_entry->state); - } - } -cleanup: +abort: + tran_abort(tran); QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (ret) { - if (bs_entry->prepared) { - bdrv_reopen_abort(&bs_entry->state); - } - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); + if (bs_entry->prepared) { + ctx = bdrv_get_aio_context(bs_entry->state.bs); + aio_context_acquire(ctx); + bdrv_reopen_abort(&bs_entry->state); + aio_context_release(ctx); } - if (bs_entry->state.new_backing_bs) { - bdrv_unref(bs_entry->state.new_backing_bs); - } - g_free(bs_entry); } - g_free(bs_queue); + +cleanup: + bdrv_reopen_queue_free(bs_queue); return ret; } -int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, - Error **errp) +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp) { - int ret; + AioContext *ctx = bdrv_get_aio_context(bs); BlockReopenQueue *queue; - QDict *opts = qdict_new(); - - qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); + int ret; bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts, true); - ret = bdrv_reopen_multiple(queue, errp); - bdrv_subtree_drained_end(bs); - - return ret; -} - -static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, - BdrvChild *c) -{ - BlockReopenQueueEntry *entry; - - QTAILQ_FOREACH(entry, q, entry) { - BlockDriverState *bs = entry->state.bs; - BdrvChild *child; - - QLIST_FOREACH(child, &bs->children, next) { - if (child == c) { - return entry; - } - } + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); } - return NULL; -} - -static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, - uint64_t *perm, uint64_t *shared) -{ - BdrvChild *c; - BlockReopenQueueEntry *parent; - uint64_t cumulative_perms = 0; - uint64_t cumulative_shared_perms = BLK_PERM_ALL; - - QLIST_FOREACH(c, &bs->parents, next_parent) { - parent = find_parent_in_reopen_queue(q, c); - if (!parent) { - cumulative_perms |= c->perm; - cumulative_shared_perms &= c->shared_perm; - } else { - uint64_t nperm, nshared; - - bdrv_child_perm(parent->state.bs, bs, c, c->role, q, - parent->state.perm, parent->state.shared_perm, - &nperm, &nshared); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + ret = 
bdrv_reopen_multiple(queue, errp); - cumulative_perms |= nperm; - cumulative_shared_perms &= nshared; - } + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); } - *perm = cumulative_perms; - *shared = cumulative_shared_perms; + bdrv_subtree_drained_end(bs); + + return ret; } -static bool bdrv_reopen_can_attach(BlockDriverState *parent, - BdrvChild *child, - BlockDriverState *new_child, - Error **errp) +int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + Error **errp) { - AioContext *parent_ctx = bdrv_get_aio_context(parent); - AioContext *child_ctx = bdrv_get_aio_context(new_child); - GSList *ignore; - bool ret; + QDict *opts = qdict_new(); - ignore = g_slist_prepend(NULL, child); - ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); - g_slist_free(ignore); - if (ret) { - return ret; - } + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only); - ignore = g_slist_prepend(NULL, child); - ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); - g_slist_free(ignore); - return ret; + return bdrv_reopen(bs, opts, true, errp); } /* @@ -4020,110 +4405,81 @@ static bool bdrv_reopen_can_attach(BlockDriverState *parent, * * Return 0 on success, otherwise return < 0 and set @errp. */ -static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, - Error **errp) +static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + bool is_backing, Transaction *tran, + Error **errp) { BlockDriverState *bs = reopen_state->bs; - BlockDriverState *overlay_bs, *below_bs, *new_backing_bs; + BlockDriverState *new_child_bs; + BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) : + child_bs(bs->file); + const char *child_name = is_backing ? "backing" : "file"; QObject *value; const char *str; - value = qdict_get(reopen_state->options, "backing"); + value = qdict_get(reopen_state->options, child_name); if (value == NULL) { return 0; } switch (qobject_type(value)) { case QTYPE_QNULL: - new_backing_bs = NULL; + assert(is_backing); /* The 'file' option does not allow a null value */ + new_child_bs = NULL; break; case QTYPE_QSTRING: str = qstring_get_str(qobject_to(QString, value)); - new_backing_bs = bdrv_lookup_bs(NULL, str, errp); - if (new_backing_bs == NULL) { - return -EINVAL; - } else if (bdrv_recurse_has_child(new_backing_bs, bs)) { - error_setg(errp, "Making '%s' a backing file of '%s' " - "would create a cycle", str, bs->node_name); - return -EINVAL; - } - break; - default: - /* 'backing' does not allow any other data type */ - g_assert_not_reached(); - } - - /* - * Check AioContext compatibility so that the bdrv_set_backing_hd() call in - * bdrv_reopen_commit() won't fail. 
- */ - if (new_backing_bs) { - if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { + new_child_bs = bdrv_lookup_bs(NULL, str, errp); + if (new_child_bs == NULL) { return -EINVAL; - } - } - - /* - * Ensure that @bs can really handle backing files, because we are - * about to give it one (or swap the existing one) - */ - if (bs->drv->is_filter) { - /* Filters always have a file or a backing child */ - if (!bs->backing) { - error_setg(errp, "'%s' is a %s filter node that does not support a " - "backing child", bs->node_name, bs->drv->format_name); + } else if (bdrv_recurse_has_child(new_child_bs, bs)) { + error_setg(errp, "Making '%s' a %s child of '%s' would create a " + "cycle", str, child_name, bs->node_name); return -EINVAL; } - } else if (!bs->drv->supports_backing) { - error_setg(errp, "Driver '%s' of node '%s' does not support backing " - "files", bs->drv->format_name, bs->node_name); - return -EINVAL; + break; + default: + /* + * The options QDict has been flattened, so 'backing' and 'file' + * do not allow any other data type here. + */ + g_assert_not_reached(); } - /* - * Find the "actual" backing file by skipping all links that point - * to an implicit node, if any (e.g. a commit filter node). - * We cannot use any of the bdrv_skip_*() functions here because - * those return the first explicit node, while we are looking for - * its overlay here. - */ - overlay_bs = bs; - for (below_bs = bdrv_filter_or_cow_bs(overlay_bs); - below_bs && below_bs->implicit; - below_bs = bdrv_filter_or_cow_bs(overlay_bs)) - { - overlay_bs = below_bs; + if (old_child_bs == new_child_bs) { + return 0; } - /* If we want to replace the backing file we need some extra checks */ - if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) { - /* Check for implicit nodes between bs and its backing file */ - if (bs != overlay_bs) { - error_setg(errp, "Cannot change backing link if '%s' has " - "an implicit backing file", bs->node_name); + if (old_child_bs) { + if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) { + return 0; + } + + if (old_child_bs->implicit) { + error_setg(errp, "Cannot replace implicit %s child of %s", + child_name, bs->node_name); return -EPERM; } + } + + if (bs->drv->is_filter && !old_child_bs) { /* - * Check if the backing link that we want to replace is frozen. - * Note that - * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing, - * because we know that overlay_bs == bs, and that @bs - * either is a filter that uses ->backing or a COW format BDS - * with bs->drv->supports_backing == true. 
+ * Filters always have a file or a backing child, so we are trying to + * change wrong child */ - if (bdrv_is_backing_chain_frozen(overlay_bs, - child_bs(overlay_bs->backing), errp)) - { - return -EPERM; - } - reopen_state->replace_backing_bs = true; - if (new_backing_bs) { - bdrv_ref(new_backing_bs); - reopen_state->new_backing_bs = new_backing_bs; - } + error_setg(errp, "'%s' is a %s filter node that does not support a " + "%s child", bs->node_name, bs->drv->format_name, child_name); + return -EINVAL; } - return 0; + if (is_backing) { + reopen_state->old_backing_bs = old_child_bs; + } else { + reopen_state->old_file_bs = old_child_bs; + } + + return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, + tran, errp); } /* @@ -4143,8 +4499,9 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, * commit() for any other BDS that have been left in a prepare() state * */ -int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, - Error **errp) +static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, + Transaction *change_child_tran, Error **errp) { int ret = -1; int old_flags; @@ -4211,16 +4568,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, goto error; } - /* Calculate required permissions after reopening */ - bdrv_reopen_perm(queue, reopen_state->bs, - &reopen_state->perm, &reopen_state->shared_perm); - - ret = bdrv_flush(reopen_state->bs); - if (ret) { - error_setg_errno(errp, -ret, "Error flushing drive"); - goto error; - } - if (drv->bdrv_reopen_prepare) { /* * If a driver-specific option is missing, it means that we @@ -4274,12 +4621,21 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, * either a reference to an existing node (using its node name) * or NULL to simply detach the current backing file. */ - ret = bdrv_reopen_parse_backing(reopen_state, errp); + ret = bdrv_reopen_parse_file_or_backing(reopen_state, true, + change_child_tran, errp); if (ret < 0) { goto error; } qdict_del(reopen_state->options, "backing"); + /* Allow changing the 'file' option. In this case NULL is not allowed */ + ret = bdrv_reopen_parse_file_or_backing(reopen_state, false, + change_child_tran, errp); + if (ret < 0) { + goto error; + } + qdict_del(reopen_state->options, "file"); + /* Options that are not handled are only okay if they are unchanged * compared to the old state. It is expected that some options are only * used for the initial open, but not reopen (e.g. filename) */ @@ -4359,7 +4715,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, * makes them final by swapping the staging BlockDriverState contents into * the active BlockDriverState contents. 
*/ -void bdrv_reopen_commit(BDRVReopenState *reopen_state) +static void bdrv_reopen_commit(BDRVReopenState *reopen_state) { BlockDriver *drv; BlockDriverState *bs; @@ -4378,48 +4734,32 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) /* set BDS specific flags now */ qobject_unref(bs->explicit_options); qobject_unref(bs->options); + qobject_ref(reopen_state->explicit_options); + qobject_ref(reopen_state->options); bs->explicit_options = reopen_state->explicit_options; bs->options = reopen_state->options; bs->open_flags = reopen_state->flags; - bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); bs->detect_zeroes = reopen_state->detect_zeroes; - if (reopen_state->replace_backing_bs) { - qdict_del(bs->explicit_options, "backing"); - qdict_del(bs->options, "backing"); - } - /* Remove child references from bs->options and bs->explicit_options. * Child options were already removed in bdrv_reopen_queue_child() */ QLIST_FOREACH(child, &bs->children, next) { qdict_del(bs->explicit_options, child->name); qdict_del(bs->options, child->name); } + /* backing is probably removed, so it's not handled by previous loop */ + qdict_del(bs->explicit_options, "backing"); + qdict_del(bs->options, "backing"); - /* - * Change the backing file if a new one was specified. We do this - * after updating bs->options, so bdrv_refresh_filename() (called - * from bdrv_set_backing_hd()) has the new values. - */ - if (reopen_state->replace_backing_bs) { - BlockDriverState *old_backing_bs = child_bs(bs->backing); - assert(!old_backing_bs || !old_backing_bs->implicit); - /* Abort the permission update on the backing bs we're detaching */ - if (old_backing_bs) { - bdrv_abort_perm_update(old_backing_bs); - } - bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort); - } - - bdrv_refresh_limits(bs, NULL); + bdrv_refresh_limits(bs, NULL, NULL); } /* * Abort the reopen, and delete and free the staged changes in * reopen_state */ -void bdrv_reopen_abort(BDRVReopenState *reopen_state) +static void bdrv_reopen_abort(BDRVReopenState *reopen_state) { BlockDriver *drv; @@ -4472,6 +4812,8 @@ static void bdrv_close(BlockDriverState *bs) bs->explicit_options = NULL; qobject_unref(bs->full_open_options); bs->full_open_options = NULL; + g_free(bs->block_status_cache); + bs->block_status_cache = NULL; bdrv_release_named_dirty_bitmaps(bs); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -4585,88 +4927,237 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) return ret; } +typedef struct BdrvRemoveFilterOrCowChild { + BdrvChild *child; + BlockDriverState *bs; + bool is_backing; +} BdrvRemoveFilterOrCowChild; + +static void bdrv_remove_filter_or_cow_child_abort(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + BlockDriverState *parent_bs = s->child->opaque; + + if (s->is_backing) { + parent_bs->backing = s->child; + } else { + parent_bs->file = s->child; + } + + /* + * We don't have to restore child->bs here to undo bdrv_replace_child_tran() + * because that function is transactionable and it registered own completion + * entries in @tran, so .abort() for bdrv_replace_child_safe() will be + * called automatically. 
+ */ +} + +static void bdrv_remove_filter_or_cow_child_commit(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + bdrv_child_free(s->child); +} + +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + /* Drop the bs reference after the transaction is done */ + bdrv_unref(s->bs); + g_free(s); +} + +static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { + .abort = bdrv_remove_filter_or_cow_child_abort, + .commit = bdrv_remove_filter_or_cow_child_commit, + .clean = bdrv_remove_filter_or_cow_child_clean, +}; + +/* + * A function to remove backing or file child of @bs. + * Function doesn't update permissions, caller is responsible for this. + */ +static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, + BdrvChild *child, + Transaction *tran) +{ + BdrvChild **childp; + BdrvRemoveFilterOrCowChild *s; + + if (!child) { + return; + } + + /* + * Keep a reference to @bs so @childp will stay valid throughout the + * transaction (required by bdrv_replace_child_tran()) + */ + bdrv_ref(bs); + if (child == bs->backing) { + childp = &bs->backing; + } else if (child == bs->file) { + childp = &bs->file; + } else { + g_assert_not_reached(); + } + + if (child->bs) { + /* + * Pass free_empty_child=false, we will free the child in + * bdrv_remove_filter_or_cow_child_commit() + */ + bdrv_replace_child_tran(childp, NULL, tran, false); + } + + s = g_new(BdrvRemoveFilterOrCowChild, 1); + *s = (BdrvRemoveFilterOrCowChild) { + .child = child, + .bs = bs, + .is_backing = (childp == &bs->backing), + }; + tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); +} + /* - * With auto_skip=true bdrv_replace_node_common skips updating from parents - * if it creates a parent-child relation loop or if parent is block-job. - * - * With auto_skip=false the error is returned if from has a parent which should - * not be updated. + * A function to remove backing-chain child of @bs if exists: cow child for + * format nodes (always .backing) and filter child for filters (may be .file or + * .backing) */ -static int bdrv_replace_node_common(BlockDriverState *from, +static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, + Transaction *tran) +{ + bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran); +} + +static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, - bool auto_skip, Error **errp) + bool auto_skip, Transaction *tran, + Error **errp) { BdrvChild *c, *next; - GSList *list = NULL, *p; - uint64_t perm = 0, shared = BLK_PERM_ALL; - int ret; - - /* Make sure that @from doesn't go away until we have successfully attached - * all of its parents to @to. 
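[Editor's note, not part of the patch] The abort/commit/clean callbacks above are instances of QEMU's Transaction API, which this series uses for every graph manipulation: the change is applied speculatively, an action with undo/finish/cleanup hooks is registered with tran_add(), and tran_finalize() later either commits or rolls everything back. A condensed, hypothetical action showing that shape; DemoUndo and demo_change() are invented for illustration:

typedef struct DemoUndo {
    BlockDriverState *bs;   /* kept referenced while the change is pending */
} DemoUndo;

static void demo_abort(void *opaque)
{
    /* Undo the speculative change made before tran_add(). */
}

static void demo_commit(void *opaque)
{
    /* The change is final; drop anything kept only for rollback. */
}

static void demo_clean(void *opaque)
{
    DemoUndo *s = opaque;

    /* Runs after either .commit or .abort. */
    bdrv_unref(s->bs);
    g_free(s);
}

static TransactionActionDrv demo_drv = {
    .abort  = demo_abort,
    .commit = demo_commit,
    .clean  = demo_clean,
};

static int demo_change(BlockDriverState *bs, Error **errp)
{
    Transaction *tran = tran_new();
    DemoUndo *s = g_new(DemoUndo, 1);
    int ret = 0;

    bdrv_ref(bs);
    s->bs = bs;
    /* ... apply the change speculatively ... */
    tran_add(tran, &demo_drv, s);

    /* ret == 0 commits, ret < 0 aborts; .clean runs in both cases. */
    tran_finalize(tran, ret);
    return ret;
}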
*/ - bdrv_ref(from); - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); - bdrv_drained_begin(from); + assert(to != NULL); - /* Put all parents into @list and calculate their cumulative permissions */ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { if (auto_skip) { continue; } - ret = -EINVAL; error_setg(errp, "Should not change '%s' link to '%s'", c->name, from->node_name); - goto out; + return -EINVAL; } if (c->frozen) { - ret = -EPERM; error_setg(errp, "Cannot change '%s' link to '%s'", c->name, from->node_name); - goto out; + return -EPERM; + } + + /* + * Passing a pointer to the local variable @c is fine here, because + * @to is not NULL, and so &c will not be attached to the transaction. + */ + bdrv_replace_child_tran(&c, to, tran, true); + } + + return 0; +} + +/* + * With auto_skip=true bdrv_replace_node_common skips updating from parents + * if it creates a parent-child relation loop or if parent is block-job. + * + * With auto_skip=false the error is returned if from has a parent which should + * not be updated. + * + * With @detach_subchain=true @to must be in a backing chain of @from. In this + * case backing link of the cow-parent of @to is removed. + * + * @to must not be NULL. + */ +static int bdrv_replace_node_common(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, bool detach_subchain, + Error **errp) +{ + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + BlockDriverState *to_cow_parent = NULL; + int ret; + + assert(to != NULL); + + if (detach_subchain) { + assert(bdrv_chain_contains(from, to)); + assert(from != to); + for (to_cow_parent = from; + bdrv_filter_or_cow_bs(to_cow_parent) != to; + to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent)) + { + ; } - list = g_slist_prepend(list, c); - perm |= c->perm; - shared &= c->shared_perm; } - /* Check whether the required permissions can be granted on @to, ignoring - * all BdrvChild in @list so that they can't block themselves. */ - ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); + /* Make sure that @from doesn't go away until we have successfully attached + * all of its parents to @to. */ + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* + * Do the replacement without permission update. + * Replacement may influence the permissions, we should calculate new + * permissions based on new graph. If we fail, we'll roll-back the + * replacement. + */ + ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp); if (ret < 0) { - bdrv_abort_perm_update(to); goto out; } - /* Now actually perform the change. We performed the permission check for - * all elements of @list at once, so set the permissions all at once at the - * very end. 
*/ - for (p = list; p != NULL; p = p->next) { - c = p->data; - - bdrv_ref(to); - bdrv_replace_child_noperm(c, to); - bdrv_unref(from); + if (detach_subchain) { + bdrv_remove_filter_or_cow_child(to_cow_parent, tran); } - bdrv_set_perm(to); + found = g_hash_table_new(NULL, NULL); + + refresh_list = bdrv_topological_dfs(refresh_list, found, to); + refresh_list = bdrv_topological_dfs(refresh_list, found, from); + + ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); + if (ret < 0) { + goto out; + } ret = 0; out: - g_slist_free(list); + tran_finalize(tran, ret); + bdrv_drained_end(from); bdrv_unref(from); return ret; } +/** + * Replace node @from by @to (where neither may be NULL). + */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { - return bdrv_replace_node_common(from, to, true, errp); + return bdrv_replace_node_common(from, to, true, false, errp); +} + +int bdrv_drop_filter(BlockDriverState *bs, Error **errp) +{ + return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true, + errp); } /* @@ -4676,37 +5167,69 @@ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, * This will modify the BlockDriverState fields, and swap contents * between bs_new and bs_top. Both bs_new and bs_top are modified. * - * bs_new must not be attached to a BlockBackend. + * bs_new must not be attached to a BlockBackend and must not have backing + * child. * * This function does not create any image files. - * - * bdrv_append() takes ownership of a bs_new reference and unrefs it because - * that's what the callers commonly need. bs_new will be referenced by the old - * parents of bs_top after bdrv_append() returns. If the caller needs to keep a - * reference of its own, it must call bdrv_ref(). */ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, Error **errp) { - int ret = bdrv_set_backing_hd(bs_new, bs_top, errp); + int ret; + Transaction *tran = tran_new(); + + assert(!bs_new->backing); + + ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), + &bs_new->backing, tran, errp); if (ret < 0) { goto out; } - ret = bdrv_replace_node(bs_top, bs_new, errp); + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); if (ret < 0) { - bdrv_set_backing_hd(bs_new, NULL, &error_abort); goto out; } - ret = 0; - + ret = bdrv_refresh_perms(bs_new, errp); out: - /* - * bs_new is now referenced by its new parents, we don't need the - * additional reference any more. 
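[Editor's note, not part of the patch] The new bdrv_replace_node_common() above, and bdrv_replace_child_bs()/bdrv_append() further below, all follow the same recipe: rewire the graph without touching permissions, recompute permissions once over every affected subtree, and let the transaction roll the whole thing back if that fails. A condensed sketch of that recipe; demo_swap_nodes() is invented, but the helpers it calls are the ones introduced in this patch:

static int demo_swap_nodes(BlockDriverState *from, BlockDriverState *to,
                           Error **errp)
{
    g_autoptr(GHashTable) found = NULL;
    g_autoptr(GSList) refresh_list = NULL;
    Transaction *tran = tran_new();
    int ret;

    /* 1. Move all of @from's parents to @to, permissions untouched. */
    ret = bdrv_replace_node_noperm(from, to, true, tran, errp);
    if (ret < 0) {
        goto out;
    }

    /* 2. Recompute permissions over everything reachable from either node. */
    found = g_hash_table_new(NULL, NULL);
    refresh_list = bdrv_topological_dfs(refresh_list, found, to);
    refresh_list = bdrv_topological_dfs(refresh_list, found, from);
    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);

out:
    /* 3. Commit on success; on failure undo the rewiring as well. */
    tran_finalize(tran, ret);
    return ret;
}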
- */ - bdrv_unref(bs_new); + tran_finalize(tran, ret); + + bdrv_refresh_limits(bs_top, NULL, NULL); + + return ret; +} + +/* Not for empty child */ +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp) +{ + int ret; + Transaction *tran = tran_new(); + g_autoptr(GHashTable) found = NULL; + g_autoptr(GSList) refresh_list = NULL; + BlockDriverState *old_bs = child->bs; + + bdrv_ref(old_bs); + bdrv_drained_begin(old_bs); + bdrv_drained_begin(new_bs); + + bdrv_replace_child_tran(&child, new_bs, tran, true); + /* @new_bs must have been non-NULL, so @child must not have been freed */ + assert(child != NULL); + + found = g_hash_table_new(NULL, NULL); + refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); + refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); + + ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); + + tran_finalize(tran, ret); + + bdrv_drained_end(old_bs); + bdrv_drained_end(new_bs); + bdrv_unref(old_bs); return ret; } @@ -4727,29 +5250,61 @@ static void bdrv_delete(BlockDriverState *bs) g_free(bs); } -BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, + +/* + * Replace @bs by newly created block node. + * + * @options is a QDict of options to pass to the block drivers, or NULL for an + * empty set of options. The reference to the QDict belongs to the block layer + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + */ +BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - BlockDriverState *new_node_bs; - Error *local_err = NULL; + ERRP_GUARD(); + int ret; + BlockDriverState *new_node_bs = NULL; + const char *drvname, *node_name; + BlockDriver *drv; + + drvname = qdict_get_try_str(options, "driver"); + if (!drvname) { + error_setg(errp, "driver is not specified"); + goto fail; + } + + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver: '%s'", drvname); + goto fail; + } + + node_name = qdict_get_try_str(options, "node-name"); - new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp); - if (new_node_bs == NULL) { + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, + errp); + options = NULL; /* bdrv_new_open_driver() eats options */ + if (!new_node_bs) { error_prepend(errp, "Could not create node: "); - return NULL; + goto fail; } bdrv_drained_begin(bs); - bdrv_replace_node(bs, new_node_bs, &local_err); + ret = bdrv_replace_node(bs, new_node_bs, errp); bdrv_drained_end(bs); - if (local_err) { - bdrv_unref(new_node_bs); - error_propagate(errp, local_err); - return NULL; + if (ret < 0) { + error_prepend(errp, "Could not replace node: "); + goto fail; } return new_node_bs; + +fail: + qobject_unref(options); + bdrv_unref(new_node_bs); + return NULL; } /* @@ -4782,7 +5337,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs, * -ENOTSUP - format driver doesn't support changing the backing file */ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, - const char *backing_fmt, bool warn) + const char *backing_fmt, bool require) { BlockDriver *drv = bs->drv; int ret; @@ -4796,10 +5351,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, return -EINVAL; } - if (warn && backing_file && !backing_fmt) { - warn_report("Deprecated use of backing file without explicit " - "backing format, use of this image requires " - "potentially unsafe format 
probing"); + if (require && backing_file && !backing_fmt) { + return -EINVAL; } if (drv->bdrv_change_backing_file != NULL) { @@ -5002,7 +5555,17 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, updated_children = g_slist_prepend(updated_children, c); } - bdrv_replace_node_common(top, base, false, &local_err); + /* + * It seems correct to pass detach_subchain=true here, but it triggers + * one more yet not fixed bug, when due to nested aio_poll loop we switch to + * another drained section, which modify the graph (for example, removing + * the child, which we keep in updated_children list). So, it's a TODO. + * + * Note, bug triggered if pass detach_subchain=true here and run + * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash. + * That's a FIXME. + */ + bdrv_replace_node_common(top, base, false, false, &local_err); if (local_err) { error_report_err(local_err); goto exit; @@ -5817,6 +6380,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, void bdrv_init(void) { +#ifdef CONFIG_BDRV_WHITELIST_TOOLS + use_bdrv_whitelist = 1; +#endif module_call_init(MODULE_INIT_BLOCK); } @@ -5940,6 +6506,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) { BdrvChild *child, *parent; int ret; + uint64_t cumulative_perms, cumulative_shared_perms; if (!bs->drv) { return -ENOMEDIUM; @@ -5970,6 +6537,13 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) } } + bdrv_get_cumulative_perm(bs, &cumulative_perms, + &cumulative_shared_perms); + if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) { + /* Our inactive parents still need write access. Inactivation failed. */ + return -EPERM; + } + bs->open_flags |= BDRV_O_INACTIVE; /* @@ -6277,9 +6851,13 @@ void bdrv_img_create(const char *filename, const char *fmt, } assert(full_backing); - /* backing files always opened read-only */ + /* + * No need to do I/O here, which allows us to open encrypted + * backing images without needing the secret + */ back_flags = flags; back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + back_flags |= BDRV_O_NO_IO; backing_options = qdict_new(); if (backing_fmt) { @@ -6295,24 +6873,11 @@ void bdrv_img_create(const char *filename, const char *fmt, goto out; } else { if (!backing_fmt) { - warn_report("Deprecated use of backing file without explicit " - "backing format (detected format of %s)", - bs->drv->format_name); - if (bs->drv != &bdrv_raw) { - /* - * A probe of raw deserves the most attention: - * leaving the backing format out of the image - * will ensure bs->probed is set (ensuring we - * don't accidentally commit into the backing - * file), and allow more spots to warn the users - * to fix their toolchain when opening this image - * later. For other images, we can safely record - * the format that we probed. 
- */ - backing_fmt = bs->drv->format_name; - qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt, - NULL); - } + error_setg(&local_err, + "Backing file specified without backing format"); + error_append_hint(&local_err, "Detected format of %s.", + bs->drv->format_name); + goto out; } if (size == -1) { /* Opened BS, have no size */ @@ -6329,9 +6894,9 @@ void bdrv_img_create(const char *filename, const char *fmt, } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */ } else if (backing_file && !backing_fmt) { - warn_report("Deprecated use of unopened backing file without " - "explicit backing format, use of this image requires " - "potentially unsafe format probing"); + error_setg(&local_err, + "Backing file specified without backing format"); + goto out; } if (size == -1) { @@ -7314,3 +7879,76 @@ BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs) { return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs))); } + +/** + * Check whether [offset, offset + bytes) overlaps with the cached + * block-status data region. + * + * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`, + * which is what bdrv_bsc_is_data()'s interface needs. + * Otherwise, *pnum is not touched. + */ +static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *pnum) +{ + BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache); + bool overlaps; + + overlaps = + qatomic_read(&bsc->valid) && + ranges_overlap(offset, bytes, bsc->data_start, + bsc->data_end - bsc->data_start); + + if (overlaps && pnum) { + *pnum = bsc->data_end - offset; + } + + return overlaps; +} + +/** + * See block_int.h for this function's documentation. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum) +{ + RCU_READ_LOCK_GUARD(); + + return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum); +} + +/** + * See block_int.h for this function's documentation. + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + RCU_READ_LOCK_GUARD(); + + if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) { + qatomic_set(&bs->block_status_cache->valid, false); + } +} + +/** + * See block_int.h for this function's documentation. + */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1); + BdrvBlockStatusCache *old_bsc; + + *new_bsc = (BdrvBlockStatusCache) { + .valid = true, + .data_start = offset, + .data_end = offset + bytes, + }; + + QEMU_LOCK_GUARD(&bs->bsc_modify_lock); + + old_bsc = qatomic_rcu_read(&bs->block_status_cache); + qatomic_rcu_set(&bs->block_status_cache, new_bsc); + if (old_bsc) { + g_free_rcu(old_bsc, rcu); + } +} diff --git a/block/aio_task.c b/block/aio_task.c index 88989fa248c..9bd17ea2c13 100644 --- a/block/aio_task.c +++ b/block/aio_task.c @@ -98,6 +98,8 @@ AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks) { AioTaskPool *pool = g_new0(AioTaskPool, 1); + assert(max_busy_tasks > 0); + pool->main_co = qemu_coroutine_self(); pool->max_busy_tasks = max_busy_tasks; diff --git a/block/backup-top.c b/block/backup-top.c deleted file mode 100644 index 589e8b651d2..00000000000 --- a/block/backup-top.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * backup-top filter driver - * - * The driver performs Copy-Before-Write (CBW) operation: it is injected above - * some node, and before each write it copies _old_ data to the target node. - * - * Copyright (c) 2018-2019 Virtuozzo International GmbH. 
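[Editor's note, not part of the patch] Stepping back to the bdrv_bsc_* helpers added at the end of block.c just above: they implement a single-range block-status cache, read under RCU and republished under bsc_modify_lock with g_free_rcu() retiring the old copy. A hypothetical protocol-driver fragment showing how such a cache is meant to be consulted and refilled; demo_co_block_status() and its "slow path" are invented, only the bdrv_bsc_* calls come from the patch:

static int coroutine_fn demo_co_block_status(BlockDriverState *bs,
                                             int64_t offset, int64_t bytes,
                                             int64_t *pnum)
{
    if (bdrv_bsc_is_data(bs, offset, pnum)) {
        /* Fast path: a previous query already reported this range as data. */
        return BDRV_BLOCK_DATA;
    }

    /*
     * Slow path: ask the storage backend (e.g. lseek(SEEK_DATA/SEEK_HOLE)).
     * Here we simply pretend the whole requested range turned out to be data.
     */
    *pnum = bytes;

    /* Publish the result so the next query can take the fast path. */
    bdrv_bsc_fill(bs, offset, bytes);

    return BDRV_BLOCK_DATA;
}

A write path would pair this with bdrv_bsc_invalidate_range() so that ranges cached as data are dropped when they are overwritten or discarded.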
- * - * Author: - * Sementsov-Ogievskiy Vladimir - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include "qemu/osdep.h" - -#include "sysemu/block-backend.h" -#include "qemu/cutils.h" -#include "qapi/error.h" -#include "block/block_int.h" -#include "block/qdict.h" -#include "block/block-copy.h" - -#include "block/backup-top.h" - -typedef struct BDRVBackupTopState { - BlockCopyState *bcs; - BdrvChild *target; - bool active; - int64_t cluster_size; -} BDRVBackupTopState; - -static coroutine_fn int backup_top_co_preadv( - BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - BDRVBackupTopState *s = bs->opaque; - - if (!s->active) { - return -EIO; - } - - return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); -} - -static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, BdrvRequestFlags flags) -{ - BDRVBackupTopState *s = bs->opaque; - uint64_t off, end; - - if (!s->active) { - return -EIO; - } - - if (flags & BDRV_REQ_WRITE_UNCHANGED) { - return 0; - } - - off = QEMU_ALIGN_DOWN(offset, s->cluster_size); - end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size); - - return block_copy(s->bcs, off, end - off, true); -} - -static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) -{ - int ret = backup_top_cbw(bs, offset, bytes, 0); - if (ret < 0) { - return ret; - } - - return bdrv_co_pdiscard(bs->backing, offset, bytes); -} - -static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) -{ - int ret = backup_top_cbw(bs, offset, bytes, flags); - if (ret < 0) { - return ret; - } - - return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); -} - -static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - int ret = backup_top_cbw(bs, offset, bytes, flags); - if (ret < 0) { - return ret; - } - - return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); -} - -static int coroutine_fn backup_top_co_flush(BlockDriverState *bs) -{ - if (!bs->backing) { - return 0; - } - - return bdrv_co_flush(bs->backing->bs); -} - -static void backup_top_refresh_filename(BlockDriverState *bs) -{ - if (bs->backing == NULL) { - /* - * we can be here after failed bdrv_attach_child in - * bdrv_set_backing_hd - */ - return; - } - pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), - bs->backing->bs->filename); -} - -static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) -{ - BDRVBackupTopState *s = bs->opaque; - - if (!s->active) { - /* - * The filter node may be in process of bdrv_append(), which firstly do - * bdrv_set_backing_hd() and then bdrv_replace_node(). 
This means that - * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So, - * let's require nothing during bdrv_append() and refresh permissions - * after it (see bdrv_backup_top_append()). - */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - return; - } - - if (!(role & BDRV_CHILD_FILTERED)) { - /* - * Target child - * - * Share write to target (child_file), to not interfere - * with guest writes to its disk which may be in target backing chain. - * Can't resize during a backup block job because we check the size - * only upfront. - */ - *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; - *nperm = BLK_PERM_WRITE; - } else { - /* Source child */ - bdrv_default_perms(bs, c, role, reopen_queue, - perm, shared, nperm, nshared); - - if (perm & BLK_PERM_WRITE) { - *nperm = *nperm | BLK_PERM_CONSISTENT_READ; - } - *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); - } -} - -BlockDriver bdrv_backup_top_filter = { - .format_name = "backup-top", - .instance_size = sizeof(BDRVBackupTopState), - - .bdrv_co_preadv = backup_top_co_preadv, - .bdrv_co_pwritev = backup_top_co_pwritev, - .bdrv_co_pwrite_zeroes = backup_top_co_pwrite_zeroes, - .bdrv_co_pdiscard = backup_top_co_pdiscard, - .bdrv_co_flush = backup_top_co_flush, - - .bdrv_refresh_filename = backup_top_refresh_filename, - - .bdrv_child_perm = backup_top_child_perm, - - .is_filter = true, -}; - -BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, - BlockDriverState *target, - const char *filter_node_name, - uint64_t cluster_size, - BackupPerf *perf, - BdrvRequestFlags write_flags, - BlockCopyState **bcs, - Error **errp) -{ - ERRP_GUARD(); - int ret; - BDRVBackupTopState *state; - BlockDriverState *top; - bool appended = false; - - assert(source->total_sectors == target->total_sectors); - - top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name, - BDRV_O_RDWR, errp); - if (!top) { - return NULL; - } - - state = top->opaque; - top->total_sectors = source->total_sectors; - top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | - (BDRV_REQ_FUA & source->supported_write_flags); - top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & - source->supported_zero_flags); - - bdrv_ref(target); - state->target = bdrv_attach_child(top, target, "target", &child_of_bds, - BDRV_CHILD_DATA, errp); - if (!state->target) { - bdrv_unref(target); - bdrv_unref(top); - return NULL; - } - - bdrv_drained_begin(source); - - bdrv_ref(top); - ret = bdrv_append(top, source, errp); - if (ret < 0) { - error_prepend(errp, "Cannot append backup-top filter: "); - goto fail; - } - appended = true; - - /* - * bdrv_append() finished successfully, now we can require permissions - * we want. 
- */ - state->active = true; - ret = bdrv_child_refresh_perms(top, top->backing, errp); - if (ret < 0) { - error_prepend(errp, "Cannot set permissions for backup-top filter: "); - goto fail; - } - - state->cluster_size = cluster_size; - state->bcs = block_copy_state_new(top->backing, state->target, - cluster_size, perf->use_copy_range, - write_flags, errp); - if (!state->bcs) { - error_prepend(errp, "Cannot create block-copy-state: "); - goto fail; - } - *bcs = state->bcs; - - bdrv_drained_end(source); - - return top; - -fail: - if (appended) { - state->active = false; - bdrv_backup_top_drop(top); - } else { - bdrv_unref(top); - } - - bdrv_drained_end(source); - - return NULL; -} - -void bdrv_backup_top_drop(BlockDriverState *bs) -{ - BDRVBackupTopState *s = bs->opaque; - - bdrv_drained_begin(bs); - - block_copy_state_free(s->bcs); - - s->active = false; - bdrv_child_refresh_perms(bs, bs->backing, &error_abort); - bdrv_replace_node(bs, bs->backing->bs, &error_abort); - bdrv_set_backing_hd(bs, NULL, &error_abort); - - bdrv_drained_end(bs); - - bdrv_unref(bs); -} diff --git a/block/backup-top.h b/block/backup-top.h deleted file mode 100644 index b28b0031c45..00000000000 --- a/block/backup-top.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * backup-top filter driver - * - * The driver performs Copy-Before-Write (CBW) operation: it is injected above - * some node, and before each write it copies _old_ data to the target node. - * - * Copyright (c) 2018-2019 Virtuozzo International GmbH. - * - * Author: - * Sementsov-Ogievskiy Vladimir - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef BACKUP_TOP_H -#define BACKUP_TOP_H - -#include "block/block_int.h" -#include "block/block-copy.h" - -BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, - BlockDriverState *target, - const char *filter_node_name, - uint64_t cluster_size, - BackupPerf *perf, - BdrvRequestFlags write_flags, - BlockCopyState **bcs, - Error **errp); -void bdrv_backup_top_drop(BlockDriverState *bs); - -#endif /* BACKUP_TOP_H */ diff --git a/block/backup.c b/block/backup.c index 6cf2f974aa2..21d5983779e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -27,13 +27,11 @@ #include "qemu/bitmap.h" #include "qemu/error-report.h" -#include "block/backup-top.h" - -#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) +#include "block/copy-before-write.h" typedef struct BackupBlockJob { BlockJob common; - BlockDriverState *backup_top; + BlockDriverState *cbw; BlockDriverState *source_bs; BlockDriverState *target_bs; @@ -104,7 +102,7 @@ static void backup_clean(Job *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); block_job_remove_all_bdrv(&s->common); - bdrv_backup_top_drop(s->backup_top); + bdrv_cbw_drop(s->cbw); } void backup_do_checkpoint(BlockJob *job, Error **errp) @@ -235,18 +233,16 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job) BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs); if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { + bdrv_clear_dirty_bitmap(bcs_bitmap, NULL); ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL, true); assert(ret); - } else { - if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { - /* - * We can't hog the coroutine to initialize this thoroughly. - * Set a flag and resume work when we are able to yield safely. - */ - block_copy_set_skip_unallocated(job->bcs, true); - } - bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len); + } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { + /* + * We can't hog the coroutine to initialize this thoroughly. + * Set a flag and resume work when we are able to yield safely. + */ + block_copy_set_skip_unallocated(job->bcs, true); } estimate = bdrv_get_dirty_count(bcs_bitmap); @@ -331,11 +327,12 @@ static void coroutine_fn backup_set_speed(BlockJob *job, int64_t speed) } } -static void backup_cancel(Job *job) +static bool backup_cancel(Job *job, bool force) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); bdrv_cancel_in_flight(s->target_bs); + return true; } static const BlockJobDriver backup_job_driver = { @@ -354,43 +351,6 @@ static const BlockJobDriver backup_job_driver = { .set_speed = backup_set_speed, }; -static int64_t backup_calculate_cluster_size(BlockDriverState *target, - Error **errp) -{ - int ret; - BlockDriverInfo bdi; - bool target_does_cow = bdrv_backing_chain_next(target); - - /* - * If there is no backing file on the target, we cannot rely on COW if our - * backup cluster size is smaller than the target cluster size. Even for - * targets with a backing file, try to avoid COW if possible. - */ - ret = bdrv_get_info(target, &bdi); - if (ret == -ENOTSUP && !target_does_cow) { - /* Cluster size is not defined */ - warn_report("The target block device doesn't provide " - "information about the block size and it doesn't have a " - "backing file. The default block size of %u bytes is " - "used. 
If the actual block size of the target exceeds " - "this default, the backup may be unusable", - BACKUP_CLUSTER_SIZE_DEFAULT); - return BACKUP_CLUSTER_SIZE_DEFAULT; - } else if (ret < 0 && !target_does_cow) { - error_setg_errno(errp, -ret, - "Couldn't determine the cluster size of the target image, " - "which has no backing file"); - error_append_hint(errp, - "Aborting, since this may create an unusable destination image\n"); - return ret; - } else if (ret < 0 && target_does_cow) { - /* Not fatal; just trudge on ahead. */ - return BACKUP_CLUSTER_SIZE_DEFAULT; - } - - return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); -} - BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, @@ -407,8 +367,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, int64_t len, target_len; BackupBlockJob *job = NULL; int64_t cluster_size; - BdrvRequestFlags write_flags; - BlockDriverState *backup_top = NULL; + BlockDriverState *cbw = NULL; BlockCopyState *bcs = NULL; assert(bs); @@ -449,13 +408,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, return NULL; } - cluster_size = backup_calculate_cluster_size(target, errp); - if (cluster_size < 0) { - goto error; - } - - if (perf->max_workers < 1) { - error_setg(errp, "max-workers must be greater than zero"); + if (perf->max_workers < 1 || perf->max_workers > INT_MAX) { + error_setg(errp, "max-workers must be between 1 and %d", INT_MAX); return NULL; } @@ -465,13 +419,6 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, return NULL; } - if (perf->max_chunk && perf->max_chunk < cluster_size) { - error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " - "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size); - return NULL; - } - - if (sync_bitmap) { /* If we need to write to this bitmap, check that we can: */ if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && @@ -504,39 +451,28 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - /* - * If source is in backing chain of target assume that target is going to be - * used for "image fleecing", i.e. it should represent a kind of snapshot of - * source at backup-start point in time. And target is going to be read by - * somebody (for example, used as NBD export) during backup job. - * - * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid - * intersection of backup writes and third party reads from target, - * otherwise reading from target we may occasionally read already updated by - * guest data. - * - * For more information see commit f8d59dfb40bb and test - * tests/qemu-iotests/222 - */ - write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | - (compress ? 
BDRV_REQ_WRITE_COMPRESSED : 0), + cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp); + if (!cbw) { + goto error; + } - backup_top = bdrv_backup_top_append(bs, target, filter_node_name, - cluster_size, perf, - write_flags, &bcs, errp); - if (!backup_top) { + cluster_size = block_copy_cluster_size(bcs); + + if (perf->max_chunk && perf->max_chunk < cluster_size) { + error_setg(errp, "Required max-chunk (%" PRIi64 ") is less than backup " + "cluster size (%" PRIi64 ")", perf->max_chunk, cluster_size); goto error; } /* job->len is fixed, so we can't allow resize */ - job = block_job_create(job_id, &backup_job_driver, txn, backup_top, + job = block_job_create(job_id, &backup_job_driver, txn, cbw, 0, BLK_PERM_ALL, speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - job->backup_top = backup_top; + job->cbw = cbw; job->source_bs = bs; job->target_bs = target; job->on_source_error = on_source_error; @@ -549,10 +485,11 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->len = len; job->perf = *perf; + block_copy_set_copy_opts(bcs, perf->use_copy_range, compress); block_copy_set_progress_meter(bcs, &job->common.job.progress); block_copy_set_speed(bcs, speed); - /* Required permissions are already taken by backup-top target */ + /* Required permissions are taken by copy-before-write filter target */ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); @@ -562,8 +499,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL); } - if (backup_top) { - bdrv_backup_top_drop(backup_top); + if (cbw) { + bdrv_cbw_drop(cbw); } return NULL; diff --git a/block/blkdebug.c b/block/blkdebug.c index 2c0b9b0ee85..bbf29487030 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -38,25 +38,27 @@ #include "qapi/qobject-input-visitor.h" #include "sysemu/qtest.h" +/* All APIs are thread-safe */ + typedef struct BDRVBlkdebugState { - int state; - int new_state; + /* IN: initialized in blkdebug_open() and never changed */ uint64_t align; uint64_t max_transfer; uint64_t opt_write_zero; uint64_t max_write_zero; uint64_t opt_discard; uint64_t max_discard; - + char *config_file; /* For blkdebug_refresh_filename() */ + /* initialized in blkdebug_parse_perms() */ uint64_t take_child_perms; uint64_t unshare_child_perms; - /* For blkdebug_refresh_filename() */ - char *config_file; - + /* State. 
Protected by lock */ + int state; QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; + QemuMutex lock; } BDRVBlkdebugState; typedef struct BlkdebugAIOCB { @@ -65,8 +67,11 @@ typedef struct BlkdebugAIOCB { } BlkdebugAIOCB; typedef struct BlkdebugSuspendedReq { + /* IN: initialized in suspend_request() */ Coroutine *co; char *tag; + + /* List entry protected BDRVBlkdebugState's lock */ QLIST_ENTRY(BlkdebugSuspendedReq) next; } BlkdebugSuspendedReq; @@ -74,9 +79,11 @@ enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, ACTION_SUSPEND, + ACTION__MAX, }; typedef struct BlkdebugRule { + /* IN: initialized in add_rule() or blkdebug_debug_breakpoint() */ BlkdebugEvent event; int action; int state; @@ -95,6 +102,8 @@ typedef struct BlkdebugRule { char *tag; } suspend; } options; + + /* List entries protected BDRVBlkdebugState's lock */ QLIST_ENTRY(BlkdebugRule) next; QSIMPLEQ_ENTRY(BlkdebugRule) active_next; } BlkdebugRule; @@ -244,11 +253,14 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) }; /* Add the rule */ + qemu_mutex_lock(&s->lock); QLIST_INSERT_HEAD(&s->rules[event], rule, next); + qemu_mutex_unlock(&s->lock); return 0; } +/* Called with lock held or from .bdrv_close */ static void remove_rule(BlkdebugRule *rule) { switch (rule->action) { @@ -467,6 +479,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, int ret; uint64_t align; + qemu_mutex_init(&s->lock); opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); if (!qemu_opts_absorb_qdict(opts, options, errp)) { ret = -EINVAL; @@ -567,6 +580,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, ret = 0; out: if (ret < 0) { + qemu_mutex_destroy(&s->lock); g_free(s->config_file); } qemu_opts_del(opts); @@ -581,6 +595,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int error; bool immediately; + qemu_mutex_lock(&s->lock); QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { uint64_t inject_offset = rule->options.inject.offset; @@ -594,6 +609,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } if (!rule || !rule->options.inject.error) { + qemu_mutex_unlock(&s->lock); return 0; } @@ -605,6 +621,7 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, remove_rule(rule); } + qemu_mutex_unlock(&s->lock); if (!immediately) { aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self()); qemu_coroutine_yield(); @@ -614,8 +631,8 @@ static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkdebug_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int err; @@ -635,8 +652,8 @@ blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkdebug_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int err; @@ -667,7 +684,7 @@ static int blkdebug_co_flush(BlockDriverState *bs) } static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { uint32_t align = 
MAX(bs->bl.request_alignment, @@ -700,7 +717,7 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { uint32_t align = bs->bl.pdiscard_alignment; int err; @@ -770,78 +787,80 @@ static void blkdebug_close(BlockDriverState *bs) } g_free(s->config_file); + qemu_mutex_destroy(&s->lock); } +/* Called with lock held. */ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) { BDRVBlkdebugState *s = bs->opaque; - BlkdebugSuspendedReq r; + BlkdebugSuspendedReq *r; - r = (BlkdebugSuspendedReq) { - .co = qemu_coroutine_self(), - .tag = g_strdup(rule->options.suspend.tag), - }; + r = g_new(BlkdebugSuspendedReq, 1); + + r->co = qemu_coroutine_self(); + r->tag = g_strdup(rule->options.suspend.tag); remove_rule(rule); - QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + QLIST_INSERT_HEAD(&s->suspended_reqs, r, next); if (!qtest_enabled()) { - printf("blkdebug: Suspended request '%s'\n", r.tag); - } - qemu_coroutine_yield(); - if (!qtest_enabled()) { - printf("blkdebug: Resuming request '%s'\n", r.tag); + printf("blkdebug: Suspended request '%s'\n", r->tag); } - - QLIST_REMOVE(&r, next); - g_free(r.tag); } -static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - bool injected) +/* Called with lock held. */ +static void process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, + int *action_count, int *new_state) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ if (rule->state && rule->state != s->state) { - return injected; + return; } /* Take the action */ + action_count[rule->action]++; switch (rule->action) { case ACTION_INJECT_ERROR: - if (!injected) { + if (action_count[ACTION_INJECT_ERROR] == 1) { QSIMPLEQ_INIT(&s->active_rules); - injected = true; } QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next); break; case ACTION_SET_STATE: - s->new_state = rule->options.set_state.new_state; + *new_state = rule->options.set_state.new_state; break; case ACTION_SUSPEND: suspend_request(bs, rule); break; } - return injected; } static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule, *next; - bool injected; + int new_state; + int actions_count[ACTION__MAX] = { 0 }; assert((int)event >= 0 && event < BLKDBG__MAX); - injected = false; - s->new_state = s->state; - QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { - injected = process_rule(bs, rule, injected); + WITH_QEMU_LOCK_GUARD(&s->lock) { + new_state = s->state; + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { + process_rule(bs, rule, actions_count, &new_state); + } + s->state = new_state; + } + + while (actions_count[ACTION_SUSPEND] > 0) { + qemu_coroutine_yield(); + actions_count[ACTION_SUSPEND]--; } - s->state = s->new_state; } static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, @@ -864,33 +883,64 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, .options.suspend.tag = g_strdup(tag), }; + qemu_mutex_lock(&s->lock); QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + qemu_mutex_unlock(&s->lock); return 0; } -static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +/* Called with lock held. May temporarily release lock. 
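[Editor's note, not part of the patch] resume_req_by_tag() below, like the reworked blkdebug_debug_event() above, is careful never to enter or yield a coroutine while s->lock is held: decisions are made under the lock, and the lock is dropped (or the list walk restarted) around the coroutine operation. A stripped-down sketch of that discipline; demo_event() is invented, only the lock-guard and coroutine primitives are real:

static void demo_event(BDRVBlkdebugState *s)
{
    int suspend_count = 0;

    WITH_QEMU_LOCK_GUARD(&s->lock) {
        /*
         * Inspect rules and update s->state here; count matching suspend
         * actions instead of yielding while the mutex is held.
         */
        suspend_count = 1;   /* placeholder result */
    }

    /* Yield only after the guard has released the lock. */
    while (suspend_count-- > 0) {
        qemu_coroutine_yield();
    }
}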
*/ +static int resume_req_by_tag(BDRVBlkdebugState *s, const char *tag, bool all) { - BDRVBlkdebugState *s = bs->opaque; - BlkdebugSuspendedReq *r, *next; + BlkdebugSuspendedReq *r; - QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) { +retry: + /* + * No need for _SAFE, since a different coroutine can remove another node + * (not the current one) in this list, and when the current one is removed + * the iteration starts back from beginning anyways. + */ + QLIST_FOREACH(r, &s->suspended_reqs, next) { if (!strcmp(r->tag, tag)) { - qemu_coroutine_enter(r->co); + Coroutine *co = r->co; + + if (!qtest_enabled()) { + printf("blkdebug: Resuming request '%s'\n", r->tag); + } + + QLIST_REMOVE(r, next); + g_free(r->tag); + g_free(r); + + qemu_mutex_unlock(&s->lock); + qemu_coroutine_enter(co); + qemu_mutex_lock(&s->lock); + + if (all) { + goto retry; + } return 0; } } return -ENOENT; } +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + QEMU_LOCK_GUARD(&s->lock); + return resume_req_by_tag(s, tag, false); +} + static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, const char *tag) { BDRVBlkdebugState *s = bs->opaque; - BlkdebugSuspendedReq *r, *r_next; BlkdebugRule *rule, *next; int i, ret = -ENOENT; + QEMU_LOCK_GUARD(&s->lock); for (i = 0; i < BLKDBG__MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { if (rule->action == ACTION_SUSPEND && @@ -900,11 +950,8 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, } } } - QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) { - if (!strcmp(r->tag, tag)) { - qemu_coroutine_enter(r->co); - ret = 0; - } + if (resume_req_by_tag(s, tag, true) == 0) { + ret = 0; } return ret; } @@ -914,6 +961,7 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) BDRVBlkdebugState *s = bs->opaque; BlkdebugSuspendedReq *r; + QEMU_LOCK_GUARD(&s->lock); QLIST_FOREACH(r, &s->suspended_reqs, next) { if (!strcmp(r->tag, tag)) { return true; diff --git a/block/blklogwrites.c b/block/blklogwrites.c index b7579370a30..f7a251e91f9 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -301,8 +301,8 @@ static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) } static int coroutine_fn -blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } @@ -460,16 +460,16 @@ blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) } static int coroutine_fn -blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, blk_log_writes_co_do_file_pwritev, 0, false); } static int coroutine_fn -blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, - BdrvRequestFlags flags) +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int64_t bytes, BdrvRequestFlags flags) { return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, blk_log_writes_co_do_file_pwrite_zeroes, 0, @@ -484,9 +484,9 @@ static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) } static int coroutine_fn 
-blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { - return blk_log_writes_co_log(bs, offset, count, NULL, 0, + return blk_log_writes_co_log(bs, offset, bytes, NULL, 0, blk_log_writes_co_do_file_pdiscard, LOG_DISCARD_FLAG, false); } diff --git a/block/blkreplay.c b/block/blkreplay.c index 4a247752fd8..dcbe780ddbd 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -72,7 +72,7 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs, } static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); @@ -83,7 +83,7 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -94,7 +94,7 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); @@ -105,7 +105,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pdiscard(bs->file, offset, bytes); diff --git a/block/blkverify.c b/block/blkverify.c index 188d7632fae..d1facf5ba90 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -221,8 +221,8 @@ blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset, } static int coroutine_fn -blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlkverifyRequest r; QEMUIOVector raw_qiov; @@ -250,8 +250,8 @@ blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +blkverify_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlkverifyRequest r; return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true); diff --git a/block/block-backend.c b/block/block-backend.c index 413af51f3ba..12ef80ea170 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -14,11 +14,11 @@ #include "sysemu/block-backend.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "block/coroutines.h" #include "block/throttle-groups.h" #include "hw/qdev-core.h" #include "sysemu/blockdev.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "sysemu/replay.h" #include "qapi/error.h" #include "qapi/qapi-events-block.h" @@ -142,19 +142,18 @@ static void blk_root_set_aio_ctx(BdrvChild 
*child, AioContext *ctx, static char *blk_root_get_parent_desc(BdrvChild *child) { BlockBackend *blk = child->opaque; - char *dev_id; + g_autofree char *dev_id = NULL; if (blk->name) { - return g_strdup(blk->name); + return g_strdup_printf("block device '%s'", blk->name); } dev_id = blk_get_attached_dev_id(blk); if (*dev_id) { - return dev_id; + return g_strdup_printf("block device '%s'", dev_id); } else { /* TODO Callback into the BB owner for something more detailed */ - g_free(dev_id); - return g_strdup("a block device"); + return g_strdup("an unnamed block device"); } } @@ -298,6 +297,13 @@ static void blk_root_detach(BdrvChild *child) } } +static AioContext *blk_root_get_parent_aio_context(BdrvChild *c) +{ + BlockBackend *blk = c->opaque; + + return blk_get_aio_context(blk); +} + static const BdrvChildClass child_root = { .inherit_options = blk_root_inherit_options, @@ -318,6 +324,8 @@ static const BdrvChildClass child_root = { .can_set_aio_ctx = blk_root_can_set_aio_ctx, .set_aio_ctx = blk_root_set_aio_ctx, + + .get_parent_aio_context = blk_root_get_parent_aio_context, }; /* @@ -398,15 +406,19 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, BlockBackend *blk; BlockDriverState *bs; uint64_t perm = 0; + uint64_t shared = BLK_PERM_ALL; - /* blk_new_open() is mainly used in .bdrv_create implementations and the - * tools where sharing isn't a concern because the BDS stays private, so we - * just request permission according to the flags. + /* + * blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a major concern because the BDS stays private + * and the file is generally not supposed to be used by a second process, + * so we just request permission according to the flags. * * The exceptions are xen_disk and blockdev_init(); in these cases, the * caller of blk_new_open() doesn't make use of the permissions, but they * shouldn't hurt either. We can still share everything here because the - * guest devices will add their own blockers if they can't share. */ + * guest devices will add their own blockers if they can't share. + */ if ((flags & BDRV_O_NO_IO) == 0) { perm |= BLK_PERM_CONSISTENT_READ; if (flags & BDRV_O_RDWR) { @@ -416,8 +428,11 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, if (flags & BDRV_O_RESIZE) { perm |= BLK_PERM_RESIZE; } + if (flags & BDRV_O_NO_SHARE) { + shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; + } - blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), perm, shared); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); @@ -426,7 +441,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->ctx, perm, BLK_PERM_ALL, blk, errp); + perm, shared, blk, errp); if (!blk->root) { blk_unref(blk); return NULL; @@ -840,7 +855,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) bdrv_ref(bs); blk->root = bdrv_root_attach_child(bs, "root", &child_root, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, - blk->ctx, blk->perm, blk->shared_perm, + blk->perm, blk->shared_perm, blk, errp); if (blk->root == NULL) { return -EPERM; @@ -855,6 +870,14 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) return 0; } +/* + * Change BlockDriverState associated with @blk. 
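[Editor's note, not part of the patch] One practical consequence of the blk_new_open() change above: a tool can pass BDRV_O_NO_SHARE to keep other writers off the image, since the shared permission mask then collapses to CONSISTENT_READ | WRITE_UNCHANGED. A sketch; demo_open_exclusive() and its arguments are illustrative only:

static BlockBackend *demo_open_exclusive(const char *filename, Error **errp)
{
    /*
     * BDRV_O_NO_SHARE narrows the shared permissions, so a second writer
     * can no longer attach to the same node while this backend exists.
     */
    return blk_new_open(filename, NULL, NULL,
                        BDRV_O_RDWR | BDRV_O_NO_SHARE, errp);
}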
+ */ +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) +{ + return bdrv_replace_child_bs(blk->root, new_bs, errp); +} + /* * Sets the permission bitmasks that the user of the BlockBackend needs. */ @@ -1139,11 +1162,11 @@ void blk_set_disable_request_queuing(BlockBackend *blk, bool disable) } static int blk_check_byte_request(BlockBackend *blk, int64_t offset, - size_t size) + int64_t bytes) { int64_t len; - if (size > INT_MAX) { + if (bytes < 0) { return -EIO; } @@ -1161,7 +1184,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return len; } - if (offset > len || len - offset < size) { + if (offset > len || len - offset < bytes) { return -EIO; } } @@ -1182,9 +1205,9 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, BdrvRequestFlags flags) +int coroutine_fn +blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1214,23 +1237,23 @@ blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, } int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; blk_inc_in_flight(blk); - ret = blk_do_preadv(blk, offset, bytes, qiov, flags); + ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags); blk_dec_in_flight(blk); return ret; } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, size_t qiov_offset, - BdrvRequestFlags flags) +int coroutine_fn +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { int ret; BlockDriverState *bs; @@ -1264,26 +1287,40 @@ blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, } int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - unsigned int bytes, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { int ret; blk_inc_in_flight(blk); - ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); blk_dec_in_flight(blk); return ret; } int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags); } +static int coroutine_fn blk_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) +{ + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); + blk_dec_in_flight(blk); + + return ret; +} + typedef struct BlkRwCo { BlockBackend *blk; int64_t offset; @@ -1292,58 +1329,11 @@ typedef struct BlkRwCo { BdrvRequestFlags flags; } BlkRwCo; -static void blk_read_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); - aio_wait_kick(); -} - -static void blk_write_entry(void *opaque) -{ - BlkRwCo *rwco 
= opaque; - QEMUIOVector *qiov = rwco->iobuf; - - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, - qiov, 0, rwco->flags); - aio_wait_kick(); -} - -static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - int64_t bytes, CoroutineEntry co_entry, - BdrvRequestFlags flags) -{ - QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); - BlkRwCo rwco = { - .blk = blk, - .offset = offset, - .iobuf = &qiov, - .flags = flags, - .ret = NOT_DONE, - }; - - blk_inc_in_flight(blk); - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - co_entry(&rwco); - } else { - Coroutine *co = qemu_coroutine_create(co_entry, &rwco); - bdrv_coroutine_enter(blk_bs(blk), co); - BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); - } - blk_dec_in_flight(blk); - - return rwco.ret; -} - int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { - return blk_prw(blk, offset, NULL, bytes, blk_write_entry, - flags | BDRV_REQ_ZERO_WRITE); + return blk_pwritev_part(blk, offset, bytes, NULL, 0, + flags | BDRV_REQ_ZERO_WRITE); } int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) @@ -1390,7 +1380,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, typedef struct BlkAioEmAIOCB { BlockAIOCB common; BlkRwCo rwco; - int bytes; + int64_t bytes; bool has_returned; } BlkAioEmAIOCB; @@ -1422,7 +1412,8 @@ static void blk_aio_complete_bh(void *opaque) blk_aio_complete(acb); } -static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, +static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, + int64_t bytes, void *iobuf, CoroutineEntry co_entry, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) @@ -1461,8 +1452,8 @@ static void blk_aio_read_entry(void *opaque) QEMUIOVector *qiov = rwco->iobuf; assert(qiov->size == acb->bytes); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); + rwco->ret = blk_co_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); blk_aio_complete(acb); } @@ -1473,37 +1464,40 @@ static void blk_aio_write_entry(void *opaque) QEMUIOVector *qiov = rwco->iobuf; assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, - qiov, 0, rwco->flags); + rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); blk_aio_complete(acb); } BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int count, BdrvRequestFlags flags, + int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { - return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry, + return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } -int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) +int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes) { - int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0); - if (ret < 0) { - return ret; - } - return count; + int ret; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + blk_inc_in_flight(blk); + ret = blk_do_preadv(blk, offset, bytes, &qiov, 0); + blk_dec_in_flight(blk); + + return ret < 0 ? 
ret : bytes; } -int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count, +int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, BdrvRequestFlags flags) { - int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry, - flags); - if (ret < 0) { - return ret; - } - return count; + int ret; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + + ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags); + + return ret < 0 ? ret : bytes; } int64_t blk_getlength(BlockBackend *blk) @@ -1537,6 +1531,7 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_read_entry, flags, cb, opaque); } @@ -1545,6 +1540,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { + assert((uint64_t)qiov->size <= INT64_MAX); return blk_aio_prwv(blk, offset, qiov->size, qiov, blk_aio_write_entry, flags, cb, opaque); } @@ -1560,8 +1556,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb) } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +int coroutine_fn +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { blk_wait_while_drained(blk); @@ -1572,18 +1568,15 @@ blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) return bdrv_co_ioctl(blk_bs(blk), req, buf); } -static void blk_ioctl_entry(void *opaque) +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; + int ret; - rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); - aio_wait_kick(); -} + blk_inc_in_flight(blk); + ret = blk_do_ioctl(blk, req, buf); + blk_dec_in_flight(blk); -int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) -{ - return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0); + return ret; } static void blk_aio_ioctl_entry(void *opaque) @@ -1591,7 +1584,7 @@ static void blk_aio_ioctl_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); blk_aio_complete(acb); } @@ -1603,8 +1596,8 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn -blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +int coroutine_fn +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { int ret; @@ -1623,45 +1616,43 @@ static void blk_aio_pdiscard_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); blk_aio_complete(acb); } BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque) { return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, cb, opaque); } -int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes) 
{ int ret; blk_inc_in_flight(blk); - ret = blk_do_pdiscard(blk, offset, bytes); + ret = blk_co_do_pdiscard(blk, offset, bytes); blk_dec_in_flight(blk); return ret; } -static void blk_pdiscard_entry(void *opaque) +int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes) { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; + int ret; - rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); -} + blk_inc_in_flight(blk); + ret = blk_do_pdiscard(blk, offset, bytes); + blk_dec_in_flight(blk); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -{ - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); + return ret; } /* To be called between exactly one pair of blk_inc/dec_in_flight() */ -static int coroutine_fn blk_do_flush(BlockBackend *blk) +int coroutine_fn blk_co_do_flush(BlockBackend *blk) { blk_wait_while_drained(blk); @@ -1677,7 +1668,7 @@ static void blk_aio_flush_entry(void *opaque) BlkAioEmAIOCB *acb = opaque; BlkRwCo *rwco = &acb->rwco; - rwco->ret = blk_do_flush(rwco->blk); + rwco->ret = blk_co_do_flush(rwco->blk); blk_aio_complete(acb); } @@ -1692,22 +1683,21 @@ int coroutine_fn blk_co_flush(BlockBackend *blk) int ret; blk_inc_in_flight(blk); - ret = blk_do_flush(blk); + ret = blk_co_do_flush(blk); blk_dec_in_flight(blk); return ret; } -static void blk_flush_entry(void *opaque) -{ - BlkRwCo *rwco = opaque; - rwco->ret = blk_do_flush(rwco->blk); - aio_wait_kick(); -} - int blk_flush(BlockBackend *blk) { - return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0); + int ret; + + blk_inc_in_flight(blk); + ret = blk_do_flush(blk); + blk_dec_in_flight(blk); + + return ret; } void blk_drain(BlockBackend *blk) @@ -1837,7 +1827,7 @@ bool blk_supports_write_perm(BlockBackend *blk) if (bs) { return !bdrv_is_read_only(bs); } else { - return !blk->root_state.read_only; + return blk->root_state.open_flags & BDRV_O_RDWR; } } @@ -1939,16 +1929,35 @@ uint32_t blk_get_request_alignment(BlockBackend *blk) return bs ? 
bs->bl.request_alignment : BDRV_SECTOR_SIZE; } +/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */ +uint64_t blk_get_max_hw_transfer(BlockBackend *blk) +{ + BlockDriverState *bs = blk_bs(blk); + uint64_t max = INT_MAX; + + if (bs) { + max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); + max = MIN_NON_ZERO(max, bs->bl.max_transfer); + } + return ROUND_DOWN(max, blk_get_request_alignment(blk)); +} + /* Returns the maximum transfer length, in bytes; guaranteed nonzero */ uint32_t blk_get_max_transfer(BlockBackend *blk) { BlockDriverState *bs = blk_bs(blk); - uint32_t max = 0; + uint32_t max = INT_MAX; if (bs) { - max = bs->bl.max_transfer; + max = MIN_NON_ZERO(max, bs->bl.max_transfer); } - return MIN_NON_ZERO(max, INT_MAX); + return ROUND_DOWN(max, blk_get_request_alignment(blk)); +} + +int blk_get_max_hw_iov(BlockBackend *blk) +{ + return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov, + blk->root->bs->bl.max_iov); } int blk_get_max_iov(BlockBackend *blk) @@ -2173,17 +2182,18 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, } int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { return blk_co_pwritev(blk, offset, bytes, NULL, flags | BDRV_REQ_ZERO_WRITE); } int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int count) + int64_t bytes) { - return blk_prw(blk, offset, (void *) buf, count, blk_write_entry, - BDRV_REQ_WRITE_COMPRESSED); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + return blk_pwritev_part(blk, offset, bytes, &qiov, 0, + BDRV_REQ_WRITE_COMPRESSED); } int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, @@ -2254,7 +2264,6 @@ void blk_update_root_state(BlockBackend *blk) assert(blk->root); blk->root_state.open_flags = blk->root->bs->open_flags; - blk->root_state.read_only = blk->root->bs->read_only; blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; } @@ -2273,12 +2282,7 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) */ int blk_get_open_flags_from_root_state(BlockBackend *blk) { - int bs_flags; - - bs_flags = blk->root_state.read_only ? 
0 : BDRV_O_RDWR; - bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; - - return bs_flags; + return blk->root_state.open_flags; } BlockBackendRootState *blk_get_root_state(BlockBackend *blk) @@ -2378,8 +2382,13 @@ static void blk_root_drained_begin(BdrvChild *child) static bool blk_root_drained_poll(BdrvChild *child) { BlockBackend *blk = child->opaque; + bool busy = false; assert(blk->quiesce_counter); - return !!blk->in_flight; + + if (blk->dev_ops && blk->dev_ops->drained_poll) { + busy = blk->dev_ops->drained_poll(blk->dev_opaque); + } + return busy || !!blk->in_flight; } static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) @@ -2412,7 +2421,7 @@ void blk_unregister_buf(BlockBackend *blk, void *host) int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BlockBackend *blk_out, int64_t off_out, - int bytes, BdrvRequestFlags read_flags, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { int r; diff --git a/block/block-copy.c b/block/block-copy.c index 39ae481c8b4..ce116318b57 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -21,17 +21,27 @@ #include "qemu/units.h" #include "qemu/coroutine.h" #include "block/aio_task.h" +#include "qemu/error-report.h" #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB) #define BLOCK_COPY_MAX_BUFFER (1 * MiB) #define BLOCK_COPY_MAX_MEM (128 * MiB) #define BLOCK_COPY_MAX_WORKERS 64 #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */ +#define BLOCK_COPY_CLUSTER_SIZE_DEFAULT (1 << 16) + +typedef enum { + COPY_READ_WRITE_CLUSTER, + COPY_READ_WRITE, + COPY_WRITE_ZEROES, + COPY_RANGE_SMALL, + COPY_RANGE_FULL +} BlockCopyMethod; static coroutine_fn int block_copy_task_entry(AioTask *task); typedef struct BlockCopyCallState { - /* IN parameters. Initialized in block_copy_async() and never changed. */ + /* Fields initialized in block_copy_async() and never changed. */ BlockCopyState *s; int64_t offset; int64_t bytes; @@ -40,33 +50,60 @@ typedef struct BlockCopyCallState { bool ignore_ratelimit; BlockCopyAsyncCallbackFunc cb; void *cb_opaque; - /* Coroutine where async block-copy is running */ Coroutine *co; + /* Fields whose state changes throughout the execution */ + bool finished; /* atomic */ + QemuCoSleep sleep; /* TODO: protect API with a lock */ + bool cancelled; /* atomic */ /* To reference all call states from BlockCopyState */ QLIST_ENTRY(BlockCopyCallState) list; - /* State */ - int ret; - bool finished; - QemuCoSleepState *sleep_state; - bool cancelled; - - /* OUT parameters */ + /* + * Fields that report information about return values and erros. + * Protected by lock in BlockCopyState. + */ bool error_is_read; + /* + * @ret is set concurrently by tasks under mutex. Only set once by first + * failed task (and untouched if no task failed). + * After finishing (call_state->finished is true), it is not modified + * anymore and may be safely read without mutex. + */ + int ret; } BlockCopyCallState; typedef struct BlockCopyTask { AioTask task; + /* + * Fields initialized in block_copy_task_create() + * and never changed. + */ BlockCopyState *s; BlockCopyCallState *call_state; int64_t offset; + /* + * @method can also be set again in the while loop of + * block_copy_dirty_clusters(), but it is never accessed concurrently + * because the only other function that reads it is + * block_copy_task_entry() and it is invoked afterwards in the same + * iteration. 
+ */ + BlockCopyMethod method; + + /* + * Fields whose state changes throughout the execution + * Protected by lock in BlockCopyState. + */ + CoQueue wait_queue; /* coroutines blocked on this task */ + /* + * Only protect the case of parallel read while updating @bytes + * value in block_copy_task_shrink(). + */ int64_t bytes; - bool zeroes; QLIST_ENTRY(BlockCopyTask) list; - CoQueue wait_queue; /* coroutines blocked on this task */ } BlockCopyTask; static int64_t task_end(BlockCopyTask *task) @@ -82,17 +119,25 @@ typedef struct BlockCopyState { */ BdrvChild *source; BdrvChild *target; - BdrvDirtyBitmap *copy_bitmap; - int64_t in_flight_bytes; + + /* + * Fields initialized in block_copy_state_new() + * and never changed. + */ int64_t cluster_size; - bool use_copy_range; - int64_t copy_size; + int64_t max_transfer; uint64_t len; - QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */ - QLIST_HEAD(, BlockCopyCallState) calls; - BdrvRequestFlags write_flags; + /* + * Fields whose state changes throughout the execution + * Protected by lock. + */ + CoMutex lock; + int64_t in_flight_bytes; + BlockCopyMethod method; + QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */ + QLIST_HEAD(, BlockCopyCallState) calls; /* * skip_unallocated: * @@ -107,16 +152,15 @@ typedef struct BlockCopyState { * skip unallocated regions, clear them in the copy_bitmap, and invoke * block_copy_reset_unallocated() every time it does. */ - bool skip_unallocated; - + bool skip_unallocated; /* atomic */ + /* State fields that use a thread-safe API */ + BdrvDirtyBitmap *copy_bitmap; ProgressMeter *progress; - SharedResource *mem; - - uint64_t speed; RateLimit rate_limit; } BlockCopyState; +/* Called with lock held */ static BlockCopyTask *find_conflicting_task(BlockCopyState *s, int64_t offset, int64_t bytes) { @@ -134,6 +178,9 @@ static BlockCopyTask *find_conflicting_task(BlockCopyState *s, /* * If there are no intersecting tasks return false. Otherwise, wait for the * first found intersecting tasks to finish and return true. + * + * Called with lock held. May temporary release the lock. + * Return value of 0 proves that lock was NOT released. */ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset, int64_t bytes) @@ -144,22 +191,43 @@ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset, return false; } - qemu_co_queue_wait(&task->wait_queue, NULL); + qemu_co_queue_wait(&task->wait_queue, &s->lock); return true; } +/* Called with lock held */ +static int64_t block_copy_chunk_size(BlockCopyState *s) +{ + switch (s->method) { + case COPY_READ_WRITE_CLUSTER: + return s->cluster_size; + case COPY_READ_WRITE: + case COPY_RANGE_SMALL: + return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER), + s->max_transfer); + case COPY_RANGE_FULL: + return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), + s->max_transfer); + default: + /* Cannot have COPY_WRITE_ZEROES here. */ + abort(); + } +} + /* * Search for the first dirty area in offset/bytes range and create task at * the beginning of it. 
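The block_copy_chunk_size() helper introduced above derives the per-task request size from the current copy method. Below is a rough, standalone illustration of that clamping; only the constants and the switch logic follow the patch, while the struct, helper names and main() are invented for the sketch and are not QEMU code.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define KiB (1024LL)
#define MiB (1024LL * KiB)
#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
#define BLOCK_COPY_MAX_BUFFER     (1 * MiB)

typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL,
} BlockCopyMethod;

/* Illustrative stand-in for the fields the patch reads from BlockCopyState */
typedef struct {
    int64_t cluster_size;
    int64_t max_transfer;
    BlockCopyMethod method;
} CopyState;

static int64_t min64(int64_t a, int64_t b) { return a < b ? a : b; }
static int64_t max64(int64_t a, int64_t b) { return a > b ? a : b; }

/* Mirrors block_copy_chunk_size(): the chunk grows with the method,
 * but is always capped by the source/target max_transfer. */
static int64_t chunk_size(const CopyState *s)
{
    switch (s->method) {
    case COPY_READ_WRITE_CLUSTER:
        return s->cluster_size;
    case COPY_READ_WRITE:
    case COPY_RANGE_SMALL:
        return min64(max64(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
                     s->max_transfer);
    case COPY_RANGE_FULL:
        return min64(max64(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                     s->max_transfer);
    default:
        /* COPY_WRITE_ZEROES never asks for a chunk size */
        abort();
    }
}

int main(void)
{
    CopyState s = { .cluster_size = 64 * KiB, .max_transfer = 2 * MiB };

    s.method = COPY_RANGE_SMALL;
    printf("small copy-range chunk: %lld\n", (long long)chunk_size(&s)); /* 1 MiB */
    s.method = COPY_RANGE_FULL;
    printf("full copy-range chunk:  %lld\n", (long long)chunk_size(&s)); /* 2 MiB */
    return 0;
}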
*/ -static BlockCopyTask *block_copy_task_create(BlockCopyState *s, - BlockCopyCallState *call_state, - int64_t offset, int64_t bytes) +static coroutine_fn BlockCopyTask * +block_copy_task_create(BlockCopyState *s, BlockCopyCallState *call_state, + int64_t offset, int64_t bytes) { BlockCopyTask *task; - int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk); + int64_t max_chunk; + QEMU_LOCK_GUARD(&s->lock); + max_chunk = MIN_NON_ZERO(block_copy_chunk_size(s), call_state->max_chunk); if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap, offset, offset + bytes, max_chunk, &offset, &bytes)) @@ -183,6 +251,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, .call_state = call_state, .offset = offset, .bytes = bytes, + .method = s->method, }; qemu_co_queue_init(&task->wait_queue); QLIST_INSERT_HEAD(&s->tasks, task, list); @@ -200,6 +269,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s, static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task, int64_t new_bytes) { + QEMU_LOCK_GUARD(&task->s->lock); if (new_bytes == task->bytes) { return; } @@ -216,11 +286,17 @@ static void coroutine_fn block_copy_task_shrink(BlockCopyTask *task, static void coroutine_fn block_copy_task_end(BlockCopyTask *task, int ret) { + QEMU_LOCK_GUARD(&task->s->lock); task->s->in_flight_bytes -= task->bytes; if (ret < 0) { bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes); } QLIST_REMOVE(task, list); + if (task->s->progress) { + progress_set_remaining(task->s->progress, + bdrv_get_dirty_count(task->s->copy_bitmap) + + task->s->in_flight_bytes); + } qemu_co_queue_restart_all(&task->wait_queue); } @@ -230,6 +306,7 @@ void block_copy_state_free(BlockCopyState *s) return; } + ratelimit_destroy(&s->rate_limit); bdrv_release_dirty_bitmap(s->copy_bitmap); shres_destroy(s->mem); g_free(s); @@ -242,12 +319,82 @@ static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target) target->bs->bl.max_transfer)); } +void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range, + bool compress) +{ + /* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */ + s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) | + (compress ? BDRV_REQ_WRITE_COMPRESSED : 0); + + if (s->max_transfer < s->cluster_size) { + /* + * copy_range does not respect max_transfer. We don't want to bother + * with requests smaller than block-copy cluster size, so fallback to + * buffered copying (read and write respect max_transfer on their + * behalf). + */ + s->method = COPY_READ_WRITE_CLUSTER; + } else if (compress) { + /* Compression supports only cluster-size writes and no copy-range. */ + s->method = COPY_READ_WRITE_CLUSTER; + } else { + /* + * If copy range enabled, start with COPY_RANGE_SMALL, until first + * successful copy_range (look at block_copy_do_copy). + */ + s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE; + } +} + +static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, + Error **errp) +{ + int ret; + BlockDriverInfo bdi; + bool target_does_cow = bdrv_backing_chain_next(target); + + /* + * If there is no backing file on the target, we cannot rely on COW if our + * backup cluster size is smaller than the target cluster size. Even for + * targets with a backing file, try to avoid COW if possible. 
+ */ + ret = bdrv_get_info(target, &bdi); + if (ret == -ENOTSUP && !target_does_cow) { + /* Cluster size is not defined */ + warn_report("The target block device doesn't provide " + "information about the block size and it doesn't have a " + "backing file. The default block size of %u bytes is " + "used. If the actual block size of the target exceeds " + "this default, the backup may be unusable", + BLOCK_COPY_CLUSTER_SIZE_DEFAULT); + return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; + } else if (ret < 0 && !target_does_cow) { + error_setg_errno(errp, -ret, + "Couldn't determine the cluster size of the target image, " + "which has no backing file"); + error_append_hint(errp, + "Aborting, since this may create an unusable destination image\n"); + return ret; + } else if (ret < 0 && target_does_cow) { + /* Not fatal; just trudge on ahead. */ + return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; + } + + return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); +} + BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, - int64_t cluster_size, bool use_copy_range, - BdrvRequestFlags write_flags, Error **errp) + Error **errp) { BlockCopyState *s; + int64_t cluster_size; BdrvDirtyBitmap *copy_bitmap; + bool is_fleecing; + + cluster_size = block_copy_calculate_cluster_size(target->bs, errp); + if (cluster_size < 0) { + return NULL; + } copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL, errp); @@ -256,6 +403,22 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, } bdrv_disable_dirty_bitmap(copy_bitmap); + /* + * If source is in backing chain of target assume that target is going to be + * used for "image fleecing", i.e. it should represent a kind of snapshot of + * source at backup-start point in time. And target is going to be read by + * somebody (for example, used as NBD export) during backup job. + * + * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid + * intersection of backup writes and third party reads from target, + * otherwise reading from target we may occasionally read already updated by + * guest data. + * + * For more information see commit f8d59dfb40bb and test + * tests/qemu-iotests/222 + */ + is_fleecing = bdrv_chain_contains(target->bs, source->bs); + s = g_new(BlockCopyState, 1); *s = (BlockCopyState) { .source = source, @@ -263,38 +426,24 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, .copy_bitmap = copy_bitmap, .cluster_size = cluster_size, .len = bdrv_dirty_bitmap_size(copy_bitmap), - .write_flags = write_flags, + .write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0), .mem = shres_create(BLOCK_COPY_MAX_MEM), + .max_transfer = QEMU_ALIGN_DOWN( + block_copy_max_transfer(source, target), + cluster_size), }; - if (block_copy_max_transfer(source, target) < cluster_size) { - /* - * copy_range does not respect max_transfer. We don't want to bother - * with requests smaller than block-copy cluster size, so fallback to - * buffered copying (read and write respect max_transfer on their - * behalf). - */ - s->use_copy_range = false; - s->copy_size = cluster_size; - } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) { - /* Compression supports only cluster-size writes and no copy-range. */ - s->use_copy_range = false; - s->copy_size = cluster_size; - } else { - /* - * We enable copy-range, but keep small copy_size, until first - * successful copy_range (look at block_copy_do_copy). 
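block_copy_calculate_cluster_size() above chooses the copy granularity from the target's reported cluster size, with a 64 KiB default when the target cannot report one. The following plain-C sketch of the same decision table replaces QEMU's BlockDriverInfo/Error plumbing with simple return codes; the function and parameter names are hypothetical and only the branch structure follows the patch.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define DEFAULT_CLUSTER_SIZE (1 << 16)   /* 64 KiB, as in the patch */

/*
 * info_ret:        return value of the driver's "get info" query (0 or -errno)
 * target_cluster:  cluster size it reported, only meaningful when info_ret == 0
 * target_does_cow: whether the target has a backing file
 *
 * Returns the chosen copy granularity, or a negative errno when the size is
 * unknown and the target does not do COW either (the fatal case in the patch).
 */
static int64_t calculate_cluster_size(int info_ret, int64_t target_cluster,
                                      int target_does_cow)
{
    if (info_ret == -ENOTSUP && !target_does_cow) {
        /* Size simply not defined: warn and use the default */
        fprintf(stderr, "warning: falling back to %d byte clusters\n",
                DEFAULT_CLUSTER_SIZE);
        return DEFAULT_CLUSTER_SIZE;
    } else if (info_ret < 0 && !target_does_cow) {
        return info_ret;                 /* may create an unusable copy */
    } else if (info_ret < 0) {
        return DEFAULT_CLUSTER_SIZE;     /* not fatal: target does COW */
    }

    return target_cluster > DEFAULT_CLUSTER_SIZE ? target_cluster
                                                 : DEFAULT_CLUSTER_SIZE;
}

int main(void)
{
    printf("%lld\n", (long long)calculate_cluster_size(0, 128 * 1024, 0)); /* 131072 */
    printf("%lld\n", (long long)calculate_cluster_size(-ENOTSUP, 0, 0));   /* 65536  */
    return 0;
}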
- */ - s->use_copy_range = use_copy_range; - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); - } + block_copy_set_copy_opts(s, false, false); + ratelimit_init(&s->rate_limit); + qemu_co_mutex_init(&s->lock); QLIST_INIT(&s->tasks); QLIST_INIT(&s->calls); return s; } +/* Only set before running the job, no need for locking. */ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm) { s->progress = pm; @@ -340,11 +489,15 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool, * * No sync here: nor bitmap neighter intersecting requests handling, only copy. * + * @method is an in-out argument, so that copy_range can be either extended to + * a full-size buffer or disabled if the copy_range attempt fails. The output + * value of @method should be used for subsequent tasks. * Returns 0 on success. */ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, int64_t offset, int64_t bytes, - bool zeroes, bool *error_is_read) + BlockCopyMethod *method, + bool *error_is_read) { int ret; int64_t nbytes = MIN(offset + bytes, s->len) - offset; @@ -358,7 +511,8 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size)); assert(nbytes < INT_MAX); - if (zeroes) { + switch (*method) { + case COPY_WRITE_ZEROES: ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags & ~BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { @@ -366,84 +520,86 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s, *error_is_read = false; } return ret; - } - if (s->use_copy_range) { + case COPY_RANGE_SMALL: + case COPY_RANGE_FULL: ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes, 0, s->write_flags); + if (ret >= 0) { + /* Successful copy-range, increase chunk size. */ + *method = COPY_RANGE_FULL; + return 0; + } + + trace_block_copy_copy_range_fail(s, offset, ret); + *method = COPY_READ_WRITE; + /* Fall through to read+write with allocated buffer */ + + case COPY_READ_WRITE_CLUSTER: + case COPY_READ_WRITE: + /* + * In case of failed copy_range request above, we may proceed with + * buffered request larger than BLOCK_COPY_MAX_BUFFER. + * Still, further requests will be properly limited, so don't care too + * much. Moreover the most likely case (copy_range is unsupported for + * the configuration, so the very first copy_range request fails) + * is handled by setting large copy_size only after first successful + * copy_range. + */ + + bounce_buffer = qemu_blockalign(s->source->bs, nbytes); + + ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0); if (ret < 0) { - trace_block_copy_copy_range_fail(s, offset, ret); - s->use_copy_range = false; - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); - /* Fallback to read+write with allocated buffer */ - } else { - if (s->use_copy_range) { - /* - * Successful copy-range. Now increase copy_size. copy_range - * does not respect max_transfer (it's a TODO), so we factor - * that in here. - * - * Note: we double-check s->use_copy_range for the case when - * parallel block-copy request unsets it during previous - * bdrv_co_copy_range call. - */ - s->copy_size = - MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE), - QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source, - s->target), - s->cluster_size)); - } + trace_block_copy_read_fail(s, offset, ret); + *error_is_read = true; goto out; } - } - /* - * In case of failed copy_range request above, we may proceed with buffered - * request larger than BLOCK_COPY_MAX_BUFFER. 
Still, further requests will - * be properly limited, so don't care too much. Moreover the most likely - * case (copy_range is unsupported for the configuration, so the very first - * copy_range request fails) is handled by setting large copy_size only - * after first successful copy_range. - */ + ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer, + s->write_flags); + if (ret < 0) { + trace_block_copy_write_fail(s, offset, ret); + *error_is_read = false; + goto out; + } - bounce_buffer = qemu_blockalign(s->source->bs, nbytes); + out: + qemu_vfree(bounce_buffer); + break; - ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0); - if (ret < 0) { - trace_block_copy_read_fail(s, offset, ret); - *error_is_read = true; - goto out; + default: + abort(); } - ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer, - s->write_flags); - if (ret < 0) { - trace_block_copy_write_fail(s, offset, ret); - *error_is_read = false; - goto out; - } - -out: - qemu_vfree(bounce_buffer); - return ret; } static coroutine_fn int block_copy_task_entry(AioTask *task) { BlockCopyTask *t = container_of(task, BlockCopyTask, task); + BlockCopyState *s = t->s; bool error_is_read = false; + BlockCopyMethod method = t->method; int ret; - ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes, - &error_is_read); - if (ret < 0 && !t->call_state->ret) { - t->call_state->ret = ret; - t->call_state->error_is_read = error_is_read; - } else { - progress_work_done(t->s->progress, t->bytes); + ret = block_copy_do_copy(s, t->offset, t->bytes, &method, &error_is_read); + + WITH_QEMU_LOCK_GUARD(&s->lock) { + if (s->method == t->method) { + s->method = method; + } + + if (ret < 0) { + if (!t->call_state->ret) { + t->call_state->ret = ret; + t->call_state->error_is_read = error_is_read; + } + } else if (s->progress) { + progress_work_done(s->progress, t->bytes); + } } - co_put_to_shres(t->s->mem, t->bytes); + co_put_to_shres(s->mem, t->bytes); block_copy_task_end(t, ret); return ret; @@ -456,7 +612,7 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset, BlockDriverState *base; int ret; - if (s->skip_unallocated) { + if (qatomic_read(&s->skip_unallocated)) { base = bdrv_backing_chain_next(s->source->bs); } else { base = NULL; @@ -543,10 +699,14 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s, bytes = clusters * s->cluster_size; if (!ret) { + qemu_co_mutex_lock(&s->lock); bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); - progress_set_remaining(s->progress, - bdrv_get_dirty_count(s->copy_bitmap) + - s->in_flight_bytes); + if (s->progress) { + progress_set_remaining(s->progress, + bdrv_get_dirty_count(s->copy_bitmap) + + s->in_flight_bytes); + } + qemu_co_mutex_unlock(&s->lock); } *count = bytes; @@ -582,7 +742,8 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); assert(QEMU_IS_ALIGNED(bytes, s->cluster_size)); - while (bytes && aio_task_pool_status(aio) == 0 && !call_state->cancelled) { + while (bytes && aio_task_pool_status(aio) == 0 && + !qatomic_read(&call_state->cancelled)) { BlockCopyTask *task; int64_t status_bytes; @@ -604,34 +765,32 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) if (status_bytes < task->bytes) { block_copy_task_shrink(task, status_bytes); } - if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) { + if (qatomic_read(&s->skip_unallocated) && + !(ret & BDRV_BLOCK_ALLOCATED)) { block_copy_task_end(task, 0); - progress_set_remaining(s->progress, - 
bdrv_get_dirty_count(s->copy_bitmap) + - s->in_flight_bytes); trace_block_copy_skip_range(s, task->offset, task->bytes); offset = task_end(task); bytes = end - offset; g_free(task); continue; } - task->zeroes = ret & BDRV_BLOCK_ZERO; - - if (s->speed) { - if (!call_state->ignore_ratelimit) { - uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0); - if (ns > 0) { - block_copy_task_end(task, -EAGAIN); - g_free(task); - qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns, - &call_state->sleep_state); - continue; - } - } + if (ret & BDRV_BLOCK_ZERO) { + task->method = COPY_WRITE_ZEROES; + } - ratelimit_calculate_delay(&s->rate_limit, task->bytes); + if (!call_state->ignore_ratelimit) { + uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0); + if (ns > 0) { + block_copy_task_end(task, -EAGAIN); + g_free(task); + qemu_co_sleep_ns_wakeable(&call_state->sleep, + QEMU_CLOCK_REALTIME, ns); + continue; + } } + ratelimit_calculate_delay(&s->rate_limit, task->bytes); + trace_block_copy_process(s, task->offset); co_get_from_shres(s->mem, task->bytes); @@ -672,9 +831,7 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state) void block_copy_kick(BlockCopyCallState *call_state) { - if (call_state->sleep_state) { - qemu_co_sleep_wake(call_state->sleep_state); - } + qemu_co_sleep_wake(&call_state->sleep); } /* @@ -689,15 +846,40 @@ void block_copy_kick(BlockCopyCallState *call_state) static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) { int ret; + BlockCopyState *s = call_state->s; - QLIST_INSERT_HEAD(&call_state->s->calls, call_state, list); + qemu_co_mutex_lock(&s->lock); + QLIST_INSERT_HEAD(&s->calls, call_state, list); + qemu_co_mutex_unlock(&s->lock); do { ret = block_copy_dirty_clusters(call_state); - if (ret == 0 && !call_state->cancelled) { - ret = block_copy_wait_one(call_state->s, call_state->offset, - call_state->bytes); + if (ret == 0 && !qatomic_read(&call_state->cancelled)) { + WITH_QEMU_LOCK_GUARD(&s->lock) { + /* + * Check that there is no task we still need to + * wait to complete + */ + ret = block_copy_wait_one(s, call_state->offset, + call_state->bytes); + if (ret == 0) { + /* + * No pending tasks, but check again the bitmap in this + * same critical section, since a task might have failed + * between this and the critical section in + * block_copy_dirty_clusters(). + * + * block_copy_wait_one return value 0 also means that it + * didn't release the lock. So, we are still in the same + * critical section, not interrupted by any concurrent + * access to state. + */ + ret = bdrv_dirty_bitmap_next_dirty(s->copy_bitmap, + call_state->offset, + call_state->bytes) >= 0; + } + } } /* @@ -709,15 +891,17 @@ static int coroutine_fn block_copy_common(BlockCopyCallState *call_state) * 2. We have waited for some intersecting block-copy request * It may have failed and produced new dirty bits. 
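The reworked block_copy_do_copy()/block_copy_task_entry() pair above replaces the old use_copy_range/copy_size fields with a per-task method snapshot: a task may upgrade it (successful copy_range grows the chunk) or downgrade it (failed copy_range falls back to buffered read+write), and the result is published back to the shared state only if no other task changed it in the meantime. A compressed, single-threaded sketch of that state machine follows; the names and the try_copy_range() stub are invented for the example.

#include <stdbool.h>
#include <stdio.h>

typedef enum {
    COPY_READ_WRITE_CLUSTER,
    COPY_READ_WRITE,
    COPY_WRITE_ZEROES,
    COPY_RANGE_SMALL,
    COPY_RANGE_FULL,
} Method;

/* Pretend outcome of one copy_range attempt (e.g. unsupported by the driver) */
static bool try_copy_range(void) { return false; }

/*
 * One task: starts from the method it snapshotted at creation time and
 * returns the method it would like future tasks to use.
 */
static Method run_one_task(Method m)
{
    switch (m) {
    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        if (try_copy_range()) {
            return COPY_RANGE_FULL;     /* success: grow the chunk size */
        }
        /* failure: use buffered read+write from now on */
        return COPY_READ_WRITE;
    default:
        return m;                       /* buffered/zero paths keep the method */
    }
}

int main(void)
{
    Method shared = COPY_RANGE_SMALL;   /* plays the role of s->method  */
    Method snapshot = shared;           /* plays the role of t->method  */
    Method result = run_one_task(snapshot);

    /* As in block_copy_task_entry(): only publish the new method if nobody
     * else changed the shared one while this task was running. */
    if (shared == snapshot) {
        shared = result;
    }
    printf("shared method is now %d\n", (int)shared);
    return 0;
}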
*/ - } while (ret > 0 && !call_state->cancelled); + } while (ret > 0 && !qatomic_read(&call_state->cancelled)); - call_state->finished = true; + qatomic_store_release(&call_state->finished, true); if (call_state->cb) { call_state->cb(call_state->cb_opaque); } + qemu_co_mutex_lock(&s->lock); QLIST_REMOVE(call_state, list); + qemu_co_mutex_unlock(&s->lock); return ret; } @@ -772,44 +956,50 @@ void block_copy_call_free(BlockCopyCallState *call_state) return; } - assert(call_state->finished); + assert(qatomic_read(&call_state->finished)); g_free(call_state); } bool block_copy_call_finished(BlockCopyCallState *call_state) { - return call_state->finished; + return qatomic_read(&call_state->finished); } bool block_copy_call_succeeded(BlockCopyCallState *call_state) { - return call_state->finished && !call_state->cancelled && - call_state->ret == 0; + return qatomic_load_acquire(&call_state->finished) && + !qatomic_read(&call_state->cancelled) && + call_state->ret == 0; } bool block_copy_call_failed(BlockCopyCallState *call_state) { - return call_state->finished && !call_state->cancelled && - call_state->ret < 0; + return qatomic_load_acquire(&call_state->finished) && + !qatomic_read(&call_state->cancelled) && + call_state->ret < 0; } bool block_copy_call_cancelled(BlockCopyCallState *call_state) { - return call_state->cancelled; + return qatomic_read(&call_state->cancelled); } int block_copy_call_status(BlockCopyCallState *call_state, bool *error_is_read) { - assert(call_state->finished); + assert(qatomic_load_acquire(&call_state->finished)); if (error_is_read) { *error_is_read = call_state->error_is_read; } return call_state->ret; } +/* + * Note that cancelling and finishing are racy. + * User can cancel a block-copy that is already finished. + */ void block_copy_call_cancel(BlockCopyCallState *call_state) { - call_state->cancelled = true; + qatomic_set(&call_state->cancelled, true); block_copy_kick(call_state); } @@ -818,17 +1008,19 @@ BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s) return s->copy_bitmap; } +int64_t block_copy_cluster_size(BlockCopyState *s) +{ + return s->cluster_size; +} + void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip) { - s->skip_unallocated = skip; + qatomic_set(&s->skip_unallocated, skip); } void block_copy_set_speed(BlockCopyState *s, uint64_t speed) { - s->speed = speed; - if (speed > 0) { - ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME); - } + ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME); /* * Note: it's good to kick all call states from here, but it should be done diff --git a/block/bochs.c b/block/bochs.c index 2f010ab40a1..4d68658087b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -238,8 +238,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) } static int coroutine_fn -bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +bochs_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVBochsState *s = bs->opaque; uint64_t sector_num = offset >> BDRV_SECTOR_BITS; diff --git a/block/cloop.c b/block/cloop.c index c99192a57f4..b8c6d0eccdb 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -245,8 +245,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num) } static int coroutine_fn -cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +cloop_co_preadv(BlockDriverState *bs, int64_t offset, int64_t 
bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVCloopState *s = bs->opaque; uint64_t sector_num = offset >> BDRV_SECTOR_BITS; diff --git a/block/commit.c b/block/commit.c index dd9ba87349e..10cc5ff4518 100644 --- a/block/commit.c +++ b/block/commit.c @@ -119,24 +119,24 @@ static int coroutine_fn commit_run(Job *job, Error **errp) uint64_t delay_ns = 0; int ret = 0; int64_t n = 0; /* bytes */ - void *buf = NULL; + QEMU_AUTO_VFREE void *buf = NULL; int64_t len, base_len; - ret = len = blk_getlength(s->top); + len = blk_getlength(s->top); if (len < 0) { - goto out; + return len; } job_progress_set_remaining(&s->common.job, len); - ret = base_len = blk_getlength(s->base); + base_len = blk_getlength(s->base); if (base_len < 0) { - goto out; + return base_len; } if (base_len < len) { ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL); if (ret) { - goto out; + return ret; } } @@ -174,7 +174,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) block_job_error_action(&s->common, s->on_error, error_in_source, -ret); if (action == BLOCK_ERROR_ACTION_REPORT) { - goto out; + return ret; } else { n = 0; continue; @@ -190,12 +190,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } } - ret = 0; - -out: - qemu_vfree(buf); - - return ret; + return 0; } static const BlockJobDriver commit_job_driver = { @@ -212,7 +207,7 @@ static const BlockJobDriver commit_job_driver = { }; static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } @@ -312,6 +307,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, commit_top_bs->total_sectors = top->total_sectors; ret = bdrv_append(commit_top_bs, top, errp); + bdrv_unref(commit_top_bs); /* referenced by new parents or failed */ if (ret < 0) { commit_top_bs = NULL; goto fail; @@ -434,7 +430,7 @@ int bdrv_commit(BlockDriverState *bs) int ro; int64_t n; int ret = 0; - uint8_t *buf = NULL; + QEMU_AUTO_VFREE uint8_t *buf = NULL; Error *local_err = NULL; if (!drv) @@ -452,7 +448,7 @@ int bdrv_commit(BlockDriverState *bs) return -EBUSY; } - ro = backing_file_bs->read_only; + ro = bdrv_is_read_only(backing_file_bs); if (ro) { if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) { @@ -555,8 +551,6 @@ int bdrv_commit(BlockDriverState *bs) ret = 0; ro_cleanup: - qemu_vfree(buf); - blk_unref(backing); if (bdrv_cow_bs(bs) != backing_file_bs) { bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); diff --git a/block/copy-before-write.c b/block/copy-before-write.c new file mode 100644 index 00000000000..c30a5ff8dea --- /dev/null +++ b/block/copy-before-write.c @@ -0,0 +1,257 @@ +/* + * copy-before-write filter driver + * + * The driver performs Copy-Before-Write (CBW) operation: it is injected above + * some node, and before each write it copies _old_ data to the target node. + * + * Copyright (c) 2018-2021 Virtuozzo International GmbH. + * + * Author: + * Sementsov-Ogievskiy Vladimir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" + +#include "sysemu/block-backend.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "block/block_int.h" +#include "block/qdict.h" +#include "block/block-copy.h" + +#include "block/copy-before-write.h" + +typedef struct BDRVCopyBeforeWriteState { + BlockCopyState *bcs; + BdrvChild *target; +} BDRVCopyBeforeWriteState; + +static coroutine_fn int cbw_co_preadv( + BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, BdrvRequestFlags flags) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + uint64_t off, end; + int64_t cluster_size = block_copy_cluster_size(s->bcs); + + if (flags & BDRV_REQ_WRITE_UNCHANGED) { + return 0; + } + + off = QEMU_ALIGN_DOWN(offset, cluster_size); + end = QEMU_ALIGN_UP(offset + bytes, cluster_size); + + return block_copy(s->bcs, off, end - off, true); +} + +static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + int ret = cbw_do_copy_before_write(bs, offset, bytes, 0); + if (ret < 0) { + return ret; + } + + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + +static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) +{ + int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); + if (ret < 0) { + return ret; + } + + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + +static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs, + int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + int ret = cbw_do_copy_before_write(bs, offset, bytes, flags); + if (ret < 0) { + return ret; + } + + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); +} + +static int coroutine_fn cbw_co_flush(BlockDriverState *bs) +{ + if (!bs->file) { + return 0; + } + + return bdrv_co_flush(bs->file->bs); +} + +static void cbw_refresh_filename(BlockDriverState *bs) +{ + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->file->bs->filename); +} + +static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (!(role & BDRV_CHILD_FILTERED)) { + /* + * Target child + * + * Share write to target (child_file), to not interfere + * with guest writes to its disk which may be in target backing chain. + * Can't resize during a backup block job because we check the size + * only upfront. 
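cbw_do_copy_before_write() above widens every guest write to cluster boundaries before triggering the copy to the target. A standalone sketch of that rounding, using plain helpers in place of QEMU_ALIGN_DOWN/QEMU_ALIGN_UP (the function names here are invented for the example):

#include <stdint.h>
#include <stdio.h>

/* Same rounding as QEMU_ALIGN_DOWN / QEMU_ALIGN_UP for power-of-two or not */
static uint64_t align_down(uint64_t x, uint64_t a) { return x / a * a; }
static uint64_t align_up(uint64_t x, uint64_t a)   { return align_down(x + a - 1, a); }

/*
 * Given a guest write [offset, offset + bytes), compute the cluster-aligned
 * region the filter must copy to the target before letting the write through.
 */
static void cbw_region(uint64_t offset, uint64_t bytes, uint64_t cluster,
                       uint64_t *copy_off, uint64_t *copy_bytes)
{
    uint64_t off = align_down(offset, cluster);
    uint64_t end = align_up(offset + bytes, cluster);

    *copy_off = off;
    *copy_bytes = end - off;
}

int main(void)
{
    uint64_t off, len;

    /* An 8 KiB write at offset 60 KiB straddles a 64 KiB cluster boundary,
     * so two whole clusters (128 KiB starting at 0) must be copied first. */
    cbw_region(60 * 1024, 8 * 1024, 64 * 1024, &off, &len);
    printf("copy %llu bytes at offset %llu\n",
           (unsigned long long)len, (unsigned long long)off); /* 131072 at 0 */
    return 0;
}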
+ */ + *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; + *nperm = BLK_PERM_WRITE; + } else { + /* Source child */ + bdrv_default_perms(bs, c, role, reopen_queue, + perm, shared, nperm, nshared); + + if (!QLIST_EMPTY(&bs->parents)) { + if (perm & BLK_PERM_WRITE) { + *nperm = *nperm | BLK_PERM_CONSISTENT_READ; + } + *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } + } +} + +static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + BdrvDirtyBitmap *copy_bitmap; + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds, + BDRV_CHILD_DATA, false, errp); + if (!s->target) { + return -EINVAL; + } + + bs->total_sectors = bs->file->bs->total_sectors; + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + s->bcs = block_copy_state_new(bs->file, s->target, errp); + if (!s->bcs) { + error_prepend(errp, "Cannot create block-copy-state: "); + return -EINVAL; + } + + copy_bitmap = block_copy_dirty_bitmap(s->bcs); + bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap)); + + return 0; +} + +static void cbw_close(BlockDriverState *bs) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + + block_copy_state_free(s->bcs); + s->bcs = NULL; +} + +BlockDriver bdrv_cbw_filter = { + .format_name = "copy-before-write", + .instance_size = sizeof(BDRVCopyBeforeWriteState), + + .bdrv_open = cbw_open, + .bdrv_close = cbw_close, + + .bdrv_co_preadv = cbw_co_preadv, + .bdrv_co_pwritev = cbw_co_pwritev, + .bdrv_co_pwrite_zeroes = cbw_co_pwrite_zeroes, + .bdrv_co_pdiscard = cbw_co_pdiscard, + .bdrv_co_flush = cbw_co_flush, + + .bdrv_refresh_filename = cbw_refresh_filename, + + .bdrv_child_perm = cbw_child_perm, + + .is_filter = true, +}; + +BlockDriverState *bdrv_cbw_append(BlockDriverState *source, + BlockDriverState *target, + const char *filter_node_name, + BlockCopyState **bcs, + Error **errp) +{ + ERRP_GUARD(); + BDRVCopyBeforeWriteState *state; + BlockDriverState *top; + QDict *opts; + + assert(source->total_sectors == target->total_sectors); + + opts = qdict_new(); + qdict_put_str(opts, "driver", "copy-before-write"); + if (filter_node_name) { + qdict_put_str(opts, "node-name", filter_node_name); + } + qdict_put_str(opts, "file", bdrv_get_node_name(source)); + qdict_put_str(opts, "target", bdrv_get_node_name(target)); + + top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp); + if (!top) { + return NULL; + } + + state = top->opaque; + *bcs = state->bcs; + + return top; +} + +void bdrv_cbw_drop(BlockDriverState *bs) +{ + bdrv_drop_filter(bs, &error_abort); + bdrv_unref(bs); +} + +static void cbw_init(void) +{ + bdrv_register(&bdrv_cbw_filter); +} + +block_init(cbw_init); diff --git a/block/copy-before-write.h b/block/copy-before-write.h new file mode 100644 index 00000000000..51847e711ac --- /dev/null +++ b/block/copy-before-write.h @@ -0,0 +1,39 @@ +/* + * copy-before-write filter driver + * + * The driver performs Copy-Before-Write (CBW) operation: it is injected above + * some node, and before each write it copies _old_ data to the target node. + * + * Copyright (c) 2018-2021 Virtuozzo International GmbH. 
+ * + * Author: + * Sementsov-Ogievskiy Vladimir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef COPY_BEFORE_WRITE_H +#define COPY_BEFORE_WRITE_H + +#include "block/block_int.h" +#include "block/block-copy.h" + +BlockDriverState *bdrv_cbw_append(BlockDriverState *source, + BlockDriverState *target, + const char *filter_node_name, + BlockCopyState **bcs, + Error **errp); +void bdrv_cbw_drop(BlockDriverState *bs); + +#endif /* COPY_BEFORE_WRITE_H */ diff --git a/block/copy-on-read.c b/block/copy-on-read.c index 9cad9e1b8c2..1fc7fb3333b 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -29,7 +29,6 @@ typedef struct BDRVStateCOR { - bool active; BlockDriverState *bottom_bs; bool chain_frozen; } BDRVStateCOR; @@ -89,7 +88,6 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, */ bdrv_ref(bottom_bs); } - state->active = true; state->bottom_bs = bottom_bs; /* @@ -112,17 +110,6 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - BDRVStateCOR *s = bs->opaque; - - if (!s->active) { - /* - * While the filter is being removed - */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - return; - } - *nperm = perm & PERM_PASSTHROUGH; *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; @@ -141,10 +128,10 @@ static int64_t cor_getlength(BlockDriverState *bs) static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - int flags) + BdrvRequestFlags flags) { int64_t n; int local_flags; @@ -194,10 +181,11 @@ static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, flags); @@ -205,7 +193,7 @@ static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); @@ -213,15 +201,15 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov) { return bdrv_co_pwritev(bs->file, offset, bytes, qiov, @@ -280,32 +268,14 @@ static BlockDriver bdrv_copy_on_read = { void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs) { - BdrvChild 
*child; - BlockDriverState *bs; BDRVStateCOR *s = cor_filter_bs->opaque; - child = bdrv_filter_child(cor_filter_bs); - if (!child) { - return; - } - bs = child->bs; - - /* Retain the BDS until we complete the graph change. */ - bdrv_ref(bs); - /* Hold a guest back from writing while permissions are being reset. */ - bdrv_drained_begin(bs); - /* Drop permissions before the graph change. */ - s->active = false; /* unfreeze, as otherwise bdrv_replace_node() will fail */ if (s->chain_frozen) { s->chain_frozen = false; bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs); } - bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort); - bdrv_replace_node(cor_filter_bs, bs, &error_abort); - - bdrv_drained_end(bs); - bdrv_unref(bs); + bdrv_drop_filter(cor_filter_bs, &error_abort); bdrv_unref(cor_filter_bs); } diff --git a/block/coroutines.h b/block/coroutines.h index 4cfb4946e65..c8c14a29c83 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -27,6 +27,9 @@ #include "block/block_int.h" +/* For blk_bs() in generated block/block-gen.c */ +#include "sysemu/block-backend.h" + int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); @@ -66,4 +69,40 @@ int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int generated_co_wrapper +nbd_do_establish_connection(BlockDriverState *bs, Error **errp); +int coroutine_fn +nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp); + + +int generated_co_wrapper +blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); +int coroutine_fn +blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags); + + +int generated_co_wrapper +blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); +int coroutine_fn +blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); + +int generated_co_wrapper +blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); +int coroutine_fn +blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + +int generated_co_wrapper +blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); +int coroutine_fn +blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); + +int generated_co_wrapper blk_do_flush(BlockBackend *blk); +int coroutine_fn blk_co_do_flush(BlockBackend *blk); + #endif /* BLOCK_COROUTINES_INT_H */ diff --git a/block/crypto.c b/block/crypto.c index 1d30fde38e5..c8ba4681e20 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -397,8 +397,8 @@ static int block_crypto_reopen_prepare(BDRVReopenState *state, #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024) static coroutine_fn int -block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlockCrypto *crypto = bs->opaque; uint64_t cur_bytes; /* number of bytes in current iteration */ @@ -460,8 +460,8 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, static coroutine_fn int -block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, 
uint64_t bytes, - QEMUIOVector *qiov, int flags) +block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BlockCrypto *crypto = bs->opaque; uint64_t cur_bytes; /* number of bytes in current iteration */ diff --git a/block/curl.c b/block/curl.c index 50e741a0d7a..4a8ae2b2698 100644 --- a/block/curl.c +++ b/block/curl.c @@ -896,7 +896,8 @@ static void curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) } static int coroutine_fn curl_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { CURLAIOCB acb = { .co = qemu_coroutine_self(), diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 68d295d6e3e..0ef46163e3e 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -193,7 +193,7 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used", bitmap->name); error_append_hint(errp, "Try block-dirty-bitmap-remove to delete" - " this bitmap from disk"); + " this bitmap from disk\n"); return -1; } diff --git a/block/dmg.c b/block/dmg.c index ef35a505f26..447901fbb87 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -689,8 +689,8 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) } static int coroutine_fn -dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +dmg_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVDMGState *s = bs->opaque; uint64_t sector_num = offset >> BDRV_SECTOR_BITS; diff --git a/block/export/export.c b/block/export/export.c index fec7d9f7382..6d3b9964c8d 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -111,6 +111,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) if (export->has_iothread) { IOThread *iothread; AioContext *new_ctx; + Error **set_context_errp; iothread = iothread_by_id(export->iothread); if (!iothread) { @@ -120,7 +121,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) new_ctx = iothread_get_aio_context(iothread); - ret = bdrv_try_set_aio_context(bs, new_ctx, errp); + /* Ignore errors with fixed-iothread=false */ + set_context_errp = fixed_iothread ? 
errp : NULL; + ret = bdrv_try_set_aio_context(bs, new_ctx, set_context_errp); if (ret == 0) { aio_context_release(ctx); aio_context_acquire(new_ctx); diff --git a/block/export/fuse.c b/block/export/fuse.c index 208c3a0ff51..63f9ec9726a 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -35,6 +35,9 @@ #include #endif +#ifdef __linux__ +#include +#endif /* Prevent overly long bounce buffer allocations */ #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024)) @@ -50,6 +53,12 @@ typedef struct FuseExport { char *mountpoint; bool writable; bool growable; + /* Whether allow_other was used as a mount option or not */ + bool allow_other; + + mode_t st_mode; + uid_t st_uid; + gid_t st_gid; } FuseExport; static GHashTable *exports; @@ -61,7 +70,7 @@ static void fuse_export_delete(BlockExport *exp); static void init_exports_table(void); static int setup_fuse_export(FuseExport *exp, const char *mountpoint, - Error **errp); + bool allow_other, Error **errp); static void read_from_fuse_export(void *opaque); static bool is_regular_file(const char *path, Error **errp); @@ -122,7 +131,29 @@ static int fuse_export_create(BlockExport *blk_exp, exp->writable = blk_exp_args->writable; exp->growable = args->growable; - ret = setup_fuse_export(exp, args->mountpoint, errp); + /* set default */ + if (!args->has_allow_other) { + args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO; + } + + exp->st_mode = S_IFREG | S_IRUSR; + if (exp->writable) { + exp->st_mode |= S_IWUSR; + } + exp->st_uid = getuid(); + exp->st_gid = getgid(); + + if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) { + /* Ignore errors on our first attempt */ + ret = setup_fuse_export(exp, args->mountpoint, true, NULL); + exp->allow_other = ret == 0; + if (ret < 0) { + ret = setup_fuse_export(exp, args->mountpoint, false, errp); + } + } else { + exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON; + ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp); + } if (ret < 0) { goto fail; } @@ -150,15 +181,20 @@ static void init_exports_table(void) * Create exp->fuse_session and mount it. */ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, - Error **errp) + bool allow_other, Error **errp) { const char *fuse_argv[4]; char *mount_opts; struct fuse_args fuse_args; int ret; - /* Needs to match what fuse_init() sets. Only max_read must be supplied. */ - mount_opts = g_strdup_printf("max_read=%zu", FUSE_MAX_BOUNCE_BYTES); + /* + * max_read needs to match what fuse_init() sets. + * max_write need not be supplied. + */ + mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s", + FUSE_MAX_BOUNCE_BYTES, + allow_other ? 
",allow_other" : ""); fuse_argv[0] = ""; /* Dummy program name */ fuse_argv[1] = "-o"; @@ -320,7 +356,6 @@ static void fuse_getattr(fuse_req_t req, fuse_ino_t inode, int64_t length, allocated_blocks; time_t now = time(NULL); FuseExport *exp = fuse_req_userdata(req); - mode_t mode; length = blk_getlength(exp->common.blk); if (length < 0) { @@ -335,17 +370,12 @@ static void fuse_getattr(fuse_req_t req, fuse_ino_t inode, allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512); } - mode = S_IFREG | S_IRUSR; - if (exp->writable) { - mode |= S_IWUSR; - } - statbuf = (struct stat) { .st_ino = inode, - .st_mode = mode, + .st_mode = exp->st_mode, .st_nlink = 1, - .st_uid = getuid(), - .st_gid = getgid(), + .st_uid = exp->st_uid, + .st_gid = exp->st_gid, .st_size = length, .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment, .st_blocks = allocated_blocks, @@ -391,28 +421,76 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size, } /** - * Let clients set file attributes. Only resizing is supported. + * Let clients set file attributes. Only resizing and changing + * permissions (st_mode, st_uid, st_gid) is allowed. + * Changing permissions is only allowed as far as it will actually + * permit access: Read-only exports cannot be given +w, and exports + * without allow_other cannot be given a different UID or GID, and + * they cannot be given non-owner access. */ static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf, int to_set, struct fuse_file_info *fi) { FuseExport *exp = fuse_req_userdata(req); + int supported_attrs; int ret; - if (!exp->writable) { - fuse_reply_err(req, EACCES); - return; + supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE; + if (exp->allow_other) { + supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID; } - if (to_set & ~FUSE_SET_ATTR_SIZE) { + if (to_set & ~supported_attrs) { fuse_reply_err(req, ENOTSUP); return; } - ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF); - if (ret < 0) { - fuse_reply_err(req, -ret); - return; + /* Do some argument checks first before committing to anything */ + if (to_set & FUSE_SET_ATTR_MODE) { + /* + * Without allow_other, non-owners can never access the export, so do + * not allow setting permissions for them + */ + if (!exp->allow_other && + (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0) + { + fuse_reply_err(req, EPERM); + return; + } + + /* +w for read-only exports makes no sense, disallow it */ + if (!exp->writable && + (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0) + { + fuse_reply_err(req, EROFS); + return; + } + } + + if (to_set & FUSE_SET_ATTR_SIZE) { + if (!exp->writable) { + fuse_reply_err(req, EACCES); + return; + } + + ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + } + + if (to_set & FUSE_SET_ATTR_MODE) { + /* Ignore FUSE-supplied file type, only change the mode */ + exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG; + } + + if (to_set & FUSE_SET_ATTR_UID) { + exp->st_uid = statbuf->st_uid; + } + + if (to_set & FUSE_SET_ATTR_GID) { + exp->st_gid = statbuf->st_gid; } fuse_getattr(req, inode, fi); diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index fa06996d37c..18625633366 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -70,9 +70,16 @@ static void vu_blk_req_complete(VuBlkReq *req) static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector, size_t size) { - uint64_t 
nb_sectors = size >> BDRV_SECTOR_BITS; + uint64_t nb_sectors; uint64_t total_sectors; + if (size % VIRTIO_BLK_SECTOR_SIZE) { + return false; + } + + nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS; + + QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE); if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { return false; } diff --git a/block/file-posix.c b/block/file-posix.c index 20e14f8e96b..b283093e5b7 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -42,8 +42,11 @@ #include "scsi/constants.h" #if defined(__APPLE__) && (__MACH__) +#include +#if defined(HAVE_HOST_BLOCK_DEVICE) #include #include +#include #include #include #include @@ -52,6 +55,7 @@ //#include #include #include +#endif /* defined(HAVE_HOST_BLOCK_DEVICE) */ #endif #ifdef __sun__ @@ -106,8 +110,6 @@ #include #endif -#include "trace.h" - /* OS X does not have O_DSYNC */ #ifndef O_DSYNC #ifdef O_SYNC @@ -148,6 +150,8 @@ typedef struct BDRVRawState { uint64_t locked_perm; uint64_t locked_shared_perm; + uint64_t aio_max_batch; + int perm_change_fd; int perm_change_flags; BDRVReopenState *reopen_state; @@ -160,9 +164,10 @@ typedef struct BDRVRawState { bool discard_zeroes:1; bool use_linux_aio:1; bool use_linux_io_uring:1; - bool page_cache_inconsistent:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; + bool force_alignment; bool drop_cache; bool check_cache_dropped; struct { @@ -175,13 +180,22 @@ typedef struct BDRVRawState { } BDRVRawState; typedef struct BDRVRawReopenState { - int fd; int open_flags; bool drop_cache; bool check_cache_dropped; } BDRVRawReopenState; -static int fd_open(BlockDriverState *bs); +static int fd_open(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + /* this is just to ensure s->fd is sane (its called by io ops) */ + if (s->fd >= 0) { + return 0; + } + return -EIO; +} + static int64_t raw_getlength(BlockDriverState *bs); typedef struct RawPosixAIOData { @@ -338,6 +352,17 @@ static bool dio_byte_aligned(int fd) return false; } +static bool raw_needs_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + return true; + } + + return s->force_alignment; +} + /* Check if read is allowed with given memory buffer and length. * * This function is used to check O_DIRECT memory buffer and request alignment. 
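The check above only decides whether alignment is needed at all; the rule O_DIRECT actually imposes is that the file offset, the request length and the memory buffer address must each be a multiple of the probed alignment. A minimal stand-alone sketch of that rule (illustrative only; the fixed 4096-byte value is an assumption, the real code probes the alignment per file):

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define ASSUMED_DIO_ALIGN 4096   /* assumed worst case, not probed */

/* All three components of the request must be aligned for O_DIRECT. */
static bool dio_request_ok(uint64_t offset, uint64_t bytes, const void *buf)
{
    return offset % ASSUMED_DIO_ALIGN == 0 &&
           bytes  % ASSUMED_DIO_ALIGN == 0 &&
           (uintptr_t)buf % ASSUMED_DIO_ALIGN == 0;
}

int main(void)
{
    void *buf = NULL;

    /* posix_memalign() yields a buffer address usable with O_DIRECT */
    if (posix_memalign(&buf, ASSUMED_DIO_ALIGN, 2 * ASSUMED_DIO_ALIGN)) {
        return 1;
    }
    return dio_request_ok(4096, 8192, buf) ? 0 : 1;
}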
@@ -519,6 +544,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native, io_uring)", }, + { + .name = "aio-max-batch", + .type = QEMU_OPT_NUMBER, + .help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)", + }, { .name = "locking", .type = QEMU_OPT_STRING, @@ -598,6 +628,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); #endif + s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0); + locking = qapi_enum_parse(&OnOffAuto_lookup, qemu_opt_get(opts, "locking"), ON_OFF_AUTO_AUTO, &local_err); @@ -708,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->has_discard = true; s->has_write_zeroes = true; - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } if (fstat(s->fd, &st) < 0) { ret = -errno; @@ -764,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, * so QEMU makes sure all IO operations on the device are aligned * to sector size, or else FreeBSD will reject them with EINVAL. */ - s->needs_alignment = true; + s->force_alignment = true; } #endif + s->needs_alignment = raw_needs_alignment(bs); #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1075,7 +1105,6 @@ static int raw_reopen_prepare(BDRVReopenState *state, BDRVRawReopenState *rs; QemuOpts *opts; int ret; - Error *local_err = NULL; assert(state != NULL); assert(state->bs != NULL); @@ -1101,32 +1130,18 @@ static int raw_reopen_prepare(BDRVReopenState *state, * bdrv_reopen_prepare() will detect changes and complain. */ qemu_opts_to_qdict(opts, state->options); - rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags, - state->perm, true, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -1; - goto out; - } - - /* Fail already reopen_prepare() if we can't get a working O_DIRECT - * alignment with the new fd. */ - if (rs->fd != -1) { - raw_probe_alignment(state->bs, rs->fd, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto out_fd; - } - } + /* + * As part of reopen prepare we also want to create new fd by + * raw_reconfigure_getfd(). But it wants updated "perm", when in + * bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to + * permission update. Happily, permission update is always a part (a seprate + * stage) of bdrv_reopen_multiple() so we can rely on this fact and + * reconfigure fd in raw_check_perm(). 
+ */ s->reopen_state = state; ret = 0; -out_fd: - if (ret < 0) { - qemu_close(rs->fd); - rs->fd = -1; - } + out: qemu_opts_del(opts); return ret; @@ -1140,10 +1155,6 @@ static void raw_reopen_commit(BDRVReopenState *state) s->drop_cache = rs->drop_cache; s->check_cache_dropped = rs->check_cache_dropped; s->open_flags = rs->open_flags; - - qemu_close(s->fd); - s->fd = rs->fd; - g_free(state->opaque); state->opaque = NULL; @@ -1162,10 +1173,6 @@ static void raw_reopen_abort(BDRVReopenState *state) return; } - if (rs->fd >= 0) { - qemu_close(rs->fd); - rs->fd = -1; - } g_free(state->opaque); state->opaque = NULL; @@ -1173,22 +1180,27 @@ static void raw_reopen_abort(BDRVReopenState *state) s->reopen_state = NULL; } -static int sg_get_max_transfer_length(int fd) +static int hdev_get_max_hw_transfer(int fd, struct stat *st) { #ifdef BLKSECTGET - int max_bytes = 0; - - if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { - return max_bytes; + if (S_ISBLK(st->st_mode)) { + unsigned short max_sectors = 0; + if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) { + return max_sectors * 512; + } } else { - return -errno; + int max_bytes = 0; + if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { + return max_bytes; + } } + return -errno; #else return -ENOSYS; #endif } -static int sg_get_max_segments(int fd) +static int hdev_get_max_segments(int fd, struct stat *st) { #ifdef CONFIG_LINUX char buf[32]; @@ -1197,15 +1209,20 @@ static int sg_get_max_segments(int fd) int ret; int sysfd = -1; long max_segments; - struct stat st; - if (fstat(fd, &st)) { - ret = -errno; - goto out; + if (S_ISCHR(st->st_mode)) { + if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { + return ret; + } + return -ENOTSUP; + } + + if (!S_ISBLK(st->st_mode)) { + return -ENOTSUP; } sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", - major(st.st_rdev), minor(st.st_rdev)); + major(st->st_rdev), minor(st->st_rdev)); sysfd = open(sysfspath, O_RDONLY); if (sysfd == -1) { ret = -errno; @@ -1242,24 +1259,44 @@ static int sg_get_max_segments(int fd) static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVRawState *s = bs->opaque; + struct stat st; + + s->needs_alignment = raw_needs_alignment(bs); + raw_probe_alignment(bs, s->fd, errp); - if (bs->sg) { - int ret = sg_get_max_transfer_length(s->fd); + bs->bl.min_mem_alignment = s->buf_align; + bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); + + /* + * Maximum transfers are best effort, so it is okay to ignore any + * errors. That said, based on the man page errors in fstat would be + * very much unexpected; the only possible case seems to be ENOMEM. 
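For the new hdev_get_max_hw_transfer() helper, the interesting detail is the one the code above encodes: BLKSECTGET reports a sector count (through an unsigned short) on block devices, but a byte count on SG character devices. A rough user-space sketch that mirrors the same distinction, assuming Linux headers and a device path on the command line (error handling trimmed):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>          /* BLKSECTGET */

/* Maximum transfer length in bytes, or -1 on error. */
static long max_hw_transfer(int fd)
{
    struct stat st;

    if (fstat(fd, &st) < 0) {
        return -1;
    }
    if (S_ISBLK(st.st_mode)) {
        unsigned short max_sectors = 0;    /* block device: sector count */
        if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
            return (long)max_sectors * 512;
        }
    } else {
        int max_bytes = 0;                 /* SG char device: byte count */
        if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
            return max_bytes;
        }
    }
    return -1;
}

int main(int argc, char **argv)
{
    int fd = open(argc > 1 ? argv[1] : "/dev/null", O_RDONLY);

    if (fd < 0) {
        return 1;
    }
    printf("max transfer: %ld bytes\n", max_hw_transfer(fd));
    close(fd);
    return 0;
}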
+ */ + if (fstat(s->fd, &st)) { + return; + } + +#if defined(__APPLE__) && (__MACH__) + struct statfs buf; + + if (!fstatfs(s->fd, &buf)) { + bs->bl.opt_transfer = buf.f_iosize; + bs->bl.pdiscard_alignment = buf.f_bsize; + } +#endif + + if (bs->sg || S_ISBLK(st.st_mode)) { + int ret = hdev_get_max_hw_transfer(s->fd, &st); if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { - bs->bl.max_transfer = pow2floor(ret); + bs->bl.max_hw_transfer = ret; } - ret = sg_get_max_segments(s->fd); + ret = hdev_get_max_segments(s->fd, &st); if (ret > 0) { - bs->bl.max_transfer = MIN(bs->bl.max_transfer, - ret * qemu_real_host_page_size); + bs->bl.max_hw_iov = ret; } } - - raw_probe_alignment(bs, s->fd, errp); - bs->bl.min_mem_alignment = s->buf_align; - bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); } static int check_for_dasd(int fd) @@ -1341,7 +1378,9 @@ static int handle_aiocb_ioctl(void *opaque) RawPosixAIOData *aiocb = opaque; int ret; - ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf); + do { + ret = ioctl(aiocb->aio_fildes, aiocb->ioctl.cmd, aiocb->ioctl.buf); + } while (ret == -1 && errno == EINTR); if (ret == -1) { return -errno; } @@ -1357,11 +1396,13 @@ static int handle_aiocb_flush(void *opaque) int ret; if (s->page_cache_inconsistent) { - return -EIO; + return -s->page_cache_inconsistent; } ret = qemu_fdatasync(aiocb->aio_fildes); if (ret == -1) { + trace_file_flush_fdatasync_failed(errno); + /* There is no clear definition of the semantics of a failing fsync(), * so we may have to assume the worst. The sad truth is that this * assumption is correct for Linux. Some pages are now probably marked @@ -1376,7 +1417,7 @@ static int handle_aiocb_flush(void *opaque) * Obviously, this doesn't affect O_DIRECT, which bypasses the page * cache. */ if ((s->open_flags & O_DIRECT) == 0) { - s->page_cache_inconsistent = true; + s->page_cache_inconsistent = errno; } return -errno; } @@ -1581,6 +1622,7 @@ static int handle_aiocb_rw(void *opaque) } } +#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD) static int translate_err(int err) { if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || @@ -1589,6 +1631,7 @@ static int translate_err(int err) } return err; } +#endif #ifdef CONFIG_FALLOCATE static int do_fallocate(int fd, int mode, off_t offset, off_t len) @@ -1649,17 +1692,17 @@ static int handle_aiocb_write_zeroes(void *opaque) if (s->has_write_zeroes) { int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, aiocb->aio_offset, aiocb->aio_nbytes); - if (ret == -EINVAL) { - /* - * Allow falling back to pwrite for file systems that - * do not support fallocate() for an unaligned byte range. - */ - return -ENOTSUP; - } - if (ret == 0 || ret != -ENOTSUP) { + if (ret == -ENOTSUP) { + s->has_write_zeroes = false; + } else if (ret == 0 || ret != -EINVAL) { return ret; } - s->has_write_zeroes = false; + /* + * Note: Some file systems do not like unaligned byte ranges, and + * return EINVAL in such a case, though they should not do it according + * to the man-page of fallocate(). Thus we simply ignore this return + * value and try the other fallbacks instead. + */ } #endif @@ -1674,6 +1717,17 @@ static int handle_aiocb_write_zeroes(void *opaque) return ret; } s->has_fallocate = false; + } else if (ret == -EINVAL) { + /* + * Some file systems like older versions of GPFS do not like un- + * aligned byte ranges, and return EINVAL in such a case, though + * they should not do it according to the man-page of fallocate(). 
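Stripped of the driver state that remembers which features already failed, the zero-write path above is a straightforward fallback chain. A hedged sketch of that chain, assuming Linux and <linux/falloc.h> (the real code also honours BDRV_REQ_NO_FALLBACK and caches ENOTSUP results in has_write_zeroes/has_fallocate):

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/falloc.h>   /* FALLOC_FL_ZERO_RANGE, FALLOC_FL_PUNCH_HOLE */

/* Zero [offset, offset + len): cheap file-system paths first, then write. */
static int zero_range(int fd, off_t offset, off_t len)
{
    /* 1. Let the file system zero the range in place. */
    if (fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len) == 0) {
        return 0;
    }

    /* 2. Punch a hole (reads back as zeroes), then re-reserve the range. */
    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                  offset, len) == 0 &&
        fallocate(fd, 0, offset, len) == 0) {
        return 0;
    }

    /* 3. Last resort: write explicit zero bytes. */
    char buf[4096];
    memset(buf, 0, sizeof(buf));
    while (len > 0) {
        size_t chunk = len < (off_t)sizeof(buf) ? (size_t)len : sizeof(buf);
        ssize_t n = pwrite(fd, buf, chunk, offset);
        if (n < 0) {
            return -errno;
        }
        offset += n;
        len -= n;
    }
    return 0;
}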
+ * Warn about the bad filesystem and try the final fallback instead. + */ + warn_report_once("Your file system is misbehaving: " + "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. " + "Please report this bug to your file system " + "vendor."); } else if (ret != -ENOTSUP) { return ret; } else { @@ -1774,7 +1828,7 @@ static int handle_aiocb_copy_range(void *opaque) static int handle_aiocb_discard(void *opaque) { RawPosixAIOData *aiocb = opaque; - int ret = -EOPNOTSUPP; + int ret = -ENOTSUP; BDRVRawState *s = aiocb->bs->opaque; if (!s->has_discard) { @@ -1790,16 +1844,27 @@ static int handle_aiocb_discard(void *opaque) } } while (errno == EINTR); - ret = -errno; + ret = translate_err(-errno); #endif } else { #ifdef CONFIG_FALLOCATE_PUNCH_HOLE ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, aiocb->aio_offset, aiocb->aio_nbytes); + ret = translate_err(ret); +#elif defined(__APPLE__) && (__MACH__) + fpunchhole_t fpunchhole; + fpunchhole.fp_flags = 0; + fpunchhole.reserved = 0; + fpunchhole.fp_offset = aiocb->aio_offset; + fpunchhole.fp_length = aiocb->aio_nbytes; + if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) { + ret = errno == ENODEV ? -ENOTSUP : -errno; + } else { + ret = 0; + } #endif } - ret = translate_err(ret); if (ret == -ENOTSUP) { s->has_discard = false; } @@ -2013,7 +2078,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, } else if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); assert(qiov->size == bytes); - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); + return laio_co_submit(bs, aio, s->fd, offset, qiov, type, + s->aio_max_batch); #endif } @@ -2033,16 +2099,16 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, return raw_thread_pool_submit(bs, handle_aiocb_rw, &acb); } -static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ); } -static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { assert(flags == 0); return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); @@ -2071,7 +2137,7 @@ static void raw_aio_unplug(BlockDriverState *bs) #ifdef CONFIG_LINUX_AIO if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); - laio_io_unplug(bs, aio); + laio_io_unplug(bs, aio, s->aio_max_batch); } #endif #ifdef CONFIG_LINUX_IO_URING @@ -2308,39 +2374,37 @@ static int64_t raw_getlength(BlockDriverState *bs) again: #endif if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) { + size = 0; #ifdef DIOCGMEDIASIZE - if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) -#elif defined(DIOCGPART) - { - struct partinfo pi; - if (ioctl(fd, DIOCGPART, &pi) == 0) - size = pi.media_size; - else - size = 0; + if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) { + size = 0; } - if (size == 0) #endif -#if defined(__APPLE__) && defined(__MACH__) - { +#ifdef DIOCGPART + if (size == 0) { + struct partinfo pi; + if (ioctl(fd, DIOCGPART, &pi) == 0) { + size = pi.media_size; + } + } +#endif +#if defined(DKIOCGETBLOCKCOUNT) && defined(DKIOCGETBLOCKSIZE) + if (size == 0) { uint64_t sectors = 0; 
uint32_t sector_size = 0; if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0 && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) { size = sectors * sector_size; - } else { - size = lseek(fd, 0LL, SEEK_END); - if (size < 0) { - return -errno; - } } } -#else - size = lseek(fd, 0LL, SEEK_END); +#endif + if (size == 0) { + size = lseek(fd, 0LL, SEEK_END); + } if (size < 0) { return -errno; } -#endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) switch(s->type) { case FTYPE_CD: @@ -2702,7 +2766,8 @@ static int find_allocation(BlockDriverState *bs, off_t start, * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'bytes' is the max value 'pnum' should be set to. + * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may + * well exceed it. */ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, bool want_zero, @@ -2740,7 +2805,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, } else if (data == offset) { /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ - *pnum = MIN(bytes, hole - offset); + *pnum = hole - offset; /* * We are not allowed to return partial sectors, though, so @@ -2759,7 +2824,7 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, } else { /* On a hole, compute bytes to the beginning of the next extent. */ assert(hole == offset); - *pnum = MIN(bytes, data - offset); + *pnum = data - offset; ret = BDRV_BLOCK_ZERO; } *map = offset; @@ -2899,7 +2964,8 @@ static void raw_account_discard(BDRVRawState *s, uint64_t nbytes, int ret) } static coroutine_fn int -raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) +raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes, + bool blkdev) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; @@ -2923,13 +2989,13 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, bool blkdev) } static coroutine_fn int -raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { return raw_do_pdiscard(bs, offset, bytes, false); } static int coroutine_fn -raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, +raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags, bool blkdev) { BDRVRawState *s = bs->opaque; @@ -2997,7 +3063,7 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, static int coroutine_fn raw_co_pwrite_zeroes( BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { return raw_do_pwrite_zeroes(bs, offset, bytes, flags, false); } @@ -3027,6 +3093,7 @@ static BlockStatsSpecific *raw_get_specific_stats(BlockDriverState *bs) return stats; } +#if defined(HAVE_HOST_BLOCK_DEVICE) static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs) { BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1); @@ -3036,6 +3103,7 @@ static BlockStatsSpecific *hdev_get_specific_stats(BlockDriverState *bs) return stats; } +#endif /* HAVE_HOST_BLOCK_DEVICE */ static QemuOptsList raw_create_opts = { .name = "raw-create-opts", @@ -3073,39 +3141,30 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, Error **errp) { BDRVRawState *s = bs->opaque; - BDRVRawReopenState *rs = NULL; + int input_flags = s->reopen_state ?
s->reopen_state->flags : bs->open_flags; int open_flags; int ret; - if (s->perm_change_fd) { + /* We may need a new fd if auto-read-only switches the mode */ + ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm, + false, errp); + if (ret < 0) { + return ret; + } else if (ret != s->fd) { + Error *local_err = NULL; + /* - * In the context of reopen, this function may be called several times - * (directly and recursively while change permissions of the parent). - * This is even true for children that don't inherit from the original - * reopen node, so s->reopen_state is not set. - * - * Ignore all but the first call. + * Fail already check_perm() if we can't get a working O_DIRECT + * alignment with the new fd. */ - return 0; - } - - if (s->reopen_state) { - /* We already have a new file descriptor to set permissions for */ - assert(s->reopen_state->perm == perm); - assert(s->reopen_state->shared_perm == shared); - rs = s->reopen_state->opaque; - s->perm_change_fd = rs->fd; - s->perm_change_flags = rs->open_flags; - } else { - /* We may need a new fd if auto-read-only switches the mode */ - ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm, - false, errp); - if (ret < 0) { - return ret; - } else if (ret != s->fd) { - s->perm_change_fd = ret; - s->perm_change_flags = open_flags; + raw_probe_alignment(bs, ret, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; } + + s->perm_change_fd = ret; + s->perm_change_flags = open_flags; } /* Prepare permissions on old fd to avoid conflicts between old and new, @@ -3127,7 +3186,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared, return 0; fail: - if (s->perm_change_fd && !s->reopen_state) { + if (s->perm_change_fd) { qemu_close(s->perm_change_fd); } s->perm_change_fd = 0; @@ -3158,7 +3217,7 @@ static void raw_abort_perm_update(BlockDriverState *bs) /* For reopen, .bdrv_reopen_abort is called afterwards and will close * the file descriptor. 
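Reduced to its essentials, the new ownership rule is a staged file-descriptor hand-over: raw_check_perm() stages a replacement fd, and the commit or abort path then decides which of the two descriptors survives. A schematic sketch with invented names (not the QEMU API):

#include <unistd.h>

/* Illustrative staged-fd swap: stage in check, adopt in commit, drop in abort. */
typedef struct StagedFd {
    int fd;          /* descriptor currently in use */
    int staged_fd;   /* descriptor opened with the new flags, or -1 */
} StagedFd;

static void fd_stage(StagedFd *s, int new_fd)
{
    s->staged_fd = new_fd;           /* check phase: keep both around */
}

static void fd_commit(StagedFd *s)
{
    if (s->staged_fd >= 0) {
        close(s->fd);                /* success: the staged fd takes over */
        s->fd = s->staged_fd;
        s->staged_fd = -1;
    }
}

static void fd_abort(StagedFd *s)
{
    if (s->staged_fd >= 0) {
        close(s->staged_fd);         /* failure: keep the old fd untouched */
        s->staged_fd = -1;
    }
}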
*/ - if (s->perm_change_fd && !s->reopen_state) { + if (s->perm_change_fd) { qemu_close(s->perm_change_fd); } s->perm_change_fd = 0; @@ -3167,8 +3226,8 @@ static void raw_abort_perm_update(BlockDriverState *bs) } static int coroutine_fn raw_co_copy_range_from( - BlockDriverState *bs, BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, uint64_t bytes, + BlockDriverState *bs, BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes, @@ -3177,10 +3236,10 @@ static int coroutine_fn raw_co_copy_range_from( static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -3260,6 +3319,8 @@ BlockDriver bdrv_file = { /***********************************************/ /* host device */ +#if defined(HAVE_HOST_BLOCK_DEVICE) + #if defined(__APPLE__) && defined(__MACH__) static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize, int flags); @@ -3552,18 +3613,8 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) } #endif /* linux */ -static int fd_open(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - - /* this is just to ensure s->fd is sane (its called by io ops) */ - if (s->fd >= 0) - return 0; - return -EIO; -} - static coroutine_fn int -hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { BDRVRawState *s = bs->opaque; int ret; @@ -3577,7 +3628,7 @@ hdev_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) } static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { int rc; @@ -3885,6 +3936,8 @@ static BlockDriver bdrv_host_cdrom = { }; #endif /* __FreeBSD__ */ +#endif /* HAVE_HOST_BLOCK_DEVICE */ + static void bdrv_file_init(void) { /* @@ -3892,6 +3945,7 @@ static void bdrv_file_init(void) * registered last will get probed first. */ bdrv_register(&bdrv_file); +#if defined(HAVE_HOST_BLOCK_DEVICE) bdrv_register(&bdrv_host_device); #ifdef __linux__ bdrv_register(&bdrv_host_cdrom); @@ -3899,6 +3953,7 @@ static void bdrv_file_init(void) #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) bdrv_register(&bdrv_host_cdrom); #endif +#endif /* HAVE_HOST_BLOCK_DEVICE */ } block_init(bdrv_file_init); diff --git a/block/file-win32.c b/block/file-win32.c index 2642088bd6e..ec9d64d0e4e 100644 --- a/block/file-win32.c +++ b/block/file-win32.c @@ -58,6 +58,10 @@ typedef struct BDRVRawState { QEMUWin32AIOState *aio; } BDRVRawState; +typedef struct BDRVRawReopenState { + HANDLE hfile; +} BDRVRawReopenState; + /* * Read/writes the data to/from a given linear buffer. 
* @@ -392,7 +396,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } s->hfile = CreateFile(filename, access_flags, - FILE_SHARE_READ, NULL, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, overlapped, NULL); if (s->hfile == INVALID_HANDLE_VALUE) { int err = GetLastError(); @@ -436,8 +440,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; @@ -451,8 +455,8 @@ static BlockAIOCB *raw_aio_preadv(BlockDriverState *bs, } static BlockAIOCB *raw_aio_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; @@ -634,6 +638,97 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, return raw_co_create(&options, errp); } +static int raw_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRawState *s = state->bs->opaque; + BDRVRawReopenState *rs; + int access_flags; + DWORD overlapped; + int ret = 0; + + if (s->type != FTYPE_FILE) { + error_setg(errp, "Can only reopen files"); + return -EINVAL; + } + + rs = g_new0(BDRVRawReopenState, 1); + + /* + * We do not support changing any options (only flags). By leaving + * all options in state->options, we tell the generic reopen code + * that we do not support changing any of them, so it will verify + * that their values did not change. + */ + + raw_parse_flags(state->flags, s->aio != NULL, &access_flags, &overlapped); + rs->hfile = CreateFile(state->bs->filename, access_flags, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, + OPEN_EXISTING, overlapped, NULL); + + if (rs->hfile == INVALID_HANDLE_VALUE) { + int err = GetLastError(); + + error_setg_win32(errp, err, "Could not reopen '%s'", + state->bs->filename); + if (err == ERROR_ACCESS_DENIED) { + ret = -EACCES; + } else { + ret = -EINVAL; + } + goto fail; + } + + if (s->aio) { + ret = win32_aio_attach(s->aio, rs->hfile); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not enable AIO"); + CloseHandle(rs->hfile); + goto fail; + } + } + + state->opaque = rs; + + return 0; + +fail: + g_free(rs); + state->opaque = NULL; + + return ret; +} + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawState *s = state->bs->opaque; + BDRVRawReopenState *rs = state->opaque; + + assert(rs != NULL); + + CloseHandle(s->hfile); + s->hfile = rs->hfile; + + g_free(rs); + state->opaque = NULL; +} + +static void raw_reopen_abort(BDRVReopenState *state) +{ + BDRVRawReopenState *rs = state->opaque; + + if (!rs) { + return; + } + + if (rs->hfile != INVALID_HANDLE_VALUE) { + CloseHandle(rs->hfile); + } + + g_free(rs); + state->opaque = NULL; +} + static QemuOptsList raw_create_opts = { .name = "raw-create-opts", .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), @@ -659,6 +754,10 @@ BlockDriver bdrv_file = { .bdrv_co_create_opts = raw_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, + .bdrv_aio_preadv = raw_aio_preadv, .bdrv_aio_pwritev = raw_aio_pwritev, .bdrv_aio_flush = raw_aio_flush, diff --git 
a/block/filter-compress.c b/block/filter-compress.c index 5136371bf8b..d5be538619a 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -63,10 +63,10 @@ static int64_t compress_getlength(BlockDriverState *bs) static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - int flags) + BdrvRequestFlags flags) { return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, flags); @@ -74,10 +74,11 @@ static int coroutine_fn compress_co_preadv_part(BlockDriverState *bs, static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, flags | BDRV_REQ_WRITE_COMPRESSED); @@ -85,7 +86,7 @@ static int coroutine_fn compress_co_pwritev_part(BlockDriverState *bs, static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); @@ -93,7 +94,7 @@ static int coroutine_fn compress_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn compress_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } diff --git a/block/gluster.c b/block/gluster.c index e8ee14c8e9b..398976bc66d 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -891,6 +891,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; + bs->bl.max_pdiscard = SIZE_MAX; } static int qemu_gluster_reopen_prepare(BDRVReopenState *state, @@ -1003,19 +1004,19 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state) #ifdef CONFIG_GLUSTERFS_ZEROFILL static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int size, + int64_t bytes, BdrvRequestFlags flags) { int ret; GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; - acb.size = size; + acb.size = bytes; acb.ret = 0; acb.coroutine = qemu_coroutine_self(); acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + ret = glfs_zerofill_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb); if (ret < 0) { return -errno; } @@ -1297,18 +1298,20 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) #ifdef CONFIG_GLUSTERFS_DISCARD static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, - int64_t offset, int size) + int64_t offset, int64_t bytes) { int ret; GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; + assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */ + acb.size = 0; acb.ret = 0; acb.coroutine = qemu_coroutine_self(); acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb); + ret = glfs_discard_async(s->fd, offset, bytes, gluster_finish_aiocb, &acb); if (ret < 0) { return -errno; } @@ -1461,7 +1464,8 @@ static int find_allocation(BlockDriverState *bs, off_t start, * the specified offset) that are known to be in the same * allocated/unallocated state. * - * 'bytes' is the max value 'pnum' should be set to. 
+ * 'bytes' is a soft cap for 'pnum'. If the information is free, 'pnum' may + * well exceed it. * * (Based on raw_co_block_status() from file-posix.c.) */ @@ -1477,6 +1481,8 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, off_t data = 0, hole = 0; int ret = -EINVAL; + assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); + if (!s->fd) { return ret; } @@ -1500,12 +1506,26 @@ static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs, } else if (data == offset) { /* On a data extent, compute bytes to the end of the extent, * possibly including a partial sector at EOF. */ - *pnum = MIN(bytes, hole - offset); + *pnum = hole - offset; + + /* + * We are not allowed to return partial sectors, though, so + * round up if necessary. + */ + if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { + int64_t file_length = qemu_gluster_getlength(bs); + if (file_length > 0) { + /* Ignore errors, this is just a safeguard */ + assert(hole == file_length); + } + *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); + } + ret = BDRV_BLOCK_DATA; } else { /* On a hole, compute bytes to the beginning of the next extent. */ assert(hole == offset); - *pnum = MIN(bytes, data - offset); + *pnum = data - offset; ret = BDRV_BLOCK_ZERO; } diff --git a/block/io.c b/block/io.c index ca2dca30070..bb0a254def1 100644 --- a/block/io.c +++ b/block/io.c @@ -30,6 +30,7 @@ #include "block/blockjob_int.h" #include "block/block_int.h" #include "block/coroutines.h" +#include "block/write-threshold.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -124,22 +125,54 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) { + dst->pdiscard_alignment = MAX(dst->pdiscard_alignment, + src->pdiscard_alignment); dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); + dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, + src->max_hw_transfer); dst->opt_mem_alignment = MAX(dst->opt_mem_alignment, src->opt_mem_alignment); dst->min_mem_alignment = MAX(dst->min_mem_alignment, src->min_mem_alignment); dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov); + dst->max_hw_iov = MIN_NON_ZERO(dst->max_hw_iov, src->max_hw_iov); } -void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) +typedef struct BdrvRefreshLimitsState { + BlockDriverState *bs; + BlockLimits old_bl; +} BdrvRefreshLimitsState; + +static void bdrv_refresh_limits_abort(void *opaque) +{ + BdrvRefreshLimitsState *s = opaque; + + s->bs->bl = s->old_bl; +} + +static TransactionActionDrv bdrv_refresh_limits_drv = { + .abort = bdrv_refresh_limits_abort, + .clean = g_free, +}; + +/* @tran is allowed to be NULL, in this case no rollback is possible. 
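The rollback hook above follows the generic transaction pattern: each participant registers an action whose abort callback undoes its tentative change (here, restoring the old bs->bl) and whose clean callback releases the bookkeeping. A self-contained sketch of the same idea with invented names, independent of the QEMU headers:

#include <stdio.h>
#include <stdlib.h>

typedef struct Action {
    void (*abort)(void *opaque);     /* undo the tentative change */
    void *opaque;
    struct Action *next;
} Action;

typedef struct Tx { Action *head; } Tx;

static void tx_add(Tx *tx, void (*abort)(void *), void *opaque)
{
    Action *a = malloc(sizeof(*a));
    *a = (Action){ .abort = abort, .opaque = opaque, .next = tx->head };
    tx->head = a;                    /* LIFO: undo in reverse order */
}

static void tx_finish(Tx *tx, int commit)
{
    for (Action *a = tx->head, *next; a; a = next) {
        next = a->next;
        if (!commit) {
            a->abort(a->opaque);
        }
        free(a);
    }
    tx->head = NULL;
}

/* Participant: remember the old limit, restore it if the whole update fails. */
typedef struct { int *limit; int old; } SavedLimit;

static void restore_limit(void *opaque)
{
    SavedLimit *s = opaque;
    *s->limit = s->old;
}

int main(void)
{
    int limit = 512;
    Tx tx = { 0 };
    SavedLimit saved = { .limit = &limit, .old = limit };

    tx_add(&tx, restore_limit, &saved);
    limit = 4096;                    /* tentative new value */
    tx_finish(&tx, 0);               /* abort path: limit is 512 again */
    printf("limit after abort: %d\n", limit);
    return 0;
}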
*/ +void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) { ERRP_GUARD(); BlockDriver *drv = bs->drv; BdrvChild *c; bool have_limits; + if (tran) { + BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1); + *s = (BdrvRefreshLimitsState) { + .bs = bs, + .old_bl = bs->bl, + }; + tran_add(tran, &bdrv_refresh_limits_drv, s); + } + memset(&bs->bl, 0, sizeof(bs->bl)); if (!drv) { @@ -156,7 +189,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) QLIST_FOREACH(c, &bs->children, next) { if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) { - bdrv_refresh_limits(c->bs, errp); + bdrv_refresh_limits(c->bs, tran, errp); if (*errp) { return; } @@ -924,9 +957,9 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, return waited; } -static int bdrv_check_qiov_request(int64_t offset, int64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, - Error **errp) +int bdrv_check_qiov_request(int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + Error **errp) { /* * Check generic offset/bytes correctness @@ -1198,7 +1231,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { BlockDriver *drv = bs->drv; int64_t sector_num; @@ -1809,7 +1843,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, NULL); if (ret < 0) { - return ret; + goto fail; } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); @@ -1817,10 +1851,11 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, bs->bl.request_alignment, qiov, qiov_offset, flags); tracked_request_end(&req); - bdrv_dec_in_flight(bs); - bdrv_padding_destroy(&pad); +fail: + bdrv_dec_in_flight(bs); + return ret; } @@ -1835,7 +1870,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int head = 0; int tail = 0; - int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); + int64_t max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, + INT64_MAX); int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER); @@ -1850,6 +1886,9 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, return -ENOTSUP; } + /* Invalidate the cached block-status data range if this write overlaps */ + bdrv_bsc_invalidate_range(bs, offset, bytes); + assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; tail = (offset + bytes) % alignment; @@ -1945,7 +1984,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, bdrv_check_request(offset, bytes, &error_abort); - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { return -EPERM; } @@ -1981,8 +2020,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, } else { assert(child->perm & BLK_PERM_WRITE); } - return notifier_with_return_list_notify(&bs->before_write_notifiers, - req); + bdrv_write_threshold_check_write(bs, offset, bytes); + return 0; case BDRV_TRACKED_TRUNCATE: assert(child->perm & BLK_PERM_RESIZE); return 0; @@ -2037,7 +2076,8 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes, */ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, int64_t 
bytes, - int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags) { BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; @@ -2210,7 +2250,11 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, return -ENOMEDIUM; } - ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset); + if (flags & BDRV_REQ_ZERO_WRITE) { + ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL); + } else { + ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset); + } if (ret < 0) { return ret; } @@ -2414,9 +2458,65 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; if (bs->drv->bdrv_co_block_status) { - ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, - aligned_bytes, pnum, &local_map, - &local_file); + /* + * Use the block-status cache only for protocol nodes: Format + * drivers are generally quick to inquire the status, but protocol + * drivers often need to get information from outside of qemu, so + * we do not have control over the actual implementation. There + * have been cases where inquiring the status took an unreasonably + * long time, and we can do nothing in qemu to fix it. + * This is especially problematic for images with large data areas, + * because finding the few holes in them and giving them special + * treatment does not gain much performance. Therefore, we try to + * cache the last-identified data region. + * + * Second, limiting ourselves to protocol nodes allows us to assume + * the block status for data regions to be DATA | OFFSET_VALID, and + * that the host offset is the same as the guest offset. + * + * Note that it is possible that external writers zero parts of + * the cached regions without the cache being invalidated, and so + * we may report zeroes as data. This is not catastrophic, + * however, because reporting zeroes as data is fine. + */ + if (QLIST_EMPTY(&bs->children) && + bdrv_bsc_is_data(bs, aligned_offset, pnum)) + { + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + local_file = bs; + local_map = aligned_offset; + } else { + ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, + aligned_bytes, pnum, &local_map, + &local_file); + + /* + * Note that checking QLIST_EMPTY(&bs->children) is also done when + * the cache is queried above. Technically, we do not need to check + * it here; the worst that can happen is that we fill the cache for + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. + */ + if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* + * When a protocol driver reports BLOCK_OFFSET_VALID, the + * returned local_map value must be the same as the offset we + * have passed (aligned_offset), and local_bs must be the node + * itself. + * Assert this, because we follow this rule when reading from + * the cache (see the `local_file = bs` and + * `local_map = aligned_offset` assignments above), and the + * result the cache delivers must be the same as the driver + * would deliver. 
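A hypothetical, stripped-down version of such a cache, reduced to a single remembered data range and without the RCU protection the comment refers to (all names below are made up for illustration, they are not the functions used in this patch):

#include <stdbool.h>
#include <stdint.h>

/* One cached "this range is data" entry; invalid while bytes == 0. */
typedef struct BlockStatusCache {
    int64_t offset;
    int64_t bytes;
} BlockStatusCache;

/* Query: does the cache already say that 'offset' lies inside data? */
static bool bsc_is_data(const BlockStatusCache *c, int64_t offset,
                        int64_t *pnum)
{
    if (c->bytes > 0 && offset >= c->offset &&
        offset < c->offset + c->bytes) {
        *pnum = c->offset + c->bytes - offset;
        return true;
    }
    return false;
}

/* Fill after an expensive driver query reported a data range. */
static void bsc_fill(BlockStatusCache *c, int64_t offset, int64_t bytes)
{
    c->offset = offset;
    c->bytes = bytes;
}

/* Drop the entry whenever a write, zero-write or discard overlaps it. */
static void bsc_invalidate(BlockStatusCache *c, int64_t offset, int64_t bytes)
{
    if (offset < c->offset + c->bytes && c->offset < offset + bytes) {
        c->bytes = 0;
    }
}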
+ */ + assert(local_file == bs); + assert(local_map == aligned_offset); + bdrv_bsc_fill(bs, aligned_offset, *pnum); + } + } } else { /* Default code for filters */ @@ -2718,7 +2818,12 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); - int ret = -ENOTSUP; + int ret; + + ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } if (!drv) { return -ENOMEDIUM; @@ -2730,6 +2835,8 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) ret = drv->bdrv_load_vmstate(bs, qiov, pos); } else if (child_bs) { ret = bdrv_co_readv_vmstate(child_bs, qiov, pos); + } else { + ret = -ENOTSUP; } bdrv_dec_in_flight(bs); @@ -2742,7 +2849,12 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { BlockDriver *drv = bs->drv; BlockDriverState *child_bs = bdrv_primary_bs(bs); - int ret = -ENOTSUP; + int ret; + + ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } if (!drv) { return -ENOMEDIUM; @@ -2754,6 +2866,8 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) ret = drv->bdrv_save_vmstate(bs, qiov, pos); } else if (child_bs) { ret = bdrv_co_writev_vmstate(child_bs, qiov, pos); + } else { + ret = -ENOTSUP; } bdrv_dec_in_flight(bs); @@ -2943,7 +3057,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { BdrvTrackedRequest req; - int max_pdiscard, ret; + int ret; + int64_t max_pdiscard; int head, tail, align; BlockDriverState *bs = child->bs; @@ -2969,6 +3084,9 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, return 0; } + /* Invalidate the cached block-status data range if this discard overlaps */ + bdrv_bsc_invalidate_range(bs, offset, bytes); + /* Discard is advisory, but some devices track and coalesce * unaligned requests, so we must pass everything down rather than * round here. 
Still, most devices will just silently ignore @@ -2987,7 +3105,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, goto out; } - max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), + max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX), align); assert(max_pdiscard >= bs->bl.request_alignment); @@ -3137,12 +3255,6 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier) -{ - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); -} - void bdrv_io_plug(BlockDriverState *bs) { BdrvChild *child; @@ -3368,6 +3480,11 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, return old_size; } + if (bdrv_is_read_only(bs)) { + error_setg(errp, "Image is read-only"); + return -EACCES; + } + if (offset > old_size) { new_bytes = offset - old_size; } else { @@ -3384,11 +3501,6 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, if (new_bytes) { bdrv_make_request_serialising(&req, 1); } - if (bs->read_only) { - error_setg(errp, "Image is read-only"); - ret = -EACCES; - goto out; - } ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req, 0); if (ret < 0) { diff --git a/block/io_uring.c b/block/io_uring.c index 00a3ee9fb85..dfa475cc874 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -165,7 +165,21 @@ static void luring_process_completions(LuringState *s) total_bytes = ret + luringcb->total_read; if (ret < 0) { - if (ret == -EINTR) { + /* + * Only writev/readv/fsync requests on regular files or host block + * devices are submitted. Therefore -EAGAIN is not expected but it's + * known to happen sometimes with Linux SCSI. Submit again and hope + * the request completes successfully. + * + * For more information, see: + * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u + * + * If the code is changed to submit other types of requests in the + * future, then this workaround may need to be extended to deal with + * genuine -EAGAIN results that should not be resubmitted + * immediately. 
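Outside of QEMU, the same defensive resubmission with liburing looks roughly like the sketch below; it assumes a single readv request in flight and that retrying is always safe, which the comment above restricts to regular files and host block devices:

#include <errno.h>
#include <sys/uio.h>
#include <liburing.h>

/* Wait for one completion; requeue the readv if the kernel said -EAGAIN. */
static int wait_one(struct io_uring *ring, int fd, struct iovec *iov,
                    unsigned nr, off_t off)
{
    struct io_uring_cqe *cqe;

    for (;;) {
        int ret = io_uring_wait_cqe(ring, &cqe);
        if (ret < 0) {
            return ret;
        }
        ret = cqe->res;
        io_uring_cqe_seen(ring, cqe);

        if (ret == -EAGAIN || ret == -EINTR) {
            /* Transient: submit the same request again and keep waiting. */
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            io_uring_prep_readv(sqe, fd, iov, nr, off);
            io_uring_submit(ring);
            continue;
        }
        return ret;                  /* bytes transferred, or a hard error */
    }
}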
+ */ + if (ret == -EINTR || ret == -EAGAIN) { luring_resubmit(s, luringcb); continue; } diff --git a/block/iscsi-opts.c b/block/iscsi-opts.c index afaf8837d6c..4f2da405e64 100644 --- a/block/iscsi-opts.c +++ b/block/iscsi-opts.c @@ -68,3 +68,4 @@ static void iscsi_block_opts_init(void) } block_init(iscsi_block_opts_init); +module_opts("iscsi"); diff --git a/block/iscsi.c b/block/iscsi.c index 4d2a416ce77..57aa07a40d7 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -427,14 +427,14 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun) return sector * BDRV_SECTOR_SIZE / iscsilun->block_size; } -static bool is_byte_request_lun_aligned(int64_t offset, int count, +static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes, IscsiLun *iscsilun) { - if (offset % iscsilun->block_size || count % iscsilun->block_size) { + if (offset % iscsilun->block_size || bytes % iscsilun->block_size) { error_report("iSCSI misaligned request: " "iscsilun->block_size %u, offset %" PRIi64 - ", count %d", - iscsilun->block_size, offset, count); + ", bytes %" PRIi64, + iscsilun->block_size, offset, bytes); return false; } return true; @@ -781,9 +781,6 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, iscsi_allocmap_set_allocated(iscsilun, offset, *pnum); } - if (*pnum > bytes) { - *pnum = bytes; - } out_unlock: qemu_mutex_unlock(&iscsilun->mutex); g_free(iTask.err_str); @@ -1141,7 +1138,8 @@ iscsi_getlength(BlockDriverState *bs) } static int -coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) +coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, + int64_t bytes) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; @@ -1157,6 +1155,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) return 0; } + /* + * We don't want to overflow list.num which is uint32_t. + * We rely on our max_pdiscard. + */ + assert(bytes / iscsilun->block_size <= UINT32_MAX); + list.lba = offset / iscsilun->block_size; list.num = bytes / iscsilun->block_size; @@ -1205,12 +1209,12 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes) static int coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; uint64_t lba; - uint32_t nb_blocks; + uint64_t nb_blocks; bool use_16_for_ws = iscsilun->use_16_for_rw; int r = 0; @@ -1250,11 +1254,21 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (use_16_for_ws) { + /* + * iscsi_writesame16_task num_blocks argument is uint32_t. We rely here + * on our max_pwrite_zeroes limit. + */ + assert(nb_blocks <= UINT32_MAX); iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, iscsilun->zeroblock, iscsilun->block_size, nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), 0, 0, iscsi_co_generic_cb, &iTask); } else { + /* + * iscsi_writesame10_task num_blocks argument is uint16_t. We rely here + * on our max_pwrite_zeroes limit. 
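The asserts are backed by the max_pdiscard/max_pwrite_zeroes limits set in iscsi_refresh_limits(); translated into bytes they are quite generous. A quick back-of-the-envelope check, assuming a 512-byte block size purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t block_size = 512;       /* assumed only for this example */

    /* WRITE SAME(10): 16-bit block count -> just under 32 MiB per request */
    printf("WS10 cap: %llu bytes\n",
           (unsigned long long)(UINT16_MAX * block_size));

    /* WRITE SAME(16): 32-bit block count -> roughly 2 TiB per request */
    printf("WS16 cap: %llu bytes\n",
           (unsigned long long)(UINT32_MAX * block_size));
    return 0;
}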
+ */ + assert(nb_blocks <= UINT16_MAX); iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba, iscsilun->zeroblock, iscsilun->block_size, nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), @@ -2064,20 +2078,19 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) } if (iscsilun->lbp.lbpu) { - if (iscsilun->bl.max_unmap < 0xffffffff / block_size) { - bs->bl.max_pdiscard = - iscsilun->bl.max_unmap * iscsilun->block_size; - } + bs->bl.max_pdiscard = + MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size, + (uint64_t)UINT32_MAX * iscsilun->block_size); bs->bl.pdiscard_alignment = iscsilun->bl.opt_unmap_gran * iscsilun->block_size; } else { bs->bl.pdiscard_alignment = iscsilun->block_size; } - if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) { - bs->bl.max_pwrite_zeroes = - iscsilun->bl.max_ws_len * iscsilun->block_size; - } + bs->bl.max_pwrite_zeroes = + MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size, + max_xfer_len * iscsilun->block_size); + if (iscsilun->lbp.lbpws) { bs->bl.pwrite_zeroes_alignment = iscsilun->bl.opt_unmap_gran * iscsilun->block_size; @@ -2172,10 +2185,10 @@ static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs, static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -2313,10 +2326,10 @@ static void iscsi_xcopy_data(struct iscsi_data *data, static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { diff --git a/block/linux-aio.c b/block/linux-aio.c index 3c0527c2bf8..f53ae72e21f 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -28,6 +28,9 @@ */ #define MAX_EVENTS 1024 +/* Maximum number of requests in a batch. (default value) */ +#define DEFAULT_MAX_BATCH 32 + struct qemu_laiocb { Coroutine *co; LinuxAioState *ctx; @@ -331,22 +334,41 @@ static void ioq_submit(LinuxAioState *s) } } +static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch) +{ + uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH; + + /* + * AIO context can be shared between multiple block devices, so + * `dev_max_batch` allows reducing the batch size for latency-sensitive + * devices. 
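Worked through with the constants from this file (DEFAULT_MAX_BATCH 32, MAX_EVENTS 1024), the clamping order becomes obvious; MIN_NON_ZERO is redefined locally below only to keep the sketch standalone:

#include <stdint.h>
#include <stdio.h>

#define MAX_EVENTS        1024
#define DEFAULT_MAX_BATCH 32
#define MIN_NON_ZERO(a, b) \
    ((a) == 0 ? (b) : ((b) == 0 || (a) < (b) ? (a) : (b)))

static uint64_t max_batch(uint64_t ctx_max_batch, uint64_t dev_max_batch,
                          unsigned in_flight)
{
    uint64_t m = ctx_max_batch ? ctx_max_batch : DEFAULT_MAX_BATCH;

    m = MIN_NON_ZERO(dev_max_batch, m);          /* per-device override */
    m = MIN_NON_ZERO(MAX_EVENTS - in_flight, m); /* room left for events */
    return m;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)max_batch(0, 0, 0));    /* 32 */
    printf("%llu\n", (unsigned long long)max_batch(0, 8, 0));    /*  8 */
    printf("%llu\n", (unsigned long long)max_batch(0, 8, 1020)); /*  4 */
    return 0;
}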
+ */ + max_batch = MIN_NON_ZERO(dev_max_batch, max_batch); + + /* limit the batch with the number of available events */ + max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch); + + return max_batch; +} + void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) { s->io_q.plugged++; } -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) { assert(s->io_q.plugged); - if (--s->io_q.plugged == 0 && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (--s->io_q.plugged == 0 && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { ioq_submit(s); } } static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, - int type) + int type, uint64_t dev_max_batch) { LinuxAioState *s = laiocb->ctx; struct iocb *iocbs = &laiocb->iocb; @@ -371,7 +393,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, s->io_q.in_queue++; if (!s->io_q.blocked && (!s->io_q.plugged || - s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) { + s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) { ioq_submit(s); } @@ -379,7 +401,8 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, } int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - uint64_t offset, QEMUIOVector *qiov, int type) + uint64_t offset, QEMUIOVector *qiov, int type, + uint64_t dev_max_batch) { int ret; struct qemu_laiocb laiocb = { @@ -391,7 +414,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, .qiov = qiov, }; - ret = laio_do_submit(fd, &laiocb, offset, type); + ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch); if (ret < 0) { return ret; } diff --git a/block/meson.build b/block/meson.build index d21990ec95a..deb73ca389f 100644 --- a/block/meson.build +++ b/block/meson.build @@ -4,7 +4,7 @@ block_ss.add(files( 'aio_task.c', 'amend.c', 'backup.c', - 'backup-top.c', + 'copy-before-write.c', 'blkdebug.c', 'blklogwrites.c', 'blkverify.c', @@ -13,6 +13,7 @@ block_ss.add(files( 'commit.c', 'copy-on-read.c', 'preallocate.c', + 'progress_meter.c', 'create.c', 'crypto.c', 'dirty-bitmap.c', @@ -64,27 +65,26 @@ block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c')) block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c')) block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c')) -block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c')) -block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c')) -block_ss.add(when: ['CONFIG_LINUX_IO_URING', linux_io_uring], if_true: files('io_uring.c')) +block_ss.add(when: libaio, if_true: files('linux-aio.c')) +block_ss.add(when: linux_io_uring, if_true: files('io_uring.c')) block_modules = {} modsrc = [] foreach m : [ - [curl, 'curl', [curl, glib], 'curl.c'], - [glusterfs, 'gluster', glusterfs, 'gluster.c'], - [libiscsi, 'iscsi', libiscsi, 'iscsi.c'], - [libnfs, 'nfs', libnfs, 'nfs.c'], - [libssh, 'ssh', libssh, 'ssh.c'], - [rbd, 'rbd', rbd, 'rbd.c'], + [curl, 'curl', files('curl.c')], + [glusterfs, 'gluster', files('gluster.c')], + [libiscsi, 'iscsi', [files('iscsi.c'), libm]], + [libnfs, 'nfs', files('nfs.c')], + [libssh, 'ssh', files('ssh.c')], + [rbd, 'rbd', files('rbd.c')], ] if m[0].found() + module_ss = ss.source_set() + module_ss.add(when: m[0], if_true: m[2]) if 
enable_modules - modsrc += files(m[3]) + modsrc += module_ss.all_sources() endif - module_ss = ss.source_set() - module_ss.add(when: m[2], if_true: files(m[3])) block_modules += {m[1] : module_ss} endif endforeach diff --git a/block/mirror.c b/block/mirror.c index 5a71bd8bbcb..efec2c7674b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -56,7 +56,6 @@ typedef struct MirrorBlockJob { bool zero_target; MirrorCopyMode copy_mode; BlockdevOnError on_source_error, on_target_error; - bool synced; /* Set when the target is synced (dirty bitmap is clean, nothing * in flight) and the job is running in active mode */ bool actively_synced; @@ -107,6 +106,7 @@ struct MirrorOp { bool is_in_flight; CoQueue waiting_requests; Coroutine *co; + MirrorOp *waiting_for_op; QTAILQ_ENTRY(MirrorOp) next; }; @@ -120,7 +120,6 @@ typedef enum MirrorMethod { static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, int error) { - s->synced = false; s->actively_synced = false; if (read) { return block_job_error_action(&s->common, s->on_source_error, @@ -159,7 +158,25 @@ static void coroutine_fn mirror_wait_on_conflicts(MirrorOp *self, if (ranges_overlap(self_start_chunk, self_nb_chunks, op_start_chunk, op_nb_chunks)) { + if (self) { + /* + * If the operation is already (indirectly) waiting for us, + * or will wait for us as soon as it wakes up, then just go + * on (instead of producing a deadlock in the former case). + */ + if (op->waiting_for_op) { + continue; + } + + self->waiting_for_op = op; + } + qemu_co_queue_wait(&op->waiting_requests, NULL); + + if (self) { + self->waiting_for_op = NULL; + } + break; } } @@ -925,12 +942,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) if (s->bdev_length == 0) { /* Transition to the READY state and wait for complete. */ job_transition_to_ready(&s->common.job); - s->synced = true; s->actively_synced = true; - while (!job_is_cancelled(&s->common.job) && !s->should_complete) { + while (!job_cancel_requested(&s->common.job) && !s->should_complete) { job_yield(&s->common.job); } - s->common.job.cancelled = false; goto immediate_exit; } @@ -991,6 +1006,11 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) job_pause_point(&s->common.job); + if (job_is_cancelled(&s->common.job)) { + ret = 0; + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(s->dirty_bitmap); /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is * the number of bytes currently being processed; together those are @@ -1017,7 +1037,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) should_complete = false; if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); - if (!s->synced) { + if (!job_is_ready(&s->common.job)) { if (mirror_flush(s) < 0) { /* Go check s->ret. */ continue; @@ -1028,14 +1048,13 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * the target in a consistent state. */ job_transition_to_ready(&s->common.job); - s->synced = true; if (s->copy_mode != MIRROR_COPY_MODE_BACKGROUND) { s->actively_synced = true; } } should_complete = s->should_complete || - job_is_cancelled(&s->common.job); + job_cancel_requested(&s->common.job); cnt = bdrv_get_dirty_count(s->dirty_bitmap); } @@ -1065,24 +1084,17 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * completion. 
*/ assert(QLIST_EMPTY(&bs->tracked_requests)); - s->common.job.cancelled = false; need_drain = false; break; } - ret = 0; - - if (s->synced && !should_complete) { + if (job_is_ready(&s->common.job) && !should_complete) { delay_ns = (s->in_flight == 0 && cnt == 0 ? BLOCK_JOB_SLICE_TIME : 0); } - trace_mirror_before_sleep(s, cnt, s->synced, delay_ns); + trace_mirror_before_sleep(s, cnt, job_is_ready(&s->common.job), + delay_ns); job_sleep_ns(&s->common.job, delay_ns); - if (job_is_cancelled(&s->common.job) && - (!s->synced || s->common.job.force_cancel)) - { - break; - } s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } @@ -1092,8 +1104,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) * or it was cancelled prematurely so that we do not guarantee that * the target is a copy of the source. */ - assert(ret < 0 || ((s->common.job.force_cancel || !s->synced) && - job_is_cancelled(&s->common.job))); + assert(ret < 0 || job_is_cancelled(&s->common.job)); assert(need_drain); mirror_wait_for_all_io(s); } @@ -1116,7 +1127,7 @@ static void mirror_complete(Job *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - if (!s->synced) { + if (!job_is_ready(job)) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; @@ -1171,19 +1182,34 @@ static bool mirror_drained_poll(BlockJob *job) * from one of our own drain sections, to avoid a deadlock waiting for * ourselves. */ - if (!s->common.job.paused && !s->common.job.cancelled && !s->in_drain) { + if (!s->common.job.paused && !job_is_cancelled(&job->job) && !s->in_drain) { return true; } return !!s->in_flight; } -static void mirror_cancel(Job *job) +static bool mirror_cancel(Job *job, bool force) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); BlockDriverState *target = blk_bs(s->target); - bdrv_cancel_in_flight(target); + /* + * Before the job is READY, we treat any cancellation like a + * force-cancellation. 
+ */ + force = force || !job_is_ready(job); + + if (force) { + bdrv_cancel_in_flight(target); + } + return force; +} + +static bool commit_active_cancel(Job *job, bool force) +{ + /* Same as above in mirror_cancel() */ + return force || !job_is_ready(job); } static const BlockJobDriver mirror_job_driver = { @@ -1213,6 +1239,7 @@ static const BlockJobDriver commit_active_job_driver = { .abort = mirror_abort, .pause = mirror_pause, .complete = mirror_complete, + .cancel = commit_active_cancel, }, .drained_poll = mirror_drained_poll, }; @@ -1341,6 +1368,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, .bytes = bytes, .is_active_write = true, .is_in_flight = true, + .co = qemu_coroutine_self(), }; qemu_co_queue_init(&op->waiting_requests); QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); @@ -1380,7 +1408,7 @@ static void coroutine_fn active_write_settle(MirrorOp *op) } static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } @@ -1395,6 +1423,7 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, bool copy_to_target; copy_to_target = s->job->ret >= 0 && + !job_is_cancelled(&s->job->common.job) && s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; if (copy_to_target) { @@ -1434,7 +1463,7 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, } static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { MirrorBDSOpaque *s = bs->opaque; QEMUIOVector bounce_qiov; @@ -1443,6 +1472,7 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, bool copy_to_target; copy_to_target = s->job->ret >= 0 && + !job_is_cancelled(&s->job->common.job) && s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; if (copy_to_target) { @@ -1479,14 +1509,14 @@ static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) } static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_ZERO, offset, bytes, NULL, flags); } static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_mirror_top_do_write(bs, MIRROR_METHOD_DISCARD, offset, bytes, NULL, 0); @@ -1630,9 +1660,6 @@ static BlockJob *mirror_start_job( bs_opaque->is_commit = target_is_backing; - /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep - * it alive until block_job_create() succeeds even if bs has no parent. 
*/ - bdrv_ref(mirror_top_bs); bdrv_drained_begin(bs); ret = bdrv_append(mirror_top_bs, bs, errp); bdrv_drained_end(bs); diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index ebf1033f318..2ac4aedfff0 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -251,10 +251,10 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) if (!filename) { error_setg(&err, QERR_MISSING_PARAMETER, "target"); - hmp_handle_error(mon, err); - return; + goto end; } qmp_drive_mirror(&mirror, &err); +end: hmp_handle_error(mon, err); } @@ -281,11 +281,11 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict) if (!filename) { error_setg(&err, QERR_MISSING_PARAMETER, "target"); - hmp_handle_error(mon, err); - return; + goto end; } qmp_drive_backup(&backup, &err); +end: hmp_handle_error(mon, err); } @@ -356,8 +356,7 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) * will be taken internally. Today it's actually required. */ error_setg(&err, QERR_MISSING_PARAMETER, "snapshot-file"); - hmp_handle_error(mon, err); - return; + goto end; } mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; @@ -365,6 +364,7 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) filename, false, NULL, !!format, format, true, mode, &err); +end: hmp_handle_error(mon, err); } @@ -557,8 +557,10 @@ void hmp_eject(Monitor *mon, const QDict *qdict) void hmp_qemu_io(Monitor *mon, const QDict *qdict) { - BlockBackend *blk; + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; BlockBackend *local_blk = NULL; + AioContext *ctx = NULL; bool qdev = qdict_get_try_bool(qdict, "qdev", false); const char *device = qdict_get_str(qdict, "device"); const char *command = qdict_get_str(qdict, "command"); @@ -573,20 +575,24 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) } else { blk = blk_by_name(device); if (!blk) { - BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); - if (bs) { - blk = local_blk = blk_new(bdrv_get_aio_context(bs), - 0, BLK_PERM_ALL); - ret = blk_insert_bs(blk, bs, &err); - if (ret < 0) { - goto fail; - } - } else { + bs = bdrv_lookup_bs(NULL, device, &err); + if (!bs) { goto fail; } } } + ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + + if (bs) { + blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } + } + /* * Notably absent: Proper permission management. This is sad, but it seems * almost impossible to achieve without changing the semantics and thereby @@ -616,6 +622,11 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) fail: blk_unref(local_blk); + + if (ctx) { + aio_context_release(ctx); + } + hmp_handle_error(mon, err); } diff --git a/block/nbd.c b/block/nbd.c index 1d4668d42d9..5ef462db1b7 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -44,6 +44,7 @@ #include "block/qdict.h" #include "block/nbd.h" #include "block/block_int.h" +#include "block/coroutines.h" #include "qemu/yank.h" @@ -56,7 +57,8 @@ typedef struct { Coroutine *coroutine; uint64_t offset; /* original offset of the request */ - bool receiving; /* waiting for connection_co? 
*/ + bool receiving; /* sleeping in the yield in nbd_receive_replies */ + bool reply_possible; /* reply header not yet received */ } NBDClientRequest; typedef enum NBDClientState { @@ -66,64 +68,16 @@ typedef enum NBDClientState { NBD_CLIENT_QUIT } NBDClientState; -typedef enum NBDConnectThreadState { - /* No thread, no pending results */ - CONNECT_THREAD_NONE, - - /* Thread is running, no results for now */ - CONNECT_THREAD_RUNNING, - - /* - * Thread is running, but requestor exited. Thread should close - * the new socket and free the connect state on exit. - */ - CONNECT_THREAD_RUNNING_DETACHED, - - /* Thread finished, results are stored in a state */ - CONNECT_THREAD_FAIL, - CONNECT_THREAD_SUCCESS -} NBDConnectThreadState; - -typedef struct NBDConnectThread { - /* Initialization constants */ - SocketAddress *saddr; /* address to connect to */ - /* - * Bottom half to schedule on completion. Scheduled only if bh_ctx is not - * NULL - */ - QEMUBHFunc *bh_func; - void *bh_opaque; - - /* - * Result of last attempt. Valid in FAIL and SUCCESS states. - * If you want to steal error, don't forget to set pointer to NULL. - */ - QIOChannelSocket *sioc; - Error *err; - - /* state and bh_ctx are protected by mutex */ - QemuMutex mutex; - NBDConnectThreadState state; /* current state of the thread */ - AioContext *bh_ctx; /* where to schedule bh (NULL means don't schedule) */ -} NBDConnectThread; - typedef struct BDRVNBDState { - QIOChannelSocket *sioc; /* The master data channel */ - QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + QIOChannel *ioc; /* The current I/O channel */ NBDExportInfo info; CoMutex send_mutex; CoQueue free_sema; - Coroutine *connection_co; - Coroutine *teardown_co; - QemuCoSleepState *connection_co_sleep_ns_state; - bool drained; - bool wait_drained_end; + + CoMutex receive_mutex; int in_flight; NBDClientState state; - int connect_status; - Error *connect_err; - bool wait_in_flight; QEMUTimer *reconnect_delay_timer; @@ -140,20 +94,20 @@ typedef struct BDRVNBDState { char *x_dirty_bitmap; bool alloc_depth; - bool wait_connect; - NBDConnectThread *connect_thread; + NBDClientConnection *conn; } BDRVNBDState; -static int nbd_establish_connection(BlockDriverState *bs, SocketAddress *saddr, - Error **errp); -static int nbd_co_establish_connection(BlockDriverState *bs, Error **errp); -static void nbd_co_establish_connection_cancel(BlockDriverState *bs, - bool detach); -static int nbd_client_handshake(BlockDriverState *bs, Error **errp); static void nbd_yank(void *opaque); -static void nbd_clear_bdrvstate(BDRVNBDState *s) +static void nbd_clear_bdrvstate(BlockDriverState *bs) { + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + + nbd_client_connection_release(s->conn); + s->conn = NULL; + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + object_unref(OBJECT(s->tlscreds)); qapi_free_SocketAddress(s->saddr); s->saddr = NULL; @@ -165,32 +119,49 @@ static void nbd_clear_bdrvstate(BDRVNBDState *s) s->x_dirty_bitmap = NULL; } -static void nbd_channel_error(BDRVNBDState *s, int ret) +static bool nbd_client_connected(BDRVNBDState *s) { - if (ret == -EIO) { - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED) { - s->state = s->reconnect_delay ? 
NBD_CLIENT_CONNECTING_WAIT : - NBD_CLIENT_CONNECTING_NOWAIT; - } - } else { - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED) { - qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); - } - s->state = NBD_CLIENT_QUIT; + return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED; +} + +static bool nbd_recv_coroutine_wake_one(NBDClientRequest *req) +{ + if (req->receiving) { + req->receiving = false; + aio_co_wake(req->coroutine); + return true; } + + return false; } -static void nbd_recv_coroutines_wake_all(BDRVNBDState *s) +static void nbd_recv_coroutines_wake(BDRVNBDState *s, bool all) { int i; for (i = 0; i < MAX_NBD_REQUESTS; i++) { - NBDClientRequest *req = &s->requests[i]; + if (nbd_recv_coroutine_wake_one(&s->requests[i]) && !all) { + return; + } + } +} + +static void nbd_channel_error(BDRVNBDState *s, int ret) +{ + if (nbd_client_connected(s)) { + qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } - if (req->coroutine && req->receiving) { - aio_co_wake(req->coroutine); + if (ret == -EIO) { + if (nbd_client_connected(s)) { + s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT : + NBD_CLIENT_CONNECTING_NOWAIT; } + } else { + s->state = NBD_CLIENT_QUIT; } + + nbd_recv_coroutines_wake(s, true); } static void reconnect_delay_timer_del(BDRVNBDState *s) @@ -207,6 +178,7 @@ static void reconnect_delay_timer_cb(void *opaque) if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { s->state = NBD_CLIENT_CONNECTING_NOWAIT; + nbd_co_establish_connection_cancel(s->conn); while (qemu_co_enter_next(&s->free_sema, NULL)) { /* Resume all queued requests */ } @@ -229,121 +201,21 @@ static void reconnect_delay_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) timer_mod(s->reconnect_delay_timer, expire_time_ns); } -static void nbd_client_detach_aio_context(BlockDriverState *bs) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - /* Timer is deleted in nbd_client_co_drain_begin() */ - assert(!s->reconnect_delay_timer); - /* - * If reconnect is in progress we may have no ->ioc. It will be - * re-instantiated in the proper aio context once the connection is - * reestablished. - */ - if (s->ioc) { - qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); - } -} - -static void nbd_client_attach_aio_context_bh(void *opaque) -{ - BlockDriverState *bs = opaque; - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - if (s->connection_co) { - /* - * The node is still drained, so we know the coroutine has yielded in - * nbd_read_eof(), the only place where bs->in_flight can reach 0, or - * it is entered for the first time. Both places are safe for entering - * the coroutine. - */ - qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); - } - bdrv_dec_in_flight(bs); -} - -static void nbd_client_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - /* - * s->connection_co is either yielded from nbd_receive_reply or from - * nbd_co_reconnect_loop() - */ - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED) { - qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context); - } - - bdrv_inc_in_flight(bs); - - /* - * Need to wait here for the BH to run because the BH must run while the - * node is still drained. 
- */ - aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs); -} - -static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - s->drained = true; - if (s->connection_co_sleep_ns_state) { - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - - nbd_co_establish_connection_cancel(bs, false); - - reconnect_delay_timer_del(s); - - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { - s->state = NBD_CLIENT_CONNECTING_NOWAIT; - qemu_co_queue_restart_all(&s->free_sema); - } -} - -static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - s->drained = false; - if (s->wait_drained_end) { - s->wait_drained_end = false; - aio_co_wake(s->connection_co); - } -} - - static void nbd_teardown_connection(BlockDriverState *bs) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + assert(!s->in_flight); + if (s->ioc) { - /* finish any pending coroutines */ qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); - } else if (s->sioc) { - /* abort negotiation */ - qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, - NULL); + yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), + nbd_yank, s->bs); + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; } s->state = NBD_CLIENT_QUIT; - if (s->connection_co) { - if (s->connection_co_sleep_ns_state) { - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - nbd_co_establish_connection_cancel(bs, true); - } - if (qemu_in_coroutine()) { - s->teardown_co = qemu_coroutine_self(); - /* connection_co resumes us when it terminates */ - qemu_coroutine_yield(); - s->teardown_co = NULL; - } else { - BDRV_POLL_WHILE(bs, s->connection_co); - } - assert(!s->connection_co); } static bool nbd_client_connecting(BDRVNBDState *s) @@ -358,260 +230,112 @@ static bool nbd_client_connecting_wait(BDRVNBDState *s) return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT; } -static void connect_bh(void *opaque) -{ - BDRVNBDState *state = opaque; - - assert(state->wait_connect); - state->wait_connect = false; - aio_co_wake(state->connection_co); -} - -static void nbd_init_connect_thread(BDRVNBDState *s) -{ - s->connect_thread = g_new(NBDConnectThread, 1); - - *s->connect_thread = (NBDConnectThread) { - .saddr = QAPI_CLONE(SocketAddress, s->saddr), - .state = CONNECT_THREAD_NONE, - .bh_func = connect_bh, - .bh_opaque = s, - }; - - qemu_mutex_init(&s->connect_thread->mutex); -} - -static void nbd_free_connect_thread(NBDConnectThread *thr) -{ - if (thr->sioc) { - qio_channel_close(QIO_CHANNEL(thr->sioc), NULL); - } - error_free(thr->err); - qapi_free_SocketAddress(thr->saddr); - g_free(thr); -} - -static void *connect_thread_func(void *opaque) +/* + * Update @bs with information learned during a completed negotiation process. + * Return failure if the server's advertised options are incompatible with the + * client's needs. 
+ */ +static int nbd_handle_updated_info(BlockDriverState *bs, Error **errp) { - NBDConnectThread *thr = opaque; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; - bool do_free = false; - thr->sioc = qio_channel_socket_new(); - - error_free(thr->err); - thr->err = NULL; - ret = qio_channel_socket_connect_sync(thr->sioc, thr->saddr, &thr->err); - if (ret < 0) { - object_unref(OBJECT(thr->sioc)); - thr->sioc = NULL; + if (s->x_dirty_bitmap) { + if (!s->info.base_allocation) { + error_setg(errp, "requested x-dirty-bitmap %s not found", + s->x_dirty_bitmap); + return -EINVAL; + } + if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) { + s->alloc_depth = true; + } } - qemu_mutex_lock(&thr->mutex); - - switch (thr->state) { - case CONNECT_THREAD_RUNNING: - thr->state = ret < 0 ? CONNECT_THREAD_FAIL : CONNECT_THREAD_SUCCESS; - if (thr->bh_ctx) { - aio_bh_schedule_oneshot(thr->bh_ctx, thr->bh_func, thr->bh_opaque); - - /* play safe, don't reuse bh_ctx on further connection attempts */ - thr->bh_ctx = NULL; + if (s->info.flags & NBD_FLAG_READ_ONLY) { + ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp); + if (ret < 0) { + return ret; } - break; - case CONNECT_THREAD_RUNNING_DETACHED: - do_free = true; - break; - default: - abort(); } - qemu_mutex_unlock(&thr->mutex); + if (s->info.flags & NBD_FLAG_SEND_FUA) { + bs->supported_write_flags = BDRV_REQ_FUA; + bs->supported_zero_flags |= BDRV_REQ_FUA; + } - if (do_free) { - nbd_free_connect_thread(thr); + if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) { + bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; + if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) { + bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK; + } } - return NULL; + trace_nbd_client_handshake_success(s->export); + + return 0; } -static int coroutine_fn -nbd_co_establish_connection(BlockDriverState *bs, Error **errp) +int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, + Error **errp) { + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; int ret; - QemuThread thread; - BDRVNBDState *s = bs->opaque; - NBDConnectThread *thr = s->connect_thread; - - if (!thr) { - /* detached */ - return -1; - } - - qemu_mutex_lock(&thr->mutex); - - switch (thr->state) { - case CONNECT_THREAD_FAIL: - case CONNECT_THREAD_NONE: - error_free(thr->err); - thr->err = NULL; - thr->state = CONNECT_THREAD_RUNNING; - qemu_thread_create(&thread, "nbd-connect", - connect_thread_func, thr, QEMU_THREAD_DETACHED); - break; - case CONNECT_THREAD_SUCCESS: - /* Previous attempt finally succeeded in background */ - thr->state = CONNECT_THREAD_NONE; - s->sioc = thr->sioc; - thr->sioc = NULL; - yank_register_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - qemu_mutex_unlock(&thr->mutex); - return 0; - case CONNECT_THREAD_RUNNING: - /* Already running, will wait */ - break; - default: - abort(); - } + bool blocking = nbd_client_connecting_wait(s); - thr->bh_ctx = qemu_get_current_aio_context(); - - qemu_mutex_unlock(&thr->mutex); + assert(!s->ioc); + s->ioc = nbd_co_establish_connection(s->conn, &s->info, blocking, errp); + if (!s->ioc) { + return -ECONNREFUSED; + } - /* - * We are going to wait for connect-thread finish, but - * nbd_client_co_drain_begin() can interrupt. - * - * Note that wait_connect variable is not visible for connect-thread. It - * doesn't need mutex protection, it used only inside home aio context of - * bs. 
- */ - s->wait_connect = true; - qemu_coroutine_yield(); - - if (!s->connect_thread) { - /* detached */ - return -1; - } - assert(thr == s->connect_thread); - - qemu_mutex_lock(&thr->mutex); - - switch (thr->state) { - case CONNECT_THREAD_SUCCESS: - case CONNECT_THREAD_FAIL: - thr->state = CONNECT_THREAD_NONE; - error_propagate(errp, thr->err); - thr->err = NULL; - s->sioc = thr->sioc; - thr->sioc = NULL; - if (s->sioc) { - yank_register_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - } - ret = (s->sioc ? 0 : -1); - break; - case CONNECT_THREAD_RUNNING: - case CONNECT_THREAD_RUNNING_DETACHED: - /* - * Obviously, drained section wants to start. Report the attempt as - * failed. Still connect thread is executing in background, and its - * result may be used for next connection attempt. - */ - ret = -1; - error_setg(errp, "Connection attempt cancelled by other operation"); - break; + yank_register_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), nbd_yank, + bs); - case CONNECT_THREAD_NONE: + ret = nbd_handle_updated_info(s->bs, NULL); + if (ret < 0) { /* - * Impossible. We've seen this thread running. So it should be - * running or at least give some results. + * We have connected, but must fail for other reasons. + * Send NBD_CMD_DISC as a courtesy to the server. */ - abort(); - - default: - abort(); - } + NBDRequest request = { .type = NBD_CMD_DISC }; - qemu_mutex_unlock(&thr->mutex); + nbd_send_request(s->ioc, &request); - return ret; -} + yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), + nbd_yank, bs); + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; -/* - * nbd_co_establish_connection_cancel - * Cancel nbd_co_establish_connection asynchronously: it will finish soon, to - * allow drained section to begin. - * - * If detach is true, also cleanup the state (or if thread is running, move it - * to CONNECT_THREAD_RUNNING_DETACHED state). s->connect_thread becomes NULL if - * detach is true. 
- */ -static void nbd_co_establish_connection_cancel(BlockDriverState *bs, - bool detach) -{ - BDRVNBDState *s = bs->opaque; - NBDConnectThread *thr = s->connect_thread; - bool wake = false; - bool do_free = false; - - qemu_mutex_lock(&thr->mutex); - - if (thr->state == CONNECT_THREAD_RUNNING) { - /* We can cancel only in running state, when bh is not yet scheduled */ - thr->bh_ctx = NULL; - if (s->wait_connect) { - s->wait_connect = false; - wake = true; - } - if (detach) { - thr->state = CONNECT_THREAD_RUNNING_DETACHED; - s->connect_thread = NULL; - } - } else if (detach) { - do_free = true; + return ret; } - qemu_mutex_unlock(&thr->mutex); + qio_channel_set_blocking(s->ioc, false, NULL); + qio_channel_attach_aio_context(s->ioc, bdrv_get_aio_context(bs)); - if (do_free) { - nbd_free_connect_thread(thr); - s->connect_thread = NULL; - } + /* successfully connected */ + s->state = NBD_CLIENT_CONNECTED; + qemu_co_queue_restart_all(&s->free_sema); - if (wake) { - aio_co_wake(s->connection_co); - } + return 0; } +/* called under s->send_mutex */ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) { - int ret; - Error *local_err = NULL; - - if (!nbd_client_connecting(s)) { - return; - } - - /* Wait for completion of all in-flight requests */ - - qemu_co_mutex_lock(&s->send_mutex); - - while (s->in_flight > 0) { - qemu_co_mutex_unlock(&s->send_mutex); - nbd_recv_coroutines_wake_all(s); - s->wait_in_flight = true; - qemu_coroutine_yield(); - s->wait_in_flight = false; - qemu_co_mutex_lock(&s->send_mutex); - } - - qemu_co_mutex_unlock(&s->send_mutex); + assert(nbd_client_connecting(s)); + assert(s->in_flight == 0); - if (!nbd_client_connecting(s)) { - return; + if (nbd_client_connecting_wait(s) && s->reconnect_delay && + !s->reconnect_delay_timer) + { + /* + * It's first reconnect attempt after switching to + * NBD_CLIENT_CONNECTING_WAIT + */ + reconnect_delay_timer_init(s, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + + s->reconnect_delay * NANOSECONDS_PER_SECOND); } /* @@ -624,176 +348,80 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), nbd_yank, s->bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; object_unref(OBJECT(s->ioc)); s->ioc = NULL; } - if (nbd_co_establish_connection(s->bs, &local_err) < 0) { - ret = -ECONNREFUSED; - goto out; - } - - bdrv_dec_in_flight(s->bs); - - ret = nbd_client_handshake(s->bs, &local_err); - - if (s->drained) { - s->wait_drained_end = true; - while (s->drained) { - /* - * We may be entered once from nbd_client_attach_aio_context_bh - * and then from nbd_client_co_drain_end. So here is a loop. 
- */ - qemu_coroutine_yield(); - } - } - bdrv_inc_in_flight(s->bs); - -out: - s->connect_status = ret; - error_free(s->connect_err); - s->connect_err = NULL; - error_propagate(&s->connect_err, local_err); - - if (ret >= 0) { - /* successfully connected */ - s->state = NBD_CLIENT_CONNECTED; - qemu_co_queue_restart_all(&s->free_sema); - } + nbd_co_do_establish_connection(s->bs, NULL); } -static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s) +static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) { - uint64_t timeout = 1 * NANOSECONDS_PER_SECOND; - uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND; + int ret; + uint64_t ind = HANDLE_TO_INDEX(s, handle), ind2; + QEMU_LOCK_GUARD(&s->receive_mutex); - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT) { - reconnect_delay_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + - s->reconnect_delay * NANOSECONDS_PER_SECOND); - } - - nbd_reconnect_attempt(s); - - while (nbd_client_connecting(s)) { - if (s->drained) { - bdrv_dec_in_flight(s->bs); - s->wait_drained_end = true; - while (s->drained) { - /* - * We may be entered once from nbd_client_attach_aio_context_bh - * and then from nbd_client_co_drain_end. So here is a loop. - */ - qemu_coroutine_yield(); - } - bdrv_inc_in_flight(s->bs); - } else { - qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout, - &s->connection_co_sleep_ns_state); - if (s->drained) { - continue; - } - if (timeout < max_timeout) { - timeout *= 2; - } + while (true) { + if (s->reply.handle == handle) { + /* We are done */ + return 0; } - nbd_reconnect_attempt(s); - } - - reconnect_delay_timer_del(s); -} + if (!nbd_client_connected(s)) { + return -EIO; + } -static coroutine_fn void nbd_connection_entry(void *opaque) -{ - BDRVNBDState *s = opaque; - uint64_t i; - int ret = 0; - Error *local_err = NULL; + if (s->reply.handle != 0) { + /* + * Some other request is being handled now. It should already be + * woken by whoever set s->reply.handle (or never wait in this + * yield). So, we should not wake it here. + */ + ind2 = HANDLE_TO_INDEX(s, s->reply.handle); + assert(!s->requests[ind2].receiving); - while (qatomic_load_acquire(&s->state) != NBD_CLIENT_QUIT) { - /* - * The NBD client can only really be considered idle when it has - * yielded from qio_channel_readv_all_eof(), waiting for data. This is - * the point where the additional scheduled coroutine entry happens - * after nbd_client_attach_aio_context(). - * - * Therefore we keep an additional in_flight reference all the time and - * only drop it temporarily here. - */ + s->requests[ind].receiving = true; + qemu_co_mutex_unlock(&s->receive_mutex); - if (nbd_client_connecting(s)) { - nbd_co_reconnect_loop(s); - } + qemu_coroutine_yield(); + /* + * We may be woken for 3 reasons: + * 1. From this function, executing in parallel coroutine, when our + * handle is received. + * 2. From nbd_channel_error(), when connection is lost. + * 3. From nbd_co_receive_one_chunk(), when previous request is + * finished and s->reply.handle set to 0. + * Anyway, it's OK to lock the mutex and go to the next iteration. + */ - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + qemu_co_mutex_lock(&s->receive_mutex); + assert(!s->requests[ind].receiving); continue; } + /* We are under mutex and handle is 0. We have to do the dirty work. 
*/ assert(s->reply.handle == 0); - ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err); - - if (local_err) { - trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err)); - error_free(local_err); - local_err = NULL; - } + ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL); if (ret <= 0) { - nbd_channel_error(s, ret ? ret : -EIO); - continue; + ret = ret ? ret : -EIO; + nbd_channel_error(s, ret); + return ret; } - - /* - * There's no need for a mutex on the receive side, because the - * handler acts as a synchronization point and ensures that only - * one coroutine is called until the reply finishes. - */ - i = HANDLE_TO_INDEX(s, s->reply.handle); - if (i >= MAX_NBD_REQUESTS || - !s->requests[i].coroutine || - !s->requests[i].receiving || - (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply)) - { + if (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply) { nbd_channel_error(s, -EINVAL); - continue; + return -EINVAL; } - - /* - * We're woken up again by the request itself. Note that there - * is no race between yielding and reentering connection_co. This - * is because: - * - * - if the request runs on the same AioContext, it is only - * entered after we yield - * - * - if the request runs on a different AioContext, reentering - * connection_co happens through a bottom half, which can only - * run after we yield. - */ - aio_co_wake(s->requests[i].coroutine); - qemu_coroutine_yield(); - } - - qemu_co_queue_restart_all(&s->free_sema); - nbd_recv_coroutines_wake_all(s); - bdrv_dec_in_flight(s->bs); - - s->connection_co = NULL; - if (s->ioc) { - qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); - yank_unregister_function(BLOCKDEV_YANK_INSTANCE(s->bs->node_name), - nbd_yank, s->bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; - } - - if (s->teardown_co) { - aio_co_wake(s->teardown_co); + if (s->reply.handle == handle) { + /* We are done */ + return 0; + } + ind2 = HANDLE_TO_INDEX(s, s->reply.handle); + if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].reply_possible) { + nbd_channel_error(s, -EINVAL); + return -EINVAL; + } + nbd_recv_coroutine_wake_one(&s->requests[ind2]); } - aio_wait_kick(); } static int nbd_co_send_request(BlockDriverState *bs, @@ -804,11 +432,18 @@ static int nbd_co_send_request(BlockDriverState *bs, int rc, i = -1; qemu_co_mutex_lock(&s->send_mutex); - while (s->in_flight == MAX_NBD_REQUESTS || nbd_client_connecting_wait(s)) { + + while (s->in_flight == MAX_NBD_REQUESTS || + (!nbd_client_connected(s) && s->in_flight > 0)) + { qemu_co_queue_wait(&s->free_sema, &s->send_mutex); } - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + if (nbd_client_connecting(s)) { + nbd_reconnect_attempt(s); + } + + if (!nbd_client_connected(s)) { rc = -EIO; goto err; } @@ -827,6 +462,7 @@ static int nbd_co_send_request(BlockDriverState *bs, s->requests[i].coroutine = qemu_coroutine_self(); s->requests[i].offset = request->from; s->requests[i].receiving = false; + s->requests[i].reply_possible = true; request->handle = INDEX_TO_HANDLE(s, i); @@ -835,8 +471,7 @@ static int nbd_co_send_request(BlockDriverState *bs, if (qiov) { qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); - if (qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTED && - rc >= 0) { + if (nbd_client_connected(s) && rc >= 0) { if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov, NULL) < 0) { rc = -EIO; @@ -855,10 +490,6 @@ static int nbd_co_send_request(BlockDriverState *bs, if 
(i != -1) { s->requests[i].coroutine = NULL; s->in_flight--; - } - if (s->in_flight == 0 && s->wait_in_flight) { - aio_co_wake(s->connection_co); - } else { qemu_co_queue_next(&s->free_sema); } } @@ -1157,11 +788,8 @@ static coroutine_fn int nbd_co_do_receive_one_chunk( } *request_ret = 0; - /* Wait until we're woken up by nbd_connection_entry. */ - s->requests[i].receiving = true; - qemu_coroutine_yield(); - s->requests[i].receiving = false; - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + nbd_receive_replies(s, handle); + if (!nbd_client_connected(s)) { error_setg(errp, "Connection closed"); return -EIO; } @@ -1253,14 +881,7 @@ static coroutine_fn int nbd_co_receive_one_chunk( } s->reply.handle = 0; - if (s->connection_co && !s->wait_in_flight) { - /* - * We must check s->wait_in_flight, because we may entered by - * nbd_recv_coroutines_wake_all(), in this case we should not - * wake connection_co here, it will woken by last request. - */ - aio_co_wake(s->connection_co); - } + nbd_recv_coroutines_wake(s, false); return ret; } @@ -1320,7 +941,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, NBDReply local_reply; NBDStructuredReplyChunk *chunk; Error *local_err = NULL; - if (qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + if (!nbd_client_connected(s)) { error_setg(&local_err, "Connection closed"); nbd_iter_channel_error(iter, -EIO, &local_err); goto break_loop; @@ -1345,8 +966,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, } /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */ - if (nbd_reply_is_simple(reply) || - qatomic_load_acquire(&s->state) != NBD_CLIENT_CONNECTED) { + if (nbd_reply_is_simple(reply) || !nbd_client_connected(s)) { goto break_loop; } @@ -1372,11 +992,7 @@ static bool nbd_reply_chunk_iter_receive(BDRVNBDState *s, qemu_co_mutex_lock(&s->send_mutex); s->in_flight--; - if (s->in_flight == 0 && s->wait_in_flight) { - aio_co_wake(s->connection_co); - } else { - qemu_co_queue_next(&s->free_sema); - } + qemu_co_queue_next(&s->free_sema); qemu_co_mutex_unlock(&s->send_mutex); return false; @@ -1545,8 +1161,9 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request, return ret ? ret : request_ret; } -static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int nbd_client_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { int ret, request_ret; Error *local_err = NULL; @@ -1603,8 +1220,9 @@ static int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, return ret ? 
ret : request_ret; } -static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int nbd_client_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { @@ -1628,15 +1246,17 @@ static int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, } static int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_WRITE_ZEROES, .from = offset, - .len = bytes, + .len = bytes, /* .len is uint32_t actually */ }; + assert(bytes <= UINT32_MAX); /* rely on max_pwrite_zeroes */ + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); if (!(s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) { return -ENOTSUP; @@ -1676,15 +1296,17 @@ static int nbd_client_co_flush(BlockDriverState *bs) } static int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) + int64_t bytes) { BDRVNBDState *s = (BDRVNBDState *)bs->opaque; NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, - .len = bytes, + .len = bytes, /* len is uint32_t */ }; + assert(bytes <= UINT32_MAX); /* rely on max_pdiscard */ + assert(!(s->info.flags & NBD_FLAG_READ_ONLY)); if (!(s->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) { return 0; @@ -1784,7 +1406,7 @@ static void nbd_yank(void *opaque) BDRVNBDState *s = (BDRVNBDState *)bs->opaque; qatomic_store_release(&s->state, NBD_CLIENT_QUIT); - qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + qio_channel_shutdown(QIO_CHANNEL(s->ioc), QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } static void nbd_client_close(BlockDriverState *bs) @@ -1799,111 +1421,6 @@ static void nbd_client_close(BlockDriverState *bs) nbd_teardown_connection(bs); } -static int nbd_establish_connection(BlockDriverState *bs, - SocketAddress *saddr, - Error **errp) -{ - ERRP_GUARD(); - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - - s->sioc = qio_channel_socket_new(); - qio_channel_set_name(QIO_CHANNEL(s->sioc), "nbd-client"); - - qio_channel_socket_connect_sync(s->sioc, saddr, errp); - if (*errp) { - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - return -1; - } - - yank_register_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), nbd_yank, bs); - qio_channel_set_delay(QIO_CHANNEL(s->sioc), false); - - return 0; -} - -/* nbd_client_handshake takes ownership on s->sioc. On failure it's unref'ed. 
*/ -static int nbd_client_handshake(BlockDriverState *bs, Error **errp) -{ - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - AioContext *aio_context = bdrv_get_aio_context(bs); - int ret; - - trace_nbd_client_handshake(s->export); - qio_channel_set_blocking(QIO_CHANNEL(s->sioc), false, NULL); - qio_channel_attach_aio_context(QIO_CHANNEL(s->sioc), aio_context); - - s->info.request_sizes = true; - s->info.structured_reply = true; - s->info.base_allocation = true; - s->info.x_dirty_bitmap = g_strdup(s->x_dirty_bitmap); - s->info.name = g_strdup(s->export ?: ""); - ret = nbd_receive_negotiate(aio_context, QIO_CHANNEL(s->sioc), s->tlscreds, - s->hostname, &s->ioc, &s->info, errp); - g_free(s->info.x_dirty_bitmap); - g_free(s->info.name); - if (ret < 0) { - yank_unregister_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - return ret; - } - if (s->x_dirty_bitmap) { - if (!s->info.base_allocation) { - error_setg(errp, "requested x-dirty-bitmap %s not found", - s->x_dirty_bitmap); - ret = -EINVAL; - goto fail; - } - if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) { - s->alloc_depth = true; - } - } - if (s->info.flags & NBD_FLAG_READ_ONLY) { - ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp); - if (ret < 0) { - goto fail; - } - } - if (s->info.flags & NBD_FLAG_SEND_FUA) { - bs->supported_write_flags = BDRV_REQ_FUA; - bs->supported_zero_flags |= BDRV_REQ_FUA; - } - if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) { - bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; - if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) { - bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK; - } - } - - if (!s->ioc) { - s->ioc = QIO_CHANNEL(s->sioc); - object_ref(OBJECT(s->ioc)); - } - - trace_nbd_client_handshake_success(s->export); - - return 0; - - fail: - /* - * We have connected, but must fail for other reasons. - * Send NBD_CMD_DISC as a courtesy to the server. 
- */ - { - NBDRequest request = { .type = NBD_CMD_DISC }; - - nbd_send_request(s->ioc ?: QIO_CHANNEL(s->sioc), &request); - - yank_unregister_function(BLOCKDEV_YANK_INSTANCE(bs->node_name), - nbd_yank, bs); - object_unref(OBJECT(s->sioc)); - s->sioc = NULL; - - return ret; - } -} /* * Parse nbd_open options @@ -2137,6 +1654,12 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, goto done; } + if (socket_address_parse_named_fd(saddr, errp) < 0) { + qapi_free_SocketAddress(saddr); + saddr = NULL; + goto done; + } + done: qobject_unref(addr); visit_free(iv); @@ -2163,9 +1686,9 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) return NULL; } - if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { - error_setg(errp, - "Expecting TLS credentials with a client endpoint"); + if (!qcrypto_tls_creds_check_endpoint(creds, + QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, + errp)) { return NULL; } object_ref(obj); @@ -2278,9 +1801,6 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options, ret = 0; error: - if (ret < 0) { - nbd_clear_bdrvstate(s); - } qemu_opts_del(opts); return ret; } @@ -2291,44 +1811,37 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, int ret; BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - ret = nbd_process_options(bs, options, errp); - if (ret < 0) { - return ret; - } - s->bs = bs; qemu_co_mutex_init(&s->send_mutex); qemu_co_queue_init(&s->free_sema); + qemu_co_mutex_init(&s->receive_mutex); if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) { return -EEXIST; } - /* - * establish TCP connection, return error if it fails - * TODO: Configurable retry-until-timeout behaviour. - */ - if (nbd_establish_connection(bs, s->saddr, errp) < 0) { - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - return -ECONNREFUSED; + ret = nbd_process_options(bs, options, errp); + if (ret < 0) { + goto fail; } - ret = nbd_client_handshake(bs, errp); + s->conn = nbd_client_connection_new(s->saddr, true, s->export, + s->x_dirty_bitmap, s->tlscreds); + + /* TODO: Configurable retry-until-timeout behaviour. 
*/ + s->state = NBD_CLIENT_CONNECTING_WAIT; + ret = nbd_do_establish_connection(bs, errp); if (ret < 0) { - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - nbd_clear_bdrvstate(s); - return ret; + goto fail; } - /* successfully connected */ - s->state = NBD_CLIENT_CONNECTED; - - nbd_init_connect_thread(s); - s->connection_co = qemu_coroutine_create(nbd_connection_entry, s); - bdrv_inc_in_flight(bs); - aio_co_schedule(bdrv_get_aio_context(bs), s->connection_co); + nbd_client_connection_enable_retry(s->conn); return 0; + +fail: + nbd_clear_bdrvstate(bs); + return ret; } static int nbd_co_flush(BlockDriverState *bs) @@ -2372,11 +1885,8 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) static void nbd_close(BlockDriverState *bs) { - BDRVNBDState *s = bs->opaque; - nbd_client_close(bs); - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - nbd_clear_bdrvstate(s); + nbd_clear_bdrvstate(bs); } /* @@ -2479,6 +1989,8 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) s->state = NBD_CLIENT_CONNECTING_NOWAIT; qemu_co_queue_restart_all(&s->free_sema); } + + nbd_co_establish_connection_cancel(s->conn); } static BlockDriver bdrv_nbd = { @@ -2499,10 +2011,6 @@ static BlockDriver bdrv_nbd = { .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, .bdrv_getlength = nbd_getlength, - .bdrv_detach_aio_context = nbd_client_detach_aio_context, - .bdrv_attach_aio_context = nbd_client_attach_aio_context, - .bdrv_co_drain_begin = nbd_client_co_drain_begin, - .bdrv_co_drain_end = nbd_client_co_drain_end, .bdrv_refresh_filename = nbd_refresh_filename, .bdrv_co_block_status = nbd_client_co_block_status, .bdrv_dirname = nbd_dirname, @@ -2528,10 +2036,6 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, .bdrv_getlength = nbd_getlength, - .bdrv_detach_aio_context = nbd_client_detach_aio_context, - .bdrv_attach_aio_context = nbd_client_attach_aio_context, - .bdrv_co_drain_begin = nbd_client_co_drain_begin, - .bdrv_co_drain_end = nbd_client_co_drain_end, .bdrv_refresh_filename = nbd_refresh_filename, .bdrv_co_block_status = nbd_client_co_block_status, .bdrv_dirname = nbd_dirname, @@ -2557,10 +2061,6 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_refresh_limits = nbd_refresh_limits, .bdrv_co_truncate = nbd_co_truncate, .bdrv_getlength = nbd_getlength, - .bdrv_detach_aio_context = nbd_client_detach_aio_context, - .bdrv_attach_aio_context = nbd_client_attach_aio_context, - .bdrv_co_drain_begin = nbd_client_co_drain_begin, - .bdrv_co_drain_end = nbd_client_co_drain_end, .bdrv_refresh_filename = nbd_refresh_filename, .bdrv_co_block_status = nbd_client_co_block_status, .bdrv_dirname = nbd_dirname, diff --git a/block/nfs.c b/block/nfs.c index 8c1968bb415..577aea1d222 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -39,7 +39,6 @@ #include "qemu/option.h" #include "qemu/uri.h" #include "qemu/cutils.h" -#include "sysemu/sysemu.h" #include "sysemu/replay.h" #include "qapi/qapi-visit-block-core.h" #include "qapi/qmp/qdict.h" @@ -148,9 +147,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) if (qp) { query_params_free(qp); } - if (uri) { - uri_free(uri); - } + uri_free(uri); return ret; } @@ -265,9 +262,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, nfs_co_generic_bh_cb, task); } -static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *iov, - int flags) +static int coroutine_fn 
nfs_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *iov, + BdrvRequestFlags flags) { NFSClient *client = bs->opaque; NFSRPC task; @@ -299,9 +296,9 @@ static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, return 0; } -static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *iov, - int flags) +static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *iov, + BdrvRequestFlags flags) { NFSClient *client = bs->opaque; NFSRPC task; diff --git a/block/null.c b/block/null.c index cc9b1d4ea72..75f7d0db40c 100644 --- a/block/null.c +++ b/block/null.c @@ -116,8 +116,9 @@ static coroutine_fn int null_co_common(BlockDriverState *bs) } static coroutine_fn int null_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVNullState *s = bs->opaque; @@ -129,8 +130,9 @@ static coroutine_fn int null_co_preadv(BlockDriverState *bs, } static coroutine_fn int null_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { return null_co_common(bs); } @@ -187,8 +189,8 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs, } static BlockAIOCB *null_aio_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { @@ -202,8 +204,8 @@ static BlockAIOCB *null_aio_preadv(BlockDriverState *bs, } static BlockAIOCB *null_aio_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { diff --git a/block/nvme.c b/block/nvme.c index 2b5421e7aa6..e4f336d79c2 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -176,23 +176,27 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q, return false; } memset(q->queue, 0, bytes); - r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); + r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova, errp); if (r) { - error_setg(errp, "Cannot map queue"); - return false; + error_prepend(errp, "Cannot map queue: "); } - return true; + return r == 0; +} + +static void nvme_free_queue(NVMeQueue *q) +{ + qemu_vfree(q->queue); } static void nvme_free_queue_pair(NVMeQueuePair *q) { - trace_nvme_free_queue_pair(q->index, q); + trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq); if (q->completion_bh) { qemu_bh_delete(q->completion_bh); } + nvme_free_queue(&q->sq); + nvme_free_queue(&q->cq); qemu_vfree(q->prp_list_pages); - qemu_vfree(q->sq.queue); - qemu_vfree(q->cq.queue); qemu_mutex_destroy(&q->lock); g_free(q); } @@ -220,6 +224,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, q = g_try_new0(NVMeQueuePair, 1); if (!q) { + error_setg(errp, "Cannot allocate queue pair"); return NULL; } trace_nvme_create_queue_pair(idx, q, size, aio_context, @@ -228,6 +233,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, qemu_real_host_page_size); q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes); if (!q->prp_list_pages) { + error_setg(errp, "Cannot allocate PRP page list"); goto fail; } 
memset(q->prp_list_pages, 0, bytes); @@ -237,8 +243,9 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s, qemu_co_queue_init(&q->free_req_queue); q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q); r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes, - false, &prp_list_iova); + false, &prp_list_iova, errp); if (r) { + error_prepend(errp, "Cannot map buffer for DMA: "); goto fail; } q->free_req_head = -1; @@ -512,10 +519,10 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) { BDRVNVMeState *s = bs->opaque; bool ret = false; - union { + QEMU_AUTO_VFREE union { NvmeIdCtrl ctrl; NvmeIdNs ns; - } *id; + } *id = NULL; NvmeLBAF *lbaf; uint16_t oncs; int r; @@ -531,9 +538,9 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) error_setg(errp, "Cannot allocate buffer for identify response"); goto out; } - r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova); + r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova, errp); if (r) { - error_setg(errp, "Cannot map buffer for DMA"); + error_prepend(errp, "Cannot map buffer for DMA: "); goto out; } @@ -593,7 +600,6 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp) s->blkshift = lbaf->ds; out: qemu_vfio_dma_unmap(s->vfio, id); - qemu_vfree(id); return ret; } @@ -1017,6 +1023,7 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, uint64_t *pagelist = req->prp_list_page; int i, j, r; int entries = 0; + Error *local_err = NULL, **errp = NULL; assert(qiov->size); assert(QEMU_IS_ALIGNED(qiov->size, s->page_size)); @@ -1029,8 +1036,30 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, try_map: r = qemu_vfio_dma_map(s->vfio, qiov->iov[i].iov_base, - len, true, &iova); + len, true, &iova, errp); + if (r == -ENOSPC) { + /* + * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA + * ioctl returns -ENOSPC to signal the user exhausted the DMA + * mappings available for a container since Linux kernel commit + * 492855939bdb ("vfio/type1: Limit DMA mappings per container", + * April 2019, see CVE-2019-3882). + * + * This block driver already handles this error path by checking + * for the -ENOMEM error, so we directly replace -ENOSPC by + * -ENOMEM. Beside, -ENOSPC has a specific meaning for blockdev + * coroutines: it triggers BLOCKDEV_ON_ERROR_ENOSPC and + * BLOCK_ERROR_ACTION_STOP which stops the VM, asking the operator + * to add more storage to the blockdev. Not something we can do + * easily with an IOMMU :) + */ + r = -ENOMEM; + } if (r == -ENOMEM && retry) { + /* + * We exhausted the DMA mappings available for our container: + * recycle the volatile IOVA mappings. + */ retry = false; trace_nvme_dma_flush_queue_wait(s); if (s->dma_map_count) { @@ -1042,6 +1071,8 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, goto fail; } } + errp = &local_err; + goto try_map; } if (r) { @@ -1085,6 +1116,9 @@ static coroutine_fn int nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd, * because they are already mapped before calling this function; for * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by * calling qemu_vfio_dma_reset_temporary when necessary. 
*/ + if (local_err) { + error_reportf_err(local_err, "Cannot map buffer for DMA: "); + } return r; } @@ -1189,7 +1223,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, { BDRVNVMeState *s = bs->opaque; int r; - uint8_t *buf = NULL; + QEMU_AUTO_VFREE uint8_t *buf = NULL; QEMUIOVector local_qiov; size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size); assert(QEMU_IS_ALIGNED(offset, s->page_size)); @@ -1216,20 +1250,21 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes, if (!r && !is_write) { qemu_iovec_from_buf(qiov, 0, buf, bytes); } - qemu_vfree(buf); return r; } static coroutine_fn int nvme_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { return nvme_co_prw(bs, offset, bytes, qiov, false, flags); } static coroutine_fn int nvme_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { return nvme_co_prw(bs, offset, bytes, qiov, true, flags); } @@ -1264,19 +1299,29 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs) static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, + int64_t bytes, BdrvRequestFlags flags) { BDRVNVMeState *s = bs->opaque; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; NVMeRequest *req; - - uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; + uint32_t cdw12; if (!s->supports_write_zeroes) { return -ENOTSUP; } + if (bytes == 0) { + return 0; + } + + cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; + /* + * We should not lose information. pwrite_zeroes_alignment and + * max_pwrite_zeroes guarantees it. + */ + assert(((cdw12 + 1) << s->blkshift) == bytes); + NvmeCmd cmd = { .opcode = NVME_CMD_WRITE_ZEROES, .nsid = cpu_to_le32(s->nsid), @@ -1318,12 +1363,12 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) + int64_t bytes) { BDRVNVMeState *s = bs->opaque; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)]; NVMeRequest *req; - NvmeDsmRange *buf; + QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL; QEMUIOVector local_qiov; int ret; @@ -1345,6 +1390,14 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, assert(s->queue_count > 1); + /* + * Filling the @buf requires @offset and @bytes to satisfy restrictions + * defined in nvme_refresh_limits(). 
+ */ + assert(QEMU_IS_ALIGNED(bytes, 1UL << s->blkshift)); + assert(QEMU_IS_ALIGNED(offset, 1UL << s->blkshift)); + assert((bytes >> s->blkshift) <= UINT32_MAX); + buf = qemu_try_memalign(s->page_size, s->page_size); if (!buf) { return -ENOMEM; @@ -1390,7 +1443,6 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, trace_nvme_dsm_done(s, offset, bytes, ret); out: qemu_iovec_destroy(&local_qiov); - qemu_vfree(buf); return ret; } @@ -1440,6 +1492,18 @@ static void nvme_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_mem_alignment = s->page_size; bs->bl.request_alignment = s->page_size; bs->bl.max_transfer = s->max_transfer; + + /* + * Look at nvme_co_pwrite_zeroes: after shift and decrement we should get + * at most 0xFFFF + */ + bs->bl.max_pwrite_zeroes = 1ULL << (s->blkshift + 16); + bs->bl.pwrite_zeroes_alignment = MAX(bs->bl.request_alignment, + 1UL << s->blkshift); + + bs->bl.max_pdiscard = (uint64_t)UINT32_MAX << s->blkshift; + bs->bl.pdiscard_alignment = MAX(bs->bl.request_alignment, + 1UL << s->blkshift); } static void nvme_detach_aio_context(BlockDriverState *bs) @@ -1499,14 +1563,15 @@ static void nvme_aio_unplug(BlockDriverState *bs) static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size) { int ret; + Error *local_err = NULL; BDRVNVMeState *s = bs->opaque; - ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL); + ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err); if (ret) { /* FIXME: we may run out of IOVA addresses after repeated * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap * doesn't reclaim addresses for fixed mappings. */ - error_report("nvme_register_buf failed: %s", strerror(-ret)); + error_reportf_err(local_err, "nvme_register_buf failed: "); } } diff --git a/block/preallocate.c b/block/preallocate.c index b6192063046..1d4233f7300 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -227,15 +227,15 @@ static void preallocate_reopen_abort(BDRVReopenState *state) } static coroutine_fn int preallocate_co_preadv_part( - BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags) + BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, flags); } static int coroutine_fn preallocate_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { return bdrv_co_pdiscard(bs->file, offset, bytes); } @@ -337,7 +337,7 @@ static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset, } static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { bool want_merge_zero = !(flags & ~(BDRV_REQ_ZERO_WRITE | BDRV_REQ_NO_FALLBACK)); @@ -349,11 +349,11 @@ static int coroutine_fn preallocate_co_pwrite_zeroes(BlockDriverState *bs, } static coroutine_fn int preallocate_co_pwritev_part(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, - int flags) + BdrvRequestFlags flags) { handle_write(bs, offset, bytes, false); diff --git a/block/progress_meter.c b/block/progress_meter.c new file mode 100644 index 00000000000..aa2e60248c0 --- /dev/null +++ b/block/progress_meter.c @@ -0,0 +1,64 @@ +/* + * Helper functionality for some process progress tracking. 
+ * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012, 2018 Red Hat, Inc. + * Copyright (c) 2020 Virtuozzo International GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qemu/progress_meter.h" + +void progress_init(ProgressMeter *pm) +{ + qemu_mutex_init(&pm->lock); +} + +void progress_destroy(ProgressMeter *pm) +{ + qemu_mutex_destroy(&pm->lock); +} + +void progress_get_snapshot(ProgressMeter *pm, uint64_t *current, + uint64_t *total) +{ + QEMU_LOCK_GUARD(&pm->lock); + + *current = pm->current; + *total = pm->total; +} + +void progress_work_done(ProgressMeter *pm, uint64_t done) +{ + QEMU_LOCK_GUARD(&pm->lock); + pm->current += done; +} + +void progress_set_remaining(ProgressMeter *pm, uint64_t remaining) +{ + QEMU_LOCK_GUARD(&pm->lock); + pm->total = pm->current + remaining; +} + +void progress_increase_remaining(ProgressMeter *pm, uint64_t delta) +{ + QEMU_LOCK_GUARD(&pm->lock); + pm->total += delta; +} diff --git a/block/qapi.c b/block/qapi.c index 943e7b15ad2..cf557e3aea7 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -59,7 +59,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, info = g_malloc0(sizeof(*info)); info->file = g_strdup(bs->filename); - info->ro = bs->read_only; + info->ro = bdrv_is_read_only(bs); info->drv = g_strdup(bs->drv->format_name); info->encrypted = bs->encrypted; @@ -663,10 +663,8 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) { - char date_buf[128], clock_buf[128]; + char clock_buf[128]; char icount_buf[128] = {0}; - struct tm tm; - time_t ti; int64_t secs; char *sizing = NULL; @@ -674,10 +672,9 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn) qemu_printf("%-10s%-17s%8s%20s%13s%11s", "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT"); } else { - ti = sn->date_sec; - localtime_r(&ti, &tm); - strftime(date_buf, sizeof(date_buf), - "%Y-%m-%d %H:%M:%S", &tm); + g_autoptr(GDateTime) date = g_date_time_new_from_unix_local(sn->date_sec); + g_autofree char *date_buf = g_date_time_format(date, "%Y-%m-%d %H:%M:%S"); + secs = sn->vm_clock_nsec / 1000000000; snprintf(clock_buf, sizeof(clock_buf), "%02d:%02d:%02d.%03d", diff --git a/block/qcow.c b/block/qcow.c index f8919a44d19..c39940f33eb 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -617,9 +617,9 @@ static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.request_alignment = BDRV_SECTOR_SIZE; } -static coroutine_fn int qcow_co_preadv(BlockDriverState 
*bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQcowState *s = bs->opaque; int offset_in_cluster; @@ -714,9 +714,9 @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset, return ret; } -static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQcowState *s = bs->opaque; int offset_in_cluster; @@ -1047,8 +1047,8 @@ static int qcow_make_empty(BlockDriverState *bs) /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int -qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) +qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; z_stream strm; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index bd0597842f3..21884a1ab9a 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -505,7 +505,20 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, return -ENOMEDIUM; } - /* Call .bdrv_co_readv() directly instead of using the public block-layer + /* + * We never deal with requests that don't satisfy + * bdrv_check_qiov_request(), and aligning requests to clusters never + * breaks this condition. So, do some assertions before calling + * bs->drv->bdrv_co_preadv_part() which has int64_t arguments. + */ + assert(src_cluster_offset <= INT64_MAX); + assert(src_cluster_offset + offset_in_cluster <= INT64_MAX); + /* Cast qiov->size to uint64_t to silence a compiler warning on -m32 */ + assert((uint64_t)qiov->size <= INT64_MAX); + bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size, + qiov, 0, &error_abort); + /* + * Call .bdrv_co_readv() directly instead of using the public block-layer * interface. This avoids double I/O throttling and request tracking, * which can lead to deadlock when block layer copy-on-read is enabled. */ @@ -556,8 +569,7 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, * offset needs to be aligned to a cluster boundary. * * If the cluster is unallocated then *host_offset will be 0. - * If the cluster is compressed then *host_offset will contain the - * complete compressed cluster descriptor. + * If the cluster is compressed then *host_offset will contain the l2 entry. * * On entry, *bytes is the maximum number of contiguous bytes starting at * offset that we are interested in. @@ -660,7 +672,7 @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - *host_offset = l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK; + *host_offset = l2_entry; break; case QCOW2_SUBCLUSTER_ZERO_PLAIN: case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: @@ -1400,29 +1412,47 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, if (end <= old_start || start >= old_end) { /* No intersection */ + continue; + } + + if (old_alloc->keep_old_clusters && + (end <= l2meta_cow_start(old_alloc) || + start >= l2meta_cow_end(old_alloc))) + { + /* + * Clusters intersect but COW areas don't. And cluster itself is + * already allocated. So, there is no actual conflict. 
+ */ + continue; + } + + /* Conflict */ + + if (start < old_start) { + /* Stop at the start of a running allocation */ + bytes = old_start - start; } else { - if (start < old_start) { - /* Stop at the start of a running allocation */ - bytes = old_start - start; - } else { - bytes = 0; - } + bytes = 0; + } - /* Stop if already an l2meta exists. After yielding, it wouldn't - * be valid any more, so we'd have to clean up the old L2Metas - * and deal with requests depending on them before starting to - * gather new ones. Not worth the trouble. */ - if (bytes == 0 && *m) { - *cur_bytes = 0; - return 0; - } + /* + * Stop if an l2meta already exists. After yielding, it wouldn't + * be valid any more, so we'd have to clean up the old L2Metas + * and deal with requests depending on them before starting to + * gather new ones. Not worth the trouble. + */ + if (bytes == 0 && *m) { + *cur_bytes = 0; + return 0; + } - if (bytes == 0) { - /* Wait for the dependency to complete. We need to recheck - * the free/allocated clusters when we continue. */ - qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); - return -EAGAIN; - } + if (bytes == 0) { + /* + * Wait for the dependency to complete. We need to recheck + * the free/allocated clusters when we continue. + */ + qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); + return -EAGAIN; } } @@ -2463,3 +2493,18 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, g_free(l1_table); return ret; } + +void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, + uint64_t *coffset, int *csize) +{ + BDRVQcow2State *s = bs->opaque; + int nb_csectors; + + assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED); + + *coffset = l2_entry & s->cluster_offset_mask; + + nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; + *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - + (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1)); +} diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 8e649b008e8..46145722527 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1177,11 +1177,11 @@ void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, switch (ctype) { case QCOW2_CLUSTER_COMPRESSED: { - int64_t offset = (l2_entry & s->cluster_offset_mask) - & QCOW2_COMPRESSED_SECTOR_MASK; - int size = QCOW2_COMPRESSED_SECTOR_SIZE * - (((l2_entry >> s->csize_shift) & s->csize_mask) + 1); - qcow2_free_clusters(bs, offset, size, type); + uint64_t coffset; + int csize; + + qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); + qcow2_free_clusters(bs, coffset, csize, type); } break; case QCOW2_CLUSTER_NORMAL: @@ -1247,7 +1247,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, bool l1_allocated = false; int64_t old_entry, old_l2_offset; unsigned slice, slice_size2, n_slices; - int i, j, l1_modified = 0, nb_csectors; + int i, j, l1_modified = 0; int ret; assert(addend >= -1 && addend <= 1); @@ -1318,14 +1318,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, switch (qcow2_get_cluster_type(bs, entry)) { case QCOW2_CLUSTER_COMPRESSED: - nb_csectors = ((entry >> s->csize_shift) & - s->csize_mask) + 1; if (addend != 0) { - uint64_t coffset = (entry & s->cluster_offset_mask) - & QCOW2_COMPRESSED_SECTOR_MASK; + uint64_t coffset; + int csize; + + qcow2_parse_compressed_l2_entry(bs, entry, + &coffset, &csize); ret = update_refcount( - bs, coffset, - nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE, + bs, coffset, csize, abs(addend), addend < 0, 
QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { @@ -1587,6 +1587,66 @@ enum { CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */ }; +/* + * Fix L2 entry by making it QCOW2_CLUSTER_ZERO_PLAIN (or making all its present + * subclusters QCOW2_SUBCLUSTER_ZERO_PLAIN). + * + * This function decrements res->corruptions on success, so the caller is + * responsible to increment res->corruptions prior to the call. + * + * On failure in-memory @l2_table may be modified. + */ +static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res, + uint64_t l2_offset, + uint64_t *l2_table, int l2_index, bool active, + bool *metadata_overlap) +{ + BDRVQcow2State *s = bs->opaque; + int ret; + int idx = l2_index * (l2_entry_size(s) / sizeof(uint64_t)); + uint64_t l2e_offset = l2_offset + (uint64_t)l2_index * l2_entry_size(s); + int ign = active ? QCOW2_OL_ACTIVE_L2 : QCOW2_OL_INACTIVE_L2; + + if (has_subclusters(s)) { + uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, l2_index); + + /* Allocated subclusters become zero */ + l2_bitmap |= l2_bitmap << 32; + l2_bitmap &= QCOW_L2_BITMAP_ALL_ZEROES; + + set_l2_bitmap(s, l2_table, l2_index, l2_bitmap); + set_l2_entry(s, l2_table, l2_index, 0); + } else { + set_l2_entry(s, l2_table, l2_index, QCOW_OFLAG_ZERO); + } + + ret = qcow2_pre_write_overlap_check(bs, ign, l2e_offset, l2_entry_size(s), + false); + if (metadata_overlap) { + *metadata_overlap = ret < 0; + } + if (ret < 0) { + fprintf(stderr, "ERROR: Overlap check failed\n"); + goto fail; + } + + ret = bdrv_pwrite_sync(bs->file, l2e_offset, &l2_table[idx], + l2_entry_size(s)); + if (ret < 0) { + fprintf(stderr, "ERROR: Failed to overwrite L2 " + "table entry: %s\n", strerror(-ret)); + goto fail; + } + + res->corruptions--; + res->corruptions_fixed++; + return 0; + +fail: + res->check_errors++; + return ret; +} + /* * Increases the refcount in the given refcount table for the all clusters * referenced in the L2 table. 
While doing so, performs some checks on L2 @@ -1601,26 +1661,41 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, int flags, BdrvCheckMode fix, bool active) { BDRVQcow2State *s = bs->opaque; - uint64_t *l2_table, l2_entry; + uint64_t l2_entry, l2_bitmap; uint64_t next_contiguous_offset = 0; - int i, l2_size, nb_csectors, ret; + int i, ret; + size_t l2_size_bytes = s->l2_size * l2_entry_size(s); + g_autofree uint64_t *l2_table = g_malloc(l2_size_bytes); + bool metadata_overlap; /* Read L2 table from disk */ - l2_size = s->l2_size * l2_entry_size(s); - l2_table = g_malloc(l2_size); - - ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size); + ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes); if (ret < 0) { fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n"); res->check_errors++; - goto fail; + return ret; } /* Do the actual checks */ - for(i = 0; i < s->l2_size; i++) { + for (i = 0; i < s->l2_size; i++) { + uint64_t coffset; + int csize; + QCow2ClusterType type; + l2_entry = get_l2_entry(s, l2_table, i); + l2_bitmap = get_l2_bitmap(s, l2_table, i); + type = qcow2_get_cluster_type(bs, l2_entry); + + if (type != QCOW2_CLUSTER_COMPRESSED) { + /* Check reserved bits of Standard Cluster Descriptor */ + if (l2_entry & L2E_STD_RESERVED_MASK) { + fprintf(stderr, "ERROR found l2 entry with reserved bits set: " + "%" PRIx64 "\n", l2_entry); + res->corruptions++; + } + } - switch (qcow2_get_cluster_type(bs, l2_entry)) { + switch (type) { case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters don't have QCOW_OFLAG_COPIED */ if (l2_entry & QCOW_OFLAG_COPIED) { @@ -1638,23 +1713,28 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, break; } + if (l2_bitmap) { + fprintf(stderr, "ERROR compressed cluster %d with non-zero " + "subcluster allocation bitmap, entry=0x%" PRIx64 "\n", + i, l2_entry); + res->corruptions++; + break; + } + /* Mark cluster as used */ - nb_csectors = ((l2_entry >> s->csize_shift) & - s->csize_mask) + 1; - l2_entry &= s->cluster_offset_mask; + qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); ret = qcow2_inc_refcounts_imrt( - bs, res, refcount_table, refcount_table_size, - l2_entry & QCOW2_COMPRESSED_SECTOR_MASK, - nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE); + bs, res, refcount_table, refcount_table_size, coffset, csize); if (ret < 0) { - goto fail; + return ret; } if (flags & CHECK_FRAG_INFO) { res->bfi.allocated_clusters++; res->bfi.compressed_clusters++; - /* Compressed clusters are fragmented by nature. Since they + /* + * Compressed clusters are fragmented by nature. Since they * take up sub-sector space but we only have sector granularity * I/O we need to re-read the same sectors even for adjacent * compressed clusters. @@ -1668,13 +1748,19 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, { uint64_t offset = l2_entry & L2E_OFFSET_MASK; + if ((l2_bitmap >> 32) & l2_bitmap) { + res->corruptions++; + fprintf(stderr, "ERROR offset=%" PRIx64 ": Allocated " + "cluster has corrupted subcluster allocation bitmap\n", + offset); + } + /* Correct offsets are cluster aligned */ if (offset_into_cluster(s, offset)) { bool contains_data; res->corruptions++; if (has_subclusters(s)) { - uint64_t l2_bitmap = get_l2_bitmap(s, l2_table, i); contains_data = (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC); } else { contains_data = !(l2_entry & QCOW_OFLAG_ZERO); @@ -1687,40 +1773,30 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, fix & BDRV_FIX_ERRORS ? 
"Repairing" : "ERROR", offset); if (fix & BDRV_FIX_ERRORS) { - int idx = i * (l2_entry_size(s) / sizeof(uint64_t)); - uint64_t l2e_offset = - l2_offset + (uint64_t)i * l2_entry_size(s); - int ign = active ? QCOW2_OL_ACTIVE_L2 : - QCOW2_OL_INACTIVE_L2; - - l2_entry = has_subclusters(s) ? 0 : QCOW_OFLAG_ZERO; - set_l2_entry(s, l2_table, i, l2_entry); - ret = qcow2_pre_write_overlap_check(bs, ign, - l2e_offset, l2_entry_size(s), false); - if (ret < 0) { - fprintf(stderr, "ERROR: Overlap check failed\n"); - res->check_errors++; - /* Something is seriously wrong, so abort checking - * this L2 table */ - goto fail; + ret = fix_l2_entry_by_zero(bs, res, l2_offset, + l2_table, i, active, + &metadata_overlap); + if (metadata_overlap) { + /* + * Something is seriously wrong, so abort checking + * this L2 table. + */ + return ret; } - ret = bdrv_pwrite_sync(bs->file, l2e_offset, - &l2_table[idx], - l2_entry_size(s)); - if (ret < 0) { - fprintf(stderr, "ERROR: Failed to overwrite L2 " - "table entry: %s\n", strerror(-ret)); - res->check_errors++; - /* Do not abort, continue checking the rest of this - * L2 table's entries */ - } else { - res->corruptions--; - res->corruptions_fixed++; - /* Skip marking the cluster as used - * (it is unused now) */ + if (ret == 0) { + /* + * Skip marking the cluster as used + * (it is unused now). + */ continue; } + + /* + * Failed to fix. + * Do not abort, continue checking the rest of this + * L2 table's entries. + */ } } else { fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is " @@ -1743,14 +1819,23 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, refcount_table_size, offset, s->cluster_size); if (ret < 0) { - goto fail; + return ret; } } break; } case QCOW2_CLUSTER_ZERO_PLAIN: + /* Impossible when image has subclusters */ + assert(!l2_bitmap); + break; + case QCOW2_CLUSTER_UNALLOCATED: + if (l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC) { + res->corruptions++; + fprintf(stderr, "ERROR: Unallocated " + "cluster has non-zero subcluster allocation map\n"); + } break; default: @@ -1758,12 +1843,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, } } - g_free(l2_table); return 0; - -fail: - g_free(l2_table); - return ret; } /* @@ -1782,71 +1862,79 @@ static int check_refcounts_l1(BlockDriverState *bs, int flags, BdrvCheckMode fix, bool active) { BDRVQcow2State *s = bs->opaque; - uint64_t *l1_table = NULL, l2_offset, l1_size2; + size_t l1_size_bytes = l1_size * L1E_SIZE; + g_autofree uint64_t *l1_table = NULL; + uint64_t l2_offset; int i, ret; - l1_size2 = l1_size * L1E_SIZE; + if (!l1_size) { + return 0; + } /* Mark L1 table as used */ ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size, - l1_table_offset, l1_size2); + l1_table_offset, l1_size_bytes); if (ret < 0) { - goto fail; + return ret; + } + + l1_table = g_try_malloc(l1_size_bytes); + if (l1_table == NULL) { + res->check_errors++; + return -ENOMEM; } /* Read L1 table entries from disk */ - if (l1_size2 > 0) { - l1_table = g_try_malloc(l1_size2); - if (l1_table == NULL) { - ret = -ENOMEM; - res->check_errors++; - goto fail; - } - ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); - if (ret < 0) { - fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); - res->check_errors++; - goto fail; - } - for(i = 0;i < l1_size; i++) - be64_to_cpus(&l1_table[i]); + ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size_bytes); + if (ret < 0) { + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); + 
res->check_errors++; + return ret; + } + + for (i = 0; i < l1_size; i++) { + be64_to_cpus(&l1_table[i]); } /* Do the actual checks */ - for(i = 0; i < l1_size; i++) { - l2_offset = l1_table[i]; - if (l2_offset) { - /* Mark L2 table as used */ - l2_offset &= L1E_OFFSET_MASK; - ret = qcow2_inc_refcounts_imrt(bs, res, - refcount_table, refcount_table_size, - l2_offset, s->cluster_size); - if (ret < 0) { - goto fail; - } + for (i = 0; i < l1_size; i++) { + if (!l1_table[i]) { + continue; + } - /* L2 tables are cluster aligned */ - if (offset_into_cluster(s, l2_offset)) { - fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " - "cluster aligned; L1 entry corrupted\n", l2_offset); - res->corruptions++; - } + if (l1_table[i] & L1E_RESERVED_MASK) { + fprintf(stderr, "ERROR found L1 entry with reserved bits set: " + "%" PRIx64 "\n", l1_table[i]); + res->corruptions++; + } - /* Process and check L2 entries */ - ret = check_refcounts_l2(bs, res, refcount_table, - refcount_table_size, l2_offset, flags, - fix, active); - if (ret < 0) { - goto fail; - } + l2_offset = l1_table[i] & L1E_OFFSET_MASK; + + /* Mark L2 table as used */ + ret = qcow2_inc_refcounts_imrt(bs, res, + refcount_table, refcount_table_size, + l2_offset, s->cluster_size); + if (ret < 0) { + return ret; + } + + /* L2 tables are cluster aligned */ + if (offset_into_cluster(s, l2_offset)) { + fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " + "cluster aligned; L1 entry corrupted\n", l2_offset); + res->corruptions++; + } + + /* Process and check L2 entries */ + ret = check_refcounts_l2(bs, res, refcount_table, + refcount_table_size, l2_offset, flags, + fix, active); + if (ret < 0) { + return ret; } } - g_free(l1_table); - return 0; -fail: - g_free(l1_table); - return ret; + return 0; } /* @@ -2001,9 +2089,17 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, for(i = 0; i < s->refcount_table_size; i++) { uint64_t offset, cluster; - offset = s->refcount_table[i]; + offset = s->refcount_table[i] & REFT_OFFSET_MASK; cluster = offset >> s->cluster_bits; + if (s->refcount_table[i] & REFT_RESERVED_MASK) { + fprintf(stderr, "ERROR refcount table entry %" PRId64 " has " + "reserved bits set\n", i); + res->corruptions++; + *rebuild = true; + continue; + } + /* Refcount blocks are cluster aligned */ if (offset_into_cluster(s, offset)) { fprintf(stderr, "ERROR refcount block %" PRId64 " is not " diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 2e98c7f4b62..71ddb08c212 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -1026,7 +1026,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, int new_l1_bytes; int ret; - assert(bs->read_only); + assert(bdrv_is_read_only(bs)); /* Search the snapshot */ snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name); diff --git a/block/qcow2.c b/block/qcow2.c index 9727ae8fe34..d5090167569 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -74,7 +74,7 @@ typedef struct { static int coroutine_fn qcow2_co_preadv_compressed(BlockDriverState *bs, - uint64_t cluster_descriptor, + uint64_t l2_entry, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, @@ -1723,8 +1723,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, /* Clear unknown autoclear feature bits */ update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; - update_header = - update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); + update_header = update_header && bdrv_is_writable(bs); if (update_header) { s->autoclear_features &= 
QCOW2_AUTOCLEAR_MASK; } @@ -1811,7 +1810,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; /* Repair image if dirty */ - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && + if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) && (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; @@ -1927,6 +1926,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) static int qcow2_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { + BDRVQcow2State *s = state->bs->opaque; Qcow2ReopenState *r; int ret; @@ -1957,6 +1957,16 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, } } + /* + * Without an external data file, s->data_file points to the same BdrvChild + * as bs->file. It needs to be resynced after reopen because bs->file may + * be changed. We can't use it in the meantime. + */ + if (!has_data_file(state->bs)) { + assert(s->data_file == state->bs->file); + s->data_file = NULL; + } + return 0; fail: @@ -1967,7 +1977,16 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, static void qcow2_reopen_commit(BDRVReopenState *state) { + BDRVQcow2State *s = state->bs->opaque; + qcow2_update_options_commit(state->bs, state->opaque); + if (!s->data_file) { + /* + * If we don't have an external data file, s->data_file was cleared by + * qcow2_reopen_prepare() and needs to be updated. + */ + s->data_file = state->bs->file; + } g_free(state->opaque); } @@ -1991,6 +2010,15 @@ static void qcow2_reopen_commit_post(BDRVReopenState *state) static void qcow2_reopen_abort(BDRVReopenState *state) { + BDRVQcow2State *s = state->bs->opaque; + + if (!s->data_file) { + /* + * If we don't have an external data file, s->data_file was cleared by + * qcow2_reopen_prepare() and needs to be restored. 
+ */ + s->data_file = state->bs->file; + } qcow2_update_options_abort(state->bs, state->opaque); g_free(state->opaque); } @@ -2177,7 +2205,7 @@ typedef struct Qcow2AioTask { BlockDriverState *bs; QCow2SubclusterType subcluster_type; /* only for read */ - uint64_t host_offset; /* or full descriptor in compressed clusters */ + uint64_t host_offset; /* or l2_entry for compressed read */ uint64_t offset; uint64_t bytes; QEMUIOVector *qiov; @@ -2282,9 +2310,10 @@ static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task) } static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, int flags) + size_t qiov_offset, + BdrvRequestFlags flags) { BDRVQcow2State *s = bs->opaque; int ret = 0; @@ -2568,8 +2597,8 @@ static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task) } static coroutine_fn int qcow2_co_pwritev_part( - BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags) + BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags) { BDRVQcow2State *s = bs->opaque; int offset_in_cluster; @@ -3912,7 +3941,7 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) } static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags) + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { int ret; BDRVQcow2State *s = bs->opaque; @@ -3967,7 +3996,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, } static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret; BDRVQcow2State *s = bs->opaque; @@ -3997,9 +4026,9 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, static int coroutine_fn qcow2_co_copy_range_from(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags read_flags, + BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { BDRVQcow2State *s = bs->opaque; @@ -4080,9 +4109,9 @@ qcow2_co_copy_range_from(BlockDriverState *bs, static int coroutine_fn qcow2_co_copy_range_to(BlockDriverState *bs, - BdrvChild *src, uint64_t src_offset, - BdrvChild *dst, uint64_t dst_offset, - uint64_t bytes, BdrvRequestFlags read_flags, + BdrvChild *src, int64_t src_offset, + BdrvChild *dst, int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { BDRVQcow2State *s = bs->opaque; @@ -4602,7 +4631,7 @@ static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task) */ static coroutine_fn int qcow2_co_pwritev_compressed_part(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; @@ -4665,22 +4694,19 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, static int coroutine_fn qcow2_co_preadv_compressed(BlockDriverState *bs, - uint64_t cluster_descriptor, + uint64_t l2_entry, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, size_t qiov_offset) { BDRVQcow2State *s = bs->opaque; - int ret = 0, csize, nb_csectors; + int ret = 0, csize; uint64_t coffset; uint8_t *buf, *out_buf; int offset_in_cluster = offset_into_cluster(s, offset); - coffset = 
cluster_descriptor & s->cluster_offset_mask; - nb_csectors = ((cluster_descriptor >> s->csize_shift) & s->csize_mask) + 1; - csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - - (coffset & ~QCOW2_COMPRESSED_SECTOR_MASK); + qcow2_parse_compressed_l2_entry(bs, l2_entry, &coffset, &csize); buf = g_try_malloc(csize); if (!buf) { @@ -5089,6 +5115,7 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) BDRVQcow2State *s = bs->opaque; bdi->cluster_size = s->cluster_size; bdi->vm_state_offset = qcow2_vm_state_offset(s); + bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY; return 0; } @@ -5201,24 +5228,55 @@ static int qcow2_has_zero_init(BlockDriverState *bs) } } +/* + * Check the request to vmstate. On success return + * qcow2_vm_state_offset(bs) + @pos + */ +static int64_t qcow2_check_vmstate_request(BlockDriverState *bs, + QEMUIOVector *qiov, int64_t pos) +{ + BDRVQcow2State *s = bs->opaque; + int64_t vmstate_offset = qcow2_vm_state_offset(s); + int ret; + + /* Incoming requests must be OK */ + bdrv_check_qiov_request(pos, qiov->size, qiov, 0, &error_abort); + + if (INT64_MAX - pos < vmstate_offset) { + return -EIO; + } + + pos += vmstate_offset; + ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); + if (ret < 0) { + return ret; + } + + return pos; +} + static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcow2State *s = bs->opaque; + int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); + if (offset < 0) { + return offset; + } BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); - return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos, - qiov->size, qiov, 0, 0); + return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0); } static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcow2State *s = bs->opaque; + int64_t offset = qcow2_check_vmstate_request(bs, qiov, pos); + if (offset < 0) { + return offset; + } BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); - return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos, - qiov->size, qiov, 0, 0); + return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0); } /* @@ -5620,15 +5678,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (backing_file || backing_format) { if (g_strcmp0(backing_file, s->image_backing_file) || g_strcmp0(backing_format, s->image_backing_format)) { - warn_report("Deprecated use of amend to alter the backing file; " - "use qemu-img rebase instead"); - } - ret = qcow2_change_backing_file(bs, - backing_file ?: s->image_backing_file, - backing_format ?: s->image_backing_format); - if (ret < 0) { - error_setg_errno(errp, -ret, "Failed to change the backing file"); - return ret; + error_setg(errp, "Cannot amend the backing file"); + error_append_hint(errp, + "You can use 'qemu-img rebase' instead.\n"); + return -EINVAL; } } diff --git a/block/qcow2.h b/block/qcow2.h index 0fe5f74ed3e..fd48a89d452 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -110,7 +110,6 @@ /* Defined in the qcow2 spec (compressed cluster descriptor) */ #define QCOW2_COMPRESSED_SECTOR_SIZE 512U -#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) /* Must be at least 2 to cover COW */ #define MIN_L2_CACHE_SIZE 2 /* cache entries */ @@ -587,10 +586,12 @@ typedef enum QCow2MetadataOverlap { (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2) #define L1E_OFFSET_MASK 0x00fffffffffffe00ULL +#define L1E_RESERVED_MASK 0x7f000000000001ffULL #define 
L2E_OFFSET_MASK 0x00fffffffffffe00ULL -#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL +#define L2E_STD_RESERVED_MASK 0x3f000000000001feULL #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL +#define REFT_RESERVED_MASK 0x1ffULL #define INV_OFFSET (-1ULL) @@ -914,6 +915,8 @@ int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size, uint64_t *host_offset); +void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, + uint64_t *coffset, int *csize); int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); diff --git a/block/qed.c b/block/qed.c index f45c640513f..558d3646c4b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -582,6 +582,7 @@ static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp) BDRVQEDState *s = bs->opaque; bs->bl.pwrite_zeroes_alignment = s->header.cluster_size; + bs->bl.max_pwrite_zeroes = QEMU_ALIGN_DOWN(INT_MAX, s->header.cluster_size); } /* We have nothing to do for QED reopen, stubs just return @@ -1397,7 +1398,7 @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs, static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, + int64_t bytes, BdrvRequestFlags flags) { BDRVQEDState *s = bs->opaque; @@ -1408,6 +1409,12 @@ static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs, */ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes); + /* + * QED is not prepared for 63bit write-zero requests, so rely on + * max_pwrite_zeroes. + */ + assert(bytes <= INT_MAX); + /* Fall back if the request is not aligned */ if (qed_offset_into_cluster(s, offset) || qed_offset_into_cluster(s, bytes)) { diff --git a/block/quorum.c b/block/quorum.c index cfc1436abb5..c28dda7baac 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -663,8 +663,8 @@ static int read_fifo_child(QuorumAIOCB *acb) return ret; } -static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int quorum_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -714,8 +714,9 @@ static void write_quorum_entry(void *opaque) } } -static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int flags) +static int quorum_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes, flags); @@ -745,7 +746,7 @@ static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, } static int quorum_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, BdrvRequestFlags flags) + int64_t bytes, BdrvRequestFlags flags) { return quorum_co_pwritev(bs, offset, bytes, NULL, @@ -1279,7 +1280,7 @@ static BlockDriver bdrv_quorum = { .bdrv_dirname = quorum_dirname, .bdrv_co_block_status = quorum_co_block_status, - .bdrv_co_flush_to_disk = quorum_co_flush, + .bdrv_co_flush = quorum_co_flush, .bdrv_getlength = quorum_getlength, diff --git a/block/raw-format.c b/block/raw-format.c index 7717578ed6a..bda757fd195 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -181,8 +181,8 @@ static void raw_reopen_abort(BDRVReopenState *state) } /* Check and adjust the offset, against 'offset' and 
'size' options. */ -static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, - uint64_t bytes, bool is_write) +static inline int raw_adjust_offset(BlockDriverState *bs, int64_t *offset, + int64_t bytes, bool is_write) { BDRVRawState *s = bs->opaque; @@ -201,9 +201,9 @@ static inline int raw_adjust_offset(BlockDriverState *bs, uint64_t *offset, return 0; } -static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { int ret; @@ -216,9 +216,9 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } -static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, - int flags) +static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { void *buf = NULL; BlockDriver *drv; @@ -289,12 +289,12 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, } static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { int ret; - ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); + ret = raw_adjust_offset(bs, &offset, bytes, true); if (ret) { return ret; } @@ -302,11 +302,11 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { int ret; - ret = raw_adjust_offset(bs, (uint64_t *)&offset, bytes, true); + ret = raw_adjust_offset(bs, &offset, bytes, true); if (ret) { return ret; } @@ -532,10 +532,10 @@ static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -551,10 +551,10 @@ static int coroutine_fn raw_co_copy_range_from(BlockDriverState *bs, static int coroutine_fn raw_co_copy_range_to(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags) { @@ -580,6 +580,25 @@ static void raw_cancel_in_flight(BlockDriverState *bs) bdrv_cancel_in_flight(bs->file->bs); } +static void raw_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared) +{ + bdrv_default_perms(bs, c, role, reopen_queue, parent_perm, + parent_shared, nperm, nshared); + + /* + * bdrv_default_perms() may add WRITE and/or RESIZE (see comment in + * bdrv_default_perms_for_storage() for an explanation) but we only need + * them if they are in parent_perm. Drop WRITE and RESIZE whenever possible + * to avoid permission conflicts. 
+ */ + *nperm &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + *nperm |= parent_perm & (BLK_PERM_WRITE | BLK_PERM_RESIZE); +} + BlockDriver bdrv_raw = { .format_name = "raw", .instance_size = sizeof(BDRVRawState), @@ -588,7 +607,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_commit = &raw_reopen_commit, .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, - .bdrv_child_perm = bdrv_default_perms, + .bdrv_child_perm = raw_child_perm, .bdrv_co_create_opts = &raw_co_create_opts, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/rbd.c b/block/rbd.c index f098a89c7bc..def96292e0e 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -55,49 +55,30 @@ * leading "\". */ -/* rbd_aio_discard added in 0.1.2 */ -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2) -#define LIBRBD_SUPPORTS_DISCARD -#else -#undef LIBRBD_SUPPORTS_DISCARD -#endif - #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) #define RBD_MAX_SNAPS 100 -/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */ -#ifdef LIBRBD_SUPPORTS_IOVEC -#define LIBRBD_USE_IOVEC 1 -#else -#define LIBRBD_USE_IOVEC 0 -#endif +#define RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN 8 + +static const char rbd_luks_header_verification[ + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = { + 'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 1 +}; + +static const char rbd_luks2_header_verification[ + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = { + 'L', 'U', 'K', 'S', 0xBA, 0xBE, 0, 2 +}; typedef enum { RBD_AIO_READ, RBD_AIO_WRITE, RBD_AIO_DISCARD, - RBD_AIO_FLUSH + RBD_AIO_FLUSH, + RBD_AIO_WRITE_ZEROES } RBDAIOCmd; -typedef struct RBDAIOCB { - BlockAIOCB common; - int64_t ret; - QEMUIOVector *qiov; - char *bounce; - RBDAIOCmd cmd; - int error; - struct BDRVRBDState *s; -} RBDAIOCB; - -typedef struct RADOSCB { - RBDAIOCB *acb; - struct BDRVRBDState *s; - int64_t size; - char *buf; - int64_t ret; -} RADOSCB; - typedef struct BDRVRBDState { rados_t cluster; rados_ioctx_t io_ctx; @@ -106,28 +87,52 @@ typedef struct BDRVRBDState { char *snap; char *namespace; uint64_t image_size; + uint64_t object_size; } BDRVRBDState; +typedef struct RBDTask { + BlockDriverState *bs; + Coroutine *co; + bool complete; + int64_t ret; +} RBDTask; + +typedef struct RBDDiffIterateReq { + uint64_t offs; + uint64_t bytes; + bool exists; +} RBDDiffIterateReq; + static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, BlockdevOptionsRbd *opts, bool cache, const char *keypairs, const char *secretid, Error **errp); +static char *qemu_rbd_strchr(char *src, char delim) +{ + char *p; + + for (p = src; *p; ++p) { + if (*p == delim) { + return p; + } + if (*p == '\\' && p[1] != '\0') { + ++p; + } + } + + return NULL; +} + + static char *qemu_rbd_next_tok(char *src, char delim, char **p) { char *end; *p = NULL; - for (end = src; *end; ++end) { - if (*end == delim) { - break; - } - if (*end == '\\' && end[1] != '\0') { - end++; - } - } - if (*end == delim) { + end = qemu_rbd_strchr(src, delim); + if (end) { *p = end + 1; *end = '\0'; } @@ -171,7 +176,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, qemu_rbd_unescape(found_str); qdict_put_str(options, "pool", found_str); - if (strchr(p, '@')) { + if (qemu_rbd_strchr(p, '@')) { image_name = qemu_rbd_next_tok(p, '@', &p); found_str = qemu_rbd_next_tok(p, ':', &p); @@ -181,7 +186,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, image_name = qemu_rbd_next_tok(p, ':', &p); } /* Check for namespace in the image_name */ - if (strchr(image_name, '/')) { + if 
(qemu_rbd_strchr(image_name, '/')) { found_str = qemu_rbd_next_tok(image_name, '/', &image_name); qemu_rbd_unescape(found_str); qdict_put_str(options, "namespace", found_str); @@ -241,14 +246,6 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, return; } - -static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) -{ - /* XXX Does RBD support AIO on less than 512-byte alignment? */ - bs->bl.request_alignment = 512; -} - - static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts, Error **errp) { @@ -330,17 +327,203 @@ static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json, return ret; } -static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs) +#ifdef LIBRBD_SUPPORTS_ENCRYPTION +static int qemu_rbd_convert_luks_options( + RbdEncryptionOptionsLUKSBase *luks_opts, + char **passphrase, + size_t *passphrase_len, + Error **errp) { - if (LIBRBD_USE_IOVEC) { - RBDAIOCB *acb = rcb->acb; - iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0, - acb->qiov->size - offs); + return qcrypto_secret_lookup(luks_opts->key_secret, (uint8_t **)passphrase, + passphrase_len, errp); +} + +static int qemu_rbd_convert_luks_create_options( + RbdEncryptionCreateOptionsLUKSBase *luks_opts, + rbd_encryption_algorithm_t *alg, + char **passphrase, + size_t *passphrase_len, + Error **errp) +{ + int r = 0; + + r = qemu_rbd_convert_luks_options( + qapi_RbdEncryptionCreateOptionsLUKSBase_base(luks_opts), + passphrase, passphrase_len, errp); + if (r < 0) { + return r; + } + + if (luks_opts->has_cipher_alg) { + switch (luks_opts->cipher_alg) { + case QCRYPTO_CIPHER_ALG_AES_128: { + *alg = RBD_ENCRYPTION_ALGORITHM_AES128; + break; + } + case QCRYPTO_CIPHER_ALG_AES_256: { + *alg = RBD_ENCRYPTION_ALGORITHM_AES256; + break; + } + default: { + r = -ENOTSUP; + error_setg_errno(errp, -r, "unknown encryption algorithm: %u", + luks_opts->cipher_alg); + return r; + } + } } else { - memset(rcb->buf + offs, 0, rcb->size - offs); + /* default alg */ + *alg = RBD_ENCRYPTION_ALGORITHM_AES256; } + + return 0; } +static int qemu_rbd_encryption_format(rbd_image_t image, + RbdEncryptionCreateOptions *encrypt, + Error **errp) +{ + int r = 0; + g_autofree char *passphrase = NULL; + size_t passphrase_len; + rbd_encryption_format_t format; + rbd_encryption_options_t opts; + rbd_encryption_luks1_format_options_t luks_opts; + rbd_encryption_luks2_format_options_t luks2_opts; + size_t opts_size; + uint64_t raw_size, effective_size; + + r = rbd_get_size(image, &raw_size); + if (r < 0) { + error_setg_errno(errp, -r, "cannot get raw image size"); + return r; + } + + switch (encrypt->format) { + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: { + memset(&luks_opts, 0, sizeof(luks_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS1; + opts = &luks_opts; + opts_size = sizeof(luks_opts); + r = qemu_rbd_convert_luks_create_options( + qapi_RbdEncryptionCreateOptionsLUKS_base(&encrypt->u.luks), + &luks_opts.alg, &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks_opts.passphrase = passphrase; + luks_opts.passphrase_size = passphrase_len; + break; + } + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: { + memset(&luks2_opts, 0, sizeof(luks2_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS2; + opts = &luks2_opts; + opts_size = sizeof(luks2_opts); + r = qemu_rbd_convert_luks_create_options( + qapi_RbdEncryptionCreateOptionsLUKS2_base( + &encrypt->u.luks2), + &luks2_opts.alg, &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks2_opts.passphrase = passphrase; + 
luks2_opts.passphrase_size = passphrase_len; + break; + } + default: { + r = -ENOTSUP; + error_setg_errno( + errp, -r, "unknown image encryption format: %u", + encrypt->format); + return r; + } + } + + r = rbd_encryption_format(image, format, opts, opts_size); + if (r < 0) { + error_setg_errno(errp, -r, "encryption format fail"); + return r; + } + + r = rbd_get_size(image, &effective_size); + if (r < 0) { + error_setg_errno(errp, -r, "cannot get effective image size"); + return r; + } + + r = rbd_resize(image, raw_size + (raw_size - effective_size)); + if (r < 0) { + error_setg_errno(errp, -r, "cannot resize image after format"); + return r; + } + + return 0; +} + +static int qemu_rbd_encryption_load(rbd_image_t image, + RbdEncryptionOptions *encrypt, + Error **errp) +{ + int r = 0; + g_autofree char *passphrase = NULL; + size_t passphrase_len; + rbd_encryption_luks1_format_options_t luks_opts; + rbd_encryption_luks2_format_options_t luks2_opts; + rbd_encryption_format_t format; + rbd_encryption_options_t opts; + size_t opts_size; + + switch (encrypt->format) { + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS: { + memset(&luks_opts, 0, sizeof(luks_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS1; + opts = &luks_opts; + opts_size = sizeof(luks_opts); + r = qemu_rbd_convert_luks_options( + qapi_RbdEncryptionOptionsLUKS_base(&encrypt->u.luks), + &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks_opts.passphrase = passphrase; + luks_opts.passphrase_size = passphrase_len; + break; + } + case RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2: { + memset(&luks2_opts, 0, sizeof(luks2_opts)); + format = RBD_ENCRYPTION_FORMAT_LUKS2; + opts = &luks2_opts; + opts_size = sizeof(luks2_opts); + r = qemu_rbd_convert_luks_options( + qapi_RbdEncryptionOptionsLUKS2_base(&encrypt->u.luks2), + &passphrase, &passphrase_len, errp); + if (r < 0) { + return r; + } + luks2_opts.passphrase = passphrase; + luks2_opts.passphrase_size = passphrase_len; + break; + } + default: { + r = -ENOTSUP; + error_setg_errno( + errp, -r, "unknown image encryption format: %u", + encrypt->format); + return r; + } + } + + r = rbd_encryption_load(image, format, opts, opts_size); + if (r < 0) { + error_setg_errno(errp, -r, "encryption load fail"); + return r; + } + + return 0; +} +#endif + /* FIXME Deprecate and remove keypairs or make it available in QMP. */ static int qemu_rbd_do_create(BlockdevCreateOptions *options, const char *keypairs, const char *password_secret, @@ -358,6 +541,13 @@ static int qemu_rbd_do_create(BlockdevCreateOptions *options, return -EINVAL; } +#ifndef LIBRBD_SUPPORTS_ENCRYPTION + if (opts->has_encrypt) { + error_setg(errp, "RBD library does not support image encryption"); + return -ENOTSUP; + } +#endif + if (opts->has_cluster_size) { int64_t objsize = opts->cluster_size; if ((objsize - 1) & objsize) { /* not a power of 2? 
*/ @@ -383,6 +573,28 @@ static int qemu_rbd_do_create(BlockdevCreateOptions *options, goto out; } +#ifdef LIBRBD_SUPPORTS_ENCRYPTION + if (opts->has_encrypt) { + rbd_image_t image; + + ret = rbd_open(io_ctx, opts->location->image, &image, NULL); + if (ret < 0) { + error_setg_errno(errp, -ret, + "error opening image '%s' for encryption format", + opts->location->image); + goto out; + } + + ret = qemu_rbd_encryption_format(image, opts->encrypt, errp); + rbd_close(image); + if (ret < 0) { + /* encryption format fail, try removing the image */ + rbd_remove(io_ctx, opts->location->image); + goto out; + } + } +#endif + ret = 0; out: rados_ioctx_destroy(io_ctx); @@ -395,6 +607,43 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) return qemu_rbd_do_create(options, NULL, NULL, errp); } +static int qemu_rbd_extract_encryption_create_options( + QemuOpts *opts, + RbdEncryptionCreateOptions **spec, + Error **errp) +{ + QDict *opts_qdict; + QDict *encrypt_qdict; + Visitor *v; + int ret = 0; + + opts_qdict = qemu_opts_to_qdict(opts, NULL); + qdict_extract_subqdict(opts_qdict, &encrypt_qdict, "encrypt."); + qobject_unref(opts_qdict); + if (!qdict_size(encrypt_qdict)) { + *spec = NULL; + goto exit; + } + + /* Convert options into a QAPI object */ + v = qobject_input_visitor_new_flat_confused(encrypt_qdict, errp); + if (!v) { + ret = -EINVAL; + goto exit; + } + + visit_type_RbdEncryptionCreateOptions(v, NULL, spec, errp); + visit_free(v); + if (!*spec) { + ret = -EINVAL; + goto exit; + } + +exit: + qobject_unref(encrypt_qdict); + return ret; +} + static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts, @@ -403,6 +652,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, BlockdevCreateOptions *create_options; BlockdevCreateOptionsRbd *rbd_opts; BlockdevOptionsRbd *loc; + RbdEncryptionCreateOptions *encrypt = NULL; Error *local_err = NULL; const char *keypairs, *password_secret; QDict *options = NULL; @@ -431,6 +681,13 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, goto exit; } + ret = qemu_rbd_extract_encryption_create_options(opts, &encrypt, errp); + if (ret < 0) { + goto exit; + } + rbd_opts->encrypt = encrypt; + rbd_opts->has_encrypt = !!encrypt; + /* * Caution: while qdict_get_try_str() is fine, getting non-string * types would require more care. When @options come from -blockdev @@ -459,53 +716,6 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, return ret; } -/* - * This aio completion is being called from rbd_finish_bh() and runs in qemu - * BH context. - */ -static void qemu_rbd_complete_aio(RADOSCB *rcb) -{ - RBDAIOCB *acb = rcb->acb; - int64_t r; - - r = rcb->ret; - - if (acb->cmd != RBD_AIO_READ) { - if (r < 0) { - acb->ret = r; - acb->error = 1; - } else if (!acb->error) { - acb->ret = rcb->size; - } - } else { - if (r < 0) { - qemu_rbd_memset(rcb, 0); - acb->ret = r; - acb->error = 1; - } else if (r < rcb->size) { - qemu_rbd_memset(rcb, r); - if (!acb->error) { - acb->ret = rcb->size; - } - } else if (!acb->error) { - acb->ret = r; - } - } - - g_free(rcb); - - if (!LIBRBD_USE_IOVEC) { - if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); - } - qemu_vfree(acb->bounce); - } - - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
0 : acb->ret)); - - qemu_aio_unref(acb); -} - static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp) { const char **vals; @@ -692,6 +902,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, const QDictEntry *e; Error *local_err = NULL; char *keypairs, *secretid; + rbd_image_info_t info; int r; keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs")); @@ -756,30 +967,49 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, goto failed_open; } - r = rbd_get_size(s->image, &s->image_size); + if (opts->has_encrypt) { +#ifdef LIBRBD_SUPPORTS_ENCRYPTION + r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp); + if (r < 0) { + goto failed_post_open; + } +#else + r = -ENOTSUP; + error_setg(errp, "RBD library does not support image encryption"); + goto failed_post_open; +#endif + } + + r = rbd_stat(s->image, &info, sizeof(info)); if (r < 0) { - error_setg_errno(errp, -r, "error getting image size from %s", + error_setg_errno(errp, -r, "error getting image info from %s", s->image_name); - rbd_close(s->image); - goto failed_open; + goto failed_post_open; } + s->image_size = info.size; + s->object_size = info.obj_size; /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp); if (r < 0) { - rbd_close(s->image); - goto failed_open; + goto failed_post_open; } } +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; +#endif + /* When extending regular files, we get zeros from the OS */ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; r = 0; goto out; +failed_post_open: + rbd_close(s->image); failed_open: rados_ioctx_destroy(s->io_ctx); g_free(s->snap); @@ -839,229 +1069,318 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) return 0; } -static const AIOCBInfo rbd_aiocb_info = { - .aiocb_size = sizeof(RBDAIOCB), -}; - -static void rbd_finish_bh(void *opaque) +static void qemu_rbd_finish_bh(void *opaque) { - RADOSCB *rcb = opaque; - qemu_rbd_complete_aio(rcb); + RBDTask *task = opaque; + task->complete = true; + aio_co_wake(task->co); } /* - * This is the callback function for rbd_aio_read and _write + * This is the completion callback function for all rbd aio calls + * started from qemu_rbd_start_co(). * * Note: this function is being called from a non qemu thread so * we need to be careful about what we do here. Generally we only * schedule a BH, and do the rest of the io completion handling - * from rbd_finish_bh() which runs in a qemu context. + * from qemu_rbd_finish_bh() which runs in a qemu context. 
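[Illustrative aside, not part of this patch: a condensed sketch of the wait/wake pattern described above, assuming QEMU's aio_bh_schedule_oneshot(), aio_co_wake() and qemu_coroutine_yield(); the FakeTask/fake_* names are invented here and the snippet only builds inside the QEMU tree.]

    #include "qemu/osdep.h"
    #include "block/aio.h"
    #include "qemu/coroutine.h"

    /* FakeTask is an invented stand-in for the patch's RBDTask. */
    typedef struct {
        Coroutine *co;      /* coroutine that issued the request */
        bool complete;      /* set by the BH in the qemu AioContext */
        int64_t ret;
    } FakeTask;

    /* Runs in the qemu AioContext: mark the task done, wake the coroutine. */
    static void fake_finish_bh(void *opaque)
    {
        FakeTask *t = opaque;
        t->complete = true;
        aio_co_wake(t->co);
    }

    /* Runs in a librbd thread: record the result, schedule a BH, nothing else. */
    static void fake_completion_cb(AioContext *ctx, FakeTask *t, int64_t ret)
    {
        t->ret = ret;
        aio_bh_schedule_oneshot(ctx, fake_finish_bh, t);
    }

    /* Coroutine side: after submitting the request, yield until woken. */
    static int coroutine_fn fake_wait(FakeTask *t)
    {
        while (!t->complete) {
            qemu_coroutine_yield();
        }
        return t->ret < 0 ? (int)t->ret : 0;
    }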
*/ -static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) +static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task) { - RBDAIOCB *acb = rcb->acb; - - rcb->ret = rbd_aio_get_return_value(c); + task->ret = rbd_aio_get_return_value(c); rbd_aio_release(c); - - replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs), - rbd_finish_bh, rcb); -} - -static int rbd_aio_discard_wrapper(rbd_image_t image, - uint64_t off, - uint64_t len, - rbd_completion_t comp) -{ -#ifdef LIBRBD_SUPPORTS_DISCARD - return rbd_aio_discard(image, off, len, comp); -#else - return -ENOTSUP; -#endif -} - -static int rbd_aio_flush_wrapper(rbd_image_t image, - rbd_completion_t comp) -{ -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH - return rbd_aio_flush(image, comp); -#else - return -ENOTSUP; -#endif + aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), + qemu_rbd_finish_bh, task); } -static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, - int64_t off, - QEMUIOVector *qiov, - int64_t size, - BlockCompletionFunc *cb, - void *opaque, - RBDAIOCmd cmd) +static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + int flags, + RBDAIOCmd cmd) { - RBDAIOCB *acb; - RADOSCB *rcb = NULL; + BDRVRBDState *s = bs->opaque; + RBDTask task = { .bs = bs, .co = qemu_coroutine_self() }; rbd_completion_t c; int r; - BDRVRBDState *s = bs->opaque; - - acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque); - acb->cmd = cmd; - acb->qiov = qiov; - assert(!qiov || qiov->size == size); - - rcb = g_new(RADOSCB, 1); + assert(!qiov || qiov->size == bytes); - if (!LIBRBD_USE_IOVEC) { - if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { - acb->bounce = NULL; - } else { - acb->bounce = qemu_try_blockalign(bs, qiov->size); - if (acb->bounce == NULL) { - goto failed; - } - } - if (cmd == RBD_AIO_WRITE) { - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); - } - rcb->buf = acb->bounce; - } - - acb->ret = 0; - acb->error = 0; - acb->s = s; - - rcb->acb = acb; - rcb->s = acb->s; - rcb->size = size; - r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); + r = rbd_aio_create_completion(&task, + (rbd_callback_t) qemu_rbd_completion_cb, &c); if (r < 0) { - goto failed; + return r; } switch (cmd) { - case RBD_AIO_WRITE: { - /* - * RBD APIs don't allow us to write more than actual size, so in order - * to support growing images, we resize the image before write - * operations that exceed the current size. 
- */ - if (off + size > s->image_size) { - r = qemu_rbd_resize(bs, off + size); - if (r < 0) { - goto failed_completion; - } - } -#ifdef LIBRBD_SUPPORTS_IOVEC - r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); -#else - r = rbd_aio_write(s->image, off, size, rcb->buf, c); -#endif - break; - } case RBD_AIO_READ: -#ifdef LIBRBD_SUPPORTS_IOVEC - r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); -#else - r = rbd_aio_read(s->image, off, size, rcb->buf, c); -#endif + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, offset, c); + break; + case RBD_AIO_WRITE: + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, offset, c); break; case RBD_AIO_DISCARD: - r = rbd_aio_discard_wrapper(s->image, off, size, c); + r = rbd_aio_discard(s->image, offset, bytes, c); break; case RBD_AIO_FLUSH: - r = rbd_aio_flush_wrapper(s->image, c); + r = rbd_aio_flush(s->image, c); break; +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + case RBD_AIO_WRITE_ZEROES: { + int zero_flags = 0; +#ifdef RBD_WRITE_ZEROES_FLAG_THICK_PROVISION + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + zero_flags = RBD_WRITE_ZEROES_FLAG_THICK_PROVISION; + } +#endif + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, zero_flags, 0); + break; + } +#endif default: r = -EINVAL; } if (r < 0) { - goto failed_completion; + error_report("rbd request failed early: cmd %d offset %" PRIu64 + " bytes %" PRIu64 " flags %d r %d (%s)", cmd, offset, + bytes, flags, r, strerror(-r)); + rbd_aio_release(c); + return r; } - return &acb->common; -failed_completion: - rbd_aio_release(c); -failed: - g_free(rcb); - if (!LIBRBD_USE_IOVEC) { - qemu_vfree(acb->bounce); + while (!task.complete) { + qemu_coroutine_yield(); } - qemu_aio_unref(acb); - return NULL; + if (task.ret < 0) { + error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %" + PRIu64 " flags %d task.ret %" PRIi64 " (%s)", cmd, offset, + bytes, flags, task.ret, strerror(-task.ret)); + return task.ret; + } + + /* zero pad short reads */ + if (cmd == RBD_AIO_READ && task.ret < qiov->size) { + qemu_iovec_memset(qiov, task.ret, 0, qiov->size - task.ret); + } + + return 0; } -static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, - void *opaque) +static int +coroutine_fn qemu_rbd_co_preadv(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque, - RBD_AIO_READ); + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_READ); } -static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, - void *opaque) +static int +coroutine_fn qemu_rbd_co_pwritev(BlockDriverState *bs, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque, - RBD_AIO_WRITE); + BDRVRBDState *s = bs->opaque; + /* + * RBD APIs don't allow us to write more than actual size, so in order + * to support growing images, we resize the image before write + * operations that exceed the current size. 
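[Illustrative aside, not part of this patch: a stand-alone sketch of the two size adjustments used by this driver, i.e. growing the image to offset + bytes before a write past the current end, and, on the create path earlier in this patch, padding a freshly formatted image by raw_size - effective_size so the requested raw_size stays usable. All numbers below are made up.]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* (a) write past the current end: resize to offset + bytes first */
        uint64_t image_size = 1048576, offset = 1040384, bytes = 65536;
        if (offset + bytes > image_size) {
            image_size = offset + bytes;   /* what qemu_rbd_resize() is asked for */
        }
        printf("image grown to %" PRIu64 " bytes\n", image_size);

        /* (b) after formatting, the LUKS header consumes capacity, so the
         * create path grows the image by the difference (numbers invented) */
        uint64_t raw_size = 10485760;
        uint64_t effective_size = 6291456;   /* as reported by rbd_get_size() */
        uint64_t padded = raw_size + (raw_size - effective_size);
        printf("resize %" PRIu64 " -> %" PRIu64 "\n", raw_size, padded);
        return 0;
    }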
+ */ + if (offset + bytes > s->image_size) { + int r = qemu_rbd_resize(bs, offset + bytes); + if (r < 0) { + return r; + } + } + return qemu_rbd_start_co(bs, offset, bytes, qiov, flags, RBD_AIO_WRITE); } -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH -static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, - void *opaque) +static int coroutine_fn qemu_rbd_co_flush(BlockDriverState *bs) { - return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH); + return qemu_rbd_start_co(bs, 0, 0, NULL, 0, RBD_AIO_FLUSH); } -#else +static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + return qemu_rbd_start_co(bs, offset, bytes, NULL, 0, RBD_AIO_DISCARD); +} -static int qemu_rbd_co_flush(BlockDriverState *bs) +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES +static int +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int64_t bytes, BdrvRequestFlags flags) { -#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1) - /* rbd_flush added in 0.1.1 */ - BDRVRBDState *s = bs->opaque; - return rbd_flush(s->image); -#else - return 0; -#endif + return qemu_rbd_start_co(bs, offset, bytes, NULL, flags, + RBD_AIO_WRITE_ZEROES); } #endif static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) { BDRVRBDState *s = bs->opaque; - rbd_image_info_t info; + bdi->cluster_size = s->object_size; + return 0; +} + +static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, + Error **errp) +{ + BDRVRBDState *s = bs->opaque; + ImageInfoSpecific *spec_info; + char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0}; int r; - r = rbd_stat(s->image, &info, sizeof(info)); - if (r < 0) { - return r; + if (s->image_size >= RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) { + r = rbd_read(s->image, 0, + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf); + if (r < 0) { + error_setg_errno(errp, -r, "cannot read image start for probe"); + return NULL; + } + } + + spec_info = g_new(ImageInfoSpecific, 1); + *spec_info = (ImageInfoSpecific){ + .type = IMAGE_INFO_SPECIFIC_KIND_RBD, + .u.rbd.data = g_new0(ImageInfoSpecificRbd, 1), + }; + + if (memcmp(buf, rbd_luks_header_verification, + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { + spec_info->u.rbd.data->encryption_format = + RBD_IMAGE_ENCRYPTION_FORMAT_LUKS; + spec_info->u.rbd.data->has_encryption_format = true; + } else if (memcmp(buf, rbd_luks2_header_verification, + RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) { + spec_info->u.rbd.data->encryption_format = + RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2; + spec_info->u.rbd.data->has_encryption_format = true; + } else { + spec_info->u.rbd.data->has_encryption_format = false; + } + + return spec_info; +} + +/* + * rbd_diff_iterate2 allows to interrupt the exection by returning a negative + * value in the callback routine. Choose a value that does not conflict with + * an existing exitcode and return it if we want to prematurely stop the + * execution because we detected a change in the allocation status. + */ +#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000 + +static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + int exists, void *opaque) +{ + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); + /* + * we do not diff against a snapshot so we should never receive a callback + * for a hole. + */ + assert(exists); + + if (!req->exists && offs > req->offs) { + /* + * we started in an unallocated area and hit the first allocated + * block. 
req->bytes must be set to the length of the unallocated area + * before the allocated area. stop further processing. + */ + req->bytes = offs - req->offs; + return QEMU_RBD_EXIT_DIFF_ITERATE2; + } + + if (req->exists && offs > req->offs + req->bytes) { + /* + * we started in an allocated area and jumped over an unallocated area, + * req->bytes contains the length of the allocated area before the + * unallocated area. stop further processing. + */ + return QEMU_RBD_EXIT_DIFF_ITERATE2; } - bdi->cluster_size = info.obj_size; + req->bytes += len; + req->exists = true; + return 0; } +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVRBDState *s = bs->opaque; + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; + + assert(offset + bytes <= s->image_size); + + /* default to all sectors allocated */ + status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + *map = offset; + *file = bs; + *pnum = bytes; + + /* check if RBD image supports fast-diff */ + r = rbd_get_features(s->image, &features); + if (r < 0) { + return status; + } + if (!(features & RBD_FEATURE_FAST_DIFF)) { + return status; + } + + /* check if RBD fast-diff result is valid */ + r = rbd_get_flags(s->image, &flags); + if (r < 0) { + return status; + } + if (flags & RBD_FLAG_FAST_DIFF_INVALID) { + return status; + } + + r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; + } + assert(req.bytes <= bytes); + if (!req.exists) { + if (r == 0) { + /* + * rbd_diff_iterate2 does not invoke callbacks for unallocated + * areas. This here catches the case where no callback was + * invoked at all (req.bytes == 0). 
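[Illustrative aside, not part of this patch: a stand-alone re-creation of the accumulation rules above, fed with two synthetic allocated extents; the Req type and the extent values are invented for illustration.]

    #include <assert.h>
    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EXIT_DIFF_ITERATE2 (-9000)

    /* Req mirrors the patch's RBDDiffIterateReq; it is re-declared here only
     * so the sketch builds on its own. */
    typedef struct {
        uint64_t offs;   /* start of the queried range */
        uint64_t bytes;  /* length of the run found so far */
        bool exists;     /* true once an allocated extent was seen */
    } Req;

    static int diff_cb(uint64_t offs, uint64_t len, int exists, Req *req)
    {
        assert(req->offs + req->bytes <= offs);
        assert(exists);
        if (!req->exists && offs > req->offs) {
            /* query began in a hole: report only the hole and stop */
            req->bytes = offs - req->offs;
            return EXIT_DIFF_ITERATE2;
        }
        if (req->exists && offs > req->offs + req->bytes) {
            /* allocated run ended at a gap: stop, keep the run length */
            return EXIT_DIFF_ITERATE2;
        }
        req->bytes += len;
        req->exists = true;
        return 0;
    }

    int main(void)
    {
        Req req = { .offs = 0 };
        diff_cb(0, 4096, 1, &req);       /* allocated extent [0, 4096) */
        diff_cb(4096, 4096, 1, &req);    /* adjacent extent [4096, 8192) */
        printf("allocated: %d, run length: %" PRIu64 "\n",
               req.exists, req.bytes);    /* prints: allocated: 1, run length: 8192 */
        return 0;
    }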
+ */ + assert(req.bytes == 0); + req.bytes = bytes; + } + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + + *pnum = req.bytes; + return status; +} + static int64_t qemu_rbd_getlength(BlockDriverState *bs) { BDRVRBDState *s = bs->opaque; - rbd_image_info_t info; int r; - r = rbd_stat(s->image, &info, sizeof(info)); + r = rbd_get_size(s->image, &s->image_size); if (r < 0) { return r; } - return info.size; + return s->image_size; } static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, @@ -1200,19 +1519,6 @@ static int qemu_rbd_snap_list(BlockDriverState *bs, return snap_count; } -#ifdef LIBRBD_SUPPORTS_DISCARD -static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs, - int64_t offset, - int bytes, - BlockCompletionFunc *cb, - void *opaque) -{ - return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque, - RBD_AIO_DISCARD); -} -#endif - -#ifdef LIBRBD_SUPPORTS_INVALIDATE static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs, Error **errp) { @@ -1222,7 +1528,6 @@ static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs, error_setg_errno(errp, -r, "Failed to invalidate the cache"); } } -#endif static QemuOptsList qemu_rbd_create_opts = { .name = "rbd-create-opts", @@ -1243,6 +1548,22 @@ static QemuOptsList qemu_rbd_create_opts = { .type = QEMU_OPT_STRING, .help = "ID of secret providing the password", }, + { + .name = "encrypt.format", + .type = QEMU_OPT_STRING, + .help = "Encrypt the image, format choices: 'luks', 'luks2'", + }, + { + .name = "encrypt.cipher-alg", + .type = QEMU_OPT_STRING, + .help = "Name of encryption cipher algorithm" + " (allowed values: aes-128, aes-256)", + }, + { + .name = "encrypt.key-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing LUKS passphrase", + }, { /* end of list */ } } }; @@ -1264,7 +1585,6 @@ static BlockDriver bdrv_rbd = { .format_name = "rbd", .instance_size = sizeof(BDRVRBDState), .bdrv_parse_filename = qemu_rbd_parse_filename, - .bdrv_refresh_limits = qemu_rbd_refresh_limits, .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, @@ -1272,31 +1592,26 @@ static BlockDriver bdrv_rbd = { .bdrv_co_create_opts = qemu_rbd_co_create_opts, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, + .bdrv_get_specific_info = qemu_rbd_get_specific_info, .create_opts = &qemu_rbd_create_opts, .bdrv_getlength = qemu_rbd_getlength, .bdrv_co_truncate = qemu_rbd_co_truncate, .protocol_name = "rbd", - .bdrv_aio_preadv = qemu_rbd_aio_preadv, - .bdrv_aio_pwritev = qemu_rbd_aio_pwritev, - -#ifdef LIBRBD_SUPPORTS_AIO_FLUSH - .bdrv_aio_flush = qemu_rbd_aio_flush, -#else + .bdrv_co_preadv = qemu_rbd_co_preadv, + .bdrv_co_pwritev = qemu_rbd_co_pwritev, .bdrv_co_flush_to_disk = qemu_rbd_co_flush, + .bdrv_co_pdiscard = qemu_rbd_co_pdiscard, +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, #endif - -#ifdef LIBRBD_SUPPORTS_DISCARD - .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard, -#endif + .bdrv_co_block_status = qemu_rbd_co_block_status, .bdrv_snapshot_create = qemu_rbd_snap_create, .bdrv_snapshot_delete = qemu_rbd_snap_remove, .bdrv_snapshot_list = qemu_rbd_snap_list, .bdrv_snapshot_goto = qemu_rbd_snap_rollback, -#ifdef LIBRBD_SUPPORTS_INVALIDATE .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache, -#endif .strong_runtime_opts = qemu_rbd_strong_runtime_opts, }; diff --git a/block/replication.c b/block/replication.c index 97be7ef4de5..55c8f894aa3 100644 --- 
a/block/replication.c +++ b/block/replication.c @@ -22,7 +22,7 @@ #include "sysemu/block-backend.h" #include "qapi/error.h" #include "qapi/qmp/qdict.h" -#include "replication.h" +#include "block/replication.h" typedef enum { BLOCK_REPLICATION_NONE, /* block replication is not started */ @@ -35,7 +35,6 @@ typedef enum { typedef struct BDRVReplicationState { ReplicationMode mode; ReplicationStage stage; - BdrvChild *active_disk; BlockJob *commit_job; BdrvChild *hidden_disk; BdrvChild *secondary_disk; @@ -150,7 +149,7 @@ static void replication_close(BlockDriverState *bs) if (s->stage == BLOCK_REPLICATION_FAILOVER) { commit_job = &s->commit_job->job; assert(commit_job->aio_context == qemu_get_current_aio_context()); - job_cancel_sync(commit_job); + job_cancel_sync(commit_job, false); } if (s->mode == REPLICATION_MODE_SECONDARY) { @@ -166,7 +165,12 @@ static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - *nperm = BLK_PERM_CONSISTENT_READ; + if (role & BDRV_CHILD_PRIMARY) { + *nperm = BLK_PERM_CONSISTENT_READ; + } else { + *nperm = 0; + } + if ((bs->open_flags & (BDRV_O_INACTIVE | BDRV_O_RDWR)) == BDRV_O_RDWR) { *nperm |= BLK_PERM_WRITE; } @@ -307,8 +311,10 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs, return ret; } -static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) +static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp) { + BDRVReplicationState *s = bs->opaque; + BdrvChild *active_disk = bs->file; Error *local_err = NULL; int ret; @@ -323,13 +329,13 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) return; } - if (!s->active_disk->bs->drv) { + if (!active_disk->bs->drv) { error_setg(errp, "Active disk %s is ejected", - s->active_disk->bs->node_name); + active_disk->bs->node_name); return; } - ret = bdrv_make_empty(s->active_disk, errp); + ret = bdrv_make_empty(active_disk, errp); if (ret < 0) { return; } @@ -340,17 +346,7 @@ static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) return; } - BlockBackend *blk = blk_new(qemu_get_current_aio_context(), - BLK_PERM_WRITE, BLK_PERM_ALL); - blk_insert_bs(blk, s->hidden_disk->bs, &local_err); - if (local_err) { - error_propagate(errp, local_err); - blk_unref(blk); - return; - } - - ret = blk_make_empty(blk, errp); - blk_unref(blk); + ret = bdrv_make_empty(s->hidden_disk, errp); if (ret < 0) { return; } @@ -365,36 +361,51 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, Error **errp) { BDRVReplicationState *s = bs->opaque; + BdrvChild *hidden_disk, *secondary_disk; BlockReopenQueue *reopen_queue = NULL; + /* + * s->hidden_disk and s->secondary_disk may not be set yet, as they will + * only be set after the children are writable. 
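[Illustrative aside, not part of this patch: the chain walked below is active disk (bs->file) -> hidden disk (its backing) -> secondary disk (the hidden disk's backing). A minimal stand-alone model of that walk, using simplified stand-in types rather than QEMU's BlockDriverState/BdrvChild:]

    #include <stdio.h>

    /* Simplified stand-ins; QEMU's real types are BlockDriverState/BdrvChild. */
    typedef struct Node Node;
    typedef struct { Node *bs; } Child;
    struct Node {
        const char *name;
        Child *file;      /* replication node -> active disk */
        Child *backing;   /* active -> hidden, hidden -> secondary */
    };

    int main(void)
    {
        Node secondary = { "secondary", NULL, NULL };
        Child c_sec = { &secondary };
        Node hidden = { "hidden", NULL, &c_sec };
        Child c_hid = { &hidden };
        Node active = { "active", NULL, &c_hid };
        Child c_act = { &active };
        Node repl = { "replication", &c_act, NULL };

        Child *hidden_disk = repl.file->bs->backing;        /* the hidden disk */
        Child *secondary_disk = hidden_disk->bs->backing;   /* the secondary disk */
        printf("%s / %s\n", hidden_disk->bs->name, secondary_disk->bs->name);
        return 0;
    }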
+ */ + hidden_disk = bs->file->bs->backing; + secondary_disk = hidden_disk->bs->backing; + if (writable) { - s->orig_hidden_read_only = bdrv_is_read_only(s->hidden_disk->bs); - s->orig_secondary_read_only = bdrv_is_read_only(s->secondary_disk->bs); + s->orig_hidden_read_only = bdrv_is_read_only(hidden_disk->bs); + s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); } - bdrv_subtree_drained_begin(s->hidden_disk->bs); - bdrv_subtree_drained_begin(s->secondary_disk->bs); + bdrv_subtree_drained_begin(hidden_disk->bs); + bdrv_subtree_drained_begin(secondary_disk->bs); if (s->orig_hidden_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); - reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, hidden_disk->bs, opts, true); } if (s->orig_secondary_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); - reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, secondary_disk->bs, opts, true); } if (reopen_queue) { + AioContext *ctx = bdrv_get_aio_context(bs); + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } bdrv_reopen_multiple(reopen_queue, errp); + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } } - bdrv_subtree_drained_end(s->hidden_disk->bs); - bdrv_subtree_drained_end(s->secondary_disk->bs); + bdrv_subtree_drained_end(hidden_disk->bs); + bdrv_subtree_drained_end(secondary_disk->bs); } static void backup_job_cleanup(BlockDriverState *bs) @@ -451,6 +462,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, BlockDriverState *bs = rs->opaque; BDRVReplicationState *s; BlockDriverState *top_bs; + BdrvChild *active_disk, *hidden_disk, *secondary_disk; int64_t active_length, hidden_length, disk_length; AioContext *aio_context; Error *local_err = NULL; @@ -488,32 +500,31 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, case REPLICATION_MODE_PRIMARY: break; case REPLICATION_MODE_SECONDARY: - s->active_disk = bs->file; - if (!s->active_disk || !s->active_disk->bs || - !s->active_disk->bs->backing) { + active_disk = bs->file; + if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { error_setg(errp, "Active disk doesn't have backing file"); aio_context_release(aio_context); return; } - s->hidden_disk = s->active_disk->bs->backing; - if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { + hidden_disk = active_disk->bs->backing; + if (!hidden_disk->bs || !hidden_disk->bs->backing) { error_setg(errp, "Hidden disk doesn't have backing file"); aio_context_release(aio_context); return; } - s->secondary_disk = s->hidden_disk->bs->backing; - if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) { + secondary_disk = hidden_disk->bs->backing; + if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { error_setg(errp, "The secondary disk doesn't have block backend"); aio_context_release(aio_context); return; } /* verify the length */ - active_length = bdrv_getlength(s->active_disk->bs); - hidden_length = bdrv_getlength(s->hidden_disk->bs); - disk_length = bdrv_getlength(s->secondary_disk->bs); + active_length = bdrv_getlength(active_disk->bs); + hidden_length = bdrv_getlength(hidden_disk->bs); + disk_length = bdrv_getlength(secondary_disk->bs); if (active_length < 0 || hidden_length < 0 || disk_length < 0 || active_length != hidden_length || hidden_length != 
disk_length) { error_setg(errp, "Active disk, hidden disk, secondary disk's length" @@ -523,10 +534,10 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, } /* Must be true, or the bdrv_getlength() calls would have failed */ - assert(s->active_disk->bs->drv && s->hidden_disk->bs->drv); + assert(active_disk->bs->drv && hidden_disk->bs->drv); - if (!s->active_disk->bs->drv->bdrv_make_empty || - !s->hidden_disk->bs->drv->bdrv_make_empty) { + if (!active_disk->bs->drv->bdrv_make_empty || + !hidden_disk->bs->drv->bdrv_make_empty) { error_setg(errp, "Active disk or hidden disk doesn't support make_empty"); aio_context_release(aio_context); @@ -541,6 +552,26 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + bdrv_ref(hidden_disk->bs); + s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", + &child_of_bds, BDRV_CHILD_DATA, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + + bdrv_ref(secondary_disk->bs); + s->secondary_disk = bdrv_attach_child(bs, secondary_disk->bs, + "secondary disk", &child_of_bds, + BDRV_CHILD_DATA, &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + /* start backup job now */ error_setg(&s->blocker, "Block device is in use by internal backup job"); @@ -579,7 +610,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, s->stage = BLOCK_REPLICATION_RUNNING; if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); } s->error = 0; @@ -608,7 +639,7 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) } if (s->mode == REPLICATION_MODE_SECONDARY) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); } aio_context_release(aio_context); } @@ -645,8 +676,9 @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; - s->active_disk = NULL; + bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; + bdrv_unref_child(bs, s->hidden_disk); s->hidden_disk = NULL; s->error = 0; } else { @@ -694,11 +726,11 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) * disk, secondary disk in backup_job_completed(). */ if (s->backup_job) { - job_cancel_sync(&s->backup_job->job); + job_cancel_sync(&s->backup_job->job, true); } if (!failover) { - secondary_do_checkpoint(s, errp); + secondary_do_checkpoint(bs, errp); s->stage = BLOCK_REPLICATION_DONE; aio_context_release(aio_context); return; @@ -706,7 +738,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->stage = BLOCK_REPLICATION_FAILOVER; s->commit_job = commit_active_start( - NULL, s->active_disk->bs, s->secondary_disk->bs, + NULL, bs->file->bs, s->secondary_disk->bs, JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, NULL, replication_done, bs, true, errp); break; diff --git a/block/sheepdog.c b/block/sheepdog.c deleted file mode 100644 index a45c73826d4..00000000000 --- a/block/sheepdog.c +++ /dev/null @@ -1,3356 +0,0 @@ -/* - * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see . - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/error.h" -#include "qapi/qapi-visit-sockets.h" -#include "qapi/qapi-visit-block-core.h" -#include "qapi/qmp/qdict.h" -#include "qapi/qobject-input-visitor.h" -#include "qapi/qobject-output-visitor.h" -#include "qemu/uri.h" -#include "qemu/error-report.h" -#include "qemu/main-loop.h" -#include "qemu/module.h" -#include "qemu/option.h" -#include "qemu/sockets.h" -#include "block/block_int.h" -#include "block/qdict.h" -#include "sysemu/block-backend.h" -#include "qemu/bitops.h" -#include "qemu/cutils.h" -#include "trace.h" - -#define SD_PROTO_VER 0x01 - -#define SD_DEFAULT_ADDR "localhost" -#define SD_DEFAULT_PORT 7000 - -#define SD_OP_CREATE_AND_WRITE_OBJ 0x01 -#define SD_OP_READ_OBJ 0x02 -#define SD_OP_WRITE_OBJ 0x03 -/* 0x04 is used internally by Sheepdog */ - -#define SD_OP_NEW_VDI 0x11 -#define SD_OP_LOCK_VDI 0x12 -#define SD_OP_RELEASE_VDI 0x13 -#define SD_OP_GET_VDI_INFO 0x14 -#define SD_OP_READ_VDIS 0x15 -#define SD_OP_FLUSH_VDI 0x16 -#define SD_OP_DEL_VDI 0x17 -#define SD_OP_GET_CLUSTER_DEFAULT 0x18 - -#define SD_FLAG_CMD_WRITE 0x01 -#define SD_FLAG_CMD_COW 0x02 -#define SD_FLAG_CMD_CACHE 0x04 /* Writeback mode for cache */ -#define SD_FLAG_CMD_DIRECT 0x08 /* Don't use cache */ - -#define SD_RES_SUCCESS 0x00 /* Success */ -#define SD_RES_UNKNOWN 0x01 /* Unknown error */ -#define SD_RES_NO_OBJ 0x02 /* No object found */ -#define SD_RES_EIO 0x03 /* I/O error */ -#define SD_RES_VDI_EXIST 0x04 /* Vdi exists already */ -#define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */ -#define SD_RES_SYSTEM_ERROR 0x06 /* System error */ -#define SD_RES_VDI_LOCKED 0x07 /* Vdi is locked */ -#define SD_RES_NO_VDI 0x08 /* No vdi found */ -#define SD_RES_NO_BASE_VDI 0x09 /* No base vdi found */ -#define SD_RES_VDI_READ 0x0A /* Cannot read requested vdi */ -#define SD_RES_VDI_WRITE 0x0B /* Cannot write requested vdi */ -#define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */ -#define SD_RES_BASE_VDI_WRITE 0x0D /* Cannot write base vdi */ -#define SD_RES_NO_TAG 0x0E /* Requested tag is not found */ -#define SD_RES_STARTUP 0x0F /* Sheepdog is on starting up */ -#define SD_RES_VDI_NOT_LOCKED 0x10 /* Vdi is not locked */ -#define SD_RES_SHUTDOWN 0x11 /* Sheepdog is shutting down */ -#define SD_RES_NO_MEM 0x12 /* Cannot allocate memory */ -#define SD_RES_FULL_VDI 0x13 /* we already have the maximum vdis */ -#define SD_RES_VER_MISMATCH 0x14 /* Protocol version mismatch */ -#define SD_RES_NO_SPACE 0x15 /* Server has no room for new objects */ -#define SD_RES_WAIT_FOR_FORMAT 0x16 /* Waiting for a format operation */ -#define SD_RES_WAIT_FOR_JOIN 0x17 /* Waiting for other nodes joining */ -#define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */ -#define SD_RES_HALT 0x19 /* Sheepdog is stopped serving IO request */ -#define SD_RES_READONLY 0x1A /* Object is read-only */ - -/* - * Object ID rules - * - * 0 - 19 (20 bits): data object space - * 20 - 31 (12 bits): reserved data object space - * 32 - 55 (24 bits): vdi object space - * 56 - 59 ( 4 bits): reserved vdi object space - * 60 - 63 ( 4 bits): object type identifier space - */ - -#define VDI_SPACE_SHIFT 32 -#define VDI_BIT (UINT64_C(1) << 63) -#define VMSTATE_BIT (UINT64_C(1) << 62) -#define MAX_DATA_OBJS (UINT64_C(1) << 20) -#define MAX_CHILDREN 1024 -#define SD_MAX_VDI_LEN 256 -#define SD_MAX_VDI_TAG_LEN 256 
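[Illustrative aside, not part of this patch: the object-ID layout documented above, shown packing and unpacking one data-object ID; the values are made up and the expressions mirror the removed vid_to_data_oid()/data_oid_to_idx()/oid_to_vid() helpers.]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VDI_SPACE_SHIFT 32
    #define VDI_BIT         (UINT64_C(1) << 63)
    #define MAX_DATA_OBJS   (UINT64_C(1) << 20)

    int main(void)
    {
        uint32_t vid = 0xabcd;   /* made-up VDI id */
        uint64_t idx = 7;        /* made-up data object index */

        /* vid_to_data_oid(): vdi id in bits 32..55, index in the low 20 bits */
        uint64_t data_oid = ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
        /* vid_to_vdi_oid(): same vdi id, with type bit 63 set */
        uint64_t vdi_oid = VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);

        printf("data oid 0x%" PRIx64 ": idx %" PRIu64 ", vid 0x%" PRIx32 "\n",
               data_oid,
               data_oid & (MAX_DATA_OBJS - 1),                        /* data_oid_to_idx() */
               (uint32_t)((data_oid & ~VDI_BIT) >> VDI_SPACE_SHIFT)); /* oid_to_vid() */
        printf("vdi  oid 0x%" PRIx64 "\n", vdi_oid);
        return 0;
    }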
-#define SD_NR_VDIS (1U << 24) -#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) -#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) -#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22 -/* - * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and - * (SD_EC_MAX_STRIP - 1) for parity strips - * - * SD_MAX_COPIES is sum of number of data strips and parity strips. - */ -#define SD_EC_MAX_STRIP 16 -#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1) - -#define SD_INODE_SIZE (sizeof(SheepdogInode)) -#define CURRENT_VDI_ID 0 - -#define LOCK_TYPE_NORMAL 0 -#define LOCK_TYPE_SHARED 1 /* for iSCSI multipath */ - -typedef struct SheepdogReq { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t opcode_specific[8]; -} SheepdogReq; - -typedef struct SheepdogRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint32_t opcode_specific[7]; -} SheepdogRsp; - -typedef struct SheepdogObjReq { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint64_t oid; - uint64_t cow_oid; - uint8_t copies; - uint8_t copy_policy; - uint8_t reserved[6]; - uint64_t offset; -} SheepdogObjReq; - -typedef struct SheepdogObjRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint8_t copies; - uint8_t copy_policy; - uint8_t reserved[2]; - uint32_t pad[6]; -} SheepdogObjRsp; - -typedef struct SheepdogVdiReq { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint64_t vdi_size; - uint32_t base_vdi_id; - uint8_t copies; - uint8_t copy_policy; - uint8_t store_policy; - uint8_t block_size_shift; - uint32_t snapid; - uint32_t type; - uint32_t pad[2]; -} SheepdogVdiReq; - -typedef struct SheepdogVdiRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint32_t rsvd; - uint32_t vdi_id; - uint32_t pad[5]; -} SheepdogVdiRsp; - -typedef struct SheepdogClusterRsp { - uint8_t proto_ver; - uint8_t opcode; - uint16_t flags; - uint32_t epoch; - uint32_t id; - uint32_t data_length; - uint32_t result; - uint8_t nr_copies; - uint8_t copy_policy; - uint8_t block_size_shift; - uint8_t __pad1; - uint32_t __pad2[6]; -} SheepdogClusterRsp; - -typedef struct SheepdogInode { - char name[SD_MAX_VDI_LEN]; - char tag[SD_MAX_VDI_TAG_LEN]; - uint64_t ctime; - uint64_t snap_ctime; - uint64_t vm_clock_nsec; - uint64_t vdi_size; - uint64_t vm_state_size; - uint16_t copy_policy; - uint8_t nr_copies; - uint8_t block_size_shift; - uint32_t snap_id; - uint32_t vdi_id; - uint32_t parent_vdi_id; - uint32_t child_vdi_id[MAX_CHILDREN]; - uint32_t data_vdi_id[MAX_DATA_OBJS]; -} SheepdogInode; - -#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id) - -/* - * 64 bit FNV-1a non-zero initial basis - */ -#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL) - -static void deprecation_warning(void) -{ - static bool warned; - - if (!warned) { - warn_report("the sheepdog block driver is deprecated"); - warned = true; - } -} - -/* - * 64 bit Fowler/Noll/Vo FNV-1a hash code - */ -static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) -{ - unsigned char *bp = buf; - unsigned char *be = bp + len; - while (bp < be) { - hval ^= (uint64_t) *bp++; - hval += (hval << 1) + (hval << 4) + 
(hval << 5) + - (hval << 7) + (hval << 8) + (hval << 40); - } - return hval; -} - -static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx) -{ - return inode->vdi_id == inode->data_vdi_id[idx]; -} - -static inline bool is_data_obj(uint64_t oid) -{ - return !(VDI_BIT & oid); -} - -static inline uint64_t data_oid_to_idx(uint64_t oid) -{ - return oid & (MAX_DATA_OBJS - 1); -} - -static inline uint32_t oid_to_vid(uint64_t oid) -{ - return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT; -} - -static inline uint64_t vid_to_vdi_oid(uint32_t vid) -{ - return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); -} - -static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx) -{ - return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; -} - -static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) -{ - return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; -} - -static inline bool is_snapshot(struct SheepdogInode *inode) -{ - return !!inode->snap_ctime; -} - -static inline size_t count_data_objs(const struct SheepdogInode *inode) -{ - return DIV_ROUND_UP(inode->vdi_size, - (1UL << inode->block_size_shift)); -} - -typedef struct SheepdogAIOCB SheepdogAIOCB; -typedef struct BDRVSheepdogState BDRVSheepdogState; - -typedef struct AIOReq { - SheepdogAIOCB *aiocb; - unsigned int iov_offset; - - uint64_t oid; - uint64_t base_oid; - uint64_t offset; - unsigned int data_len; - uint8_t flags; - uint32_t id; - bool create; - - QLIST_ENTRY(AIOReq) aio_siblings; -} AIOReq; - -enum AIOCBState { - AIOCB_WRITE_UDATA, - AIOCB_READ_UDATA, - AIOCB_FLUSH_CACHE, - AIOCB_DISCARD_OBJ, -}; - -#define AIOCBOverlapping(x, y) \ - (!(x->max_affect_data_idx < y->min_affect_data_idx \ - || y->max_affect_data_idx < x->min_affect_data_idx)) - -struct SheepdogAIOCB { - BDRVSheepdogState *s; - - QEMUIOVector *qiov; - - int64_t sector_num; - int nb_sectors; - - int ret; - enum AIOCBState aiocb_type; - - Coroutine *coroutine; - int nr_pending; - - uint32_t min_affect_data_idx; - uint32_t max_affect_data_idx; - - /* - * The difference between affect_data_idx and dirty_data_idx: - * affect_data_idx represents range of index of all request types. - * dirty_data_idx represents range of index updated by COW requests. - * dirty_data_idx is used for updating an inode object. - */ - uint32_t min_dirty_data_idx; - uint32_t max_dirty_data_idx; - - QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; -}; - -struct BDRVSheepdogState { - BlockDriverState *bs; - AioContext *aio_context; - - SheepdogInode inode; - - char name[SD_MAX_VDI_LEN]; - bool is_snapshot; - uint32_t cache_flags; - bool discard_supported; - - SocketAddress *addr; - int fd; - - CoMutex lock; - Coroutine *co_send; - Coroutine *co_recv; - - uint32_t aioreq_seq_num; - - /* Every aio request must be linked to either of these queues. 
*/ - QLIST_HEAD(, AIOReq) inflight_aio_head; - QLIST_HEAD(, AIOReq) failed_aio_head; - - CoMutex queue_lock; - CoQueue overlapping_queue; - QLIST_HEAD(, SheepdogAIOCB) inflight_aiocb_head; -}; - -typedef struct BDRVSheepdogReopenState { - int fd; - int cache_flags; -} BDRVSheepdogReopenState; - -static const char *sd_strerror(int err) -{ - int i; - - static const struct { - int err; - const char *desc; - } errors[] = { - {SD_RES_SUCCESS, "Success"}, - {SD_RES_UNKNOWN, "Unknown error"}, - {SD_RES_NO_OBJ, "No object found"}, - {SD_RES_EIO, "I/O error"}, - {SD_RES_VDI_EXIST, "VDI exists already"}, - {SD_RES_INVALID_PARMS, "Invalid parameters"}, - {SD_RES_SYSTEM_ERROR, "System error"}, - {SD_RES_VDI_LOCKED, "VDI is already locked"}, - {SD_RES_NO_VDI, "No vdi found"}, - {SD_RES_NO_BASE_VDI, "No base VDI found"}, - {SD_RES_VDI_READ, "Failed read the requested VDI"}, - {SD_RES_VDI_WRITE, "Failed to write the requested VDI"}, - {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"}, - {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"}, - {SD_RES_NO_TAG, "Failed to find the requested tag"}, - {SD_RES_STARTUP, "The system is still booting"}, - {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"}, - {SD_RES_SHUTDOWN, "The system is shutting down"}, - {SD_RES_NO_MEM, "Out of memory on the server"}, - {SD_RES_FULL_VDI, "We already have the maximum vdis"}, - {SD_RES_VER_MISMATCH, "Protocol version mismatch"}, - {SD_RES_NO_SPACE, "Server has no space for new objects"}, - {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"}, - {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"}, - {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"}, - {SD_RES_HALT, "Sheepdog is stopped serving IO request"}, - {SD_RES_READONLY, "Object is read-only"}, - }; - - for (i = 0; i < ARRAY_SIZE(errors); ++i) { - if (errors[i].err == err) { - return errors[i].desc; - } - } - - return "Invalid error code"; -} - -/* - * Sheepdog I/O handling: - * - * 1. In sd_co_rw_vector, we send the I/O requests to the server and - * link the requests to the inflight_list in the - * BDRVSheepdogState. The function yields while waiting for - * receiving the response. - * - * 2. We receive the response in aio_read_response, the fd handler to - * the sheepdog connection. We switch back to sd_co_readv/sd_writev - * after all the requests belonging to the AIOCB are finished. If - * needed, sd_co_writev will send another requests for the vdi object. 
- */ - -static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, - uint64_t oid, unsigned int data_len, - uint64_t offset, uint8_t flags, bool create, - uint64_t base_oid, unsigned int iov_offset) -{ - AIOReq *aio_req; - - aio_req = g_malloc(sizeof(*aio_req)); - aio_req->aiocb = acb; - aio_req->iov_offset = iov_offset; - aio_req->oid = oid; - aio_req->base_oid = base_oid; - aio_req->offset = offset; - aio_req->data_len = data_len; - aio_req->flags = flags; - aio_req->id = s->aioreq_seq_num++; - aio_req->create = create; - - acb->nr_pending++; - return aio_req; -} - -static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb) -{ - SheepdogAIOCB *cb; - -retry: - QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { - if (AIOCBOverlapping(acb, cb)) { - qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock); - goto retry; - } - } -} - -static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s, - QEMUIOVector *qiov, int64_t sector_num, int nb_sectors, - int type) -{ - uint32_t object_size; - - object_size = (UINT32_C(1) << s->inode.block_size_shift); - - acb->s = s; - - acb->qiov = qiov; - - acb->sector_num = sector_num; - acb->nb_sectors = nb_sectors; - - acb->coroutine = qemu_coroutine_self(); - acb->ret = 0; - acb->nr_pending = 0; - - acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; - acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + - acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; - - acb->min_dirty_data_idx = UINT32_MAX; - acb->max_dirty_data_idx = 0; - acb->aiocb_type = type; - - if (type == AIOCB_FLUSH_CACHE) { - return; - } - - qemu_co_mutex_lock(&s->queue_lock); - wait_for_overlapping_aiocb(s, acb); - QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings); - qemu_co_mutex_unlock(&s->queue_lock); -} - -static SocketAddress *sd_server_config(QDict *options, Error **errp) -{ - QDict *server = NULL; - Visitor *iv = NULL; - SocketAddress *saddr = NULL; - - qdict_extract_subqdict(options, &server, "server."); - - iv = qobject_input_visitor_new_flat_confused(server, errp); - if (!iv) { - goto done; - } - - if (!visit_type_SocketAddress(iv, NULL, &saddr, errp)) { - goto done; - } - -done: - visit_free(iv); - qobject_unref(server); - return saddr; -} - -/* Return -EIO in case of error, file descriptor on success */ -static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) -{ - int fd; - - fd = socket_connect(s->addr, errp); - - if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) { - int ret = socket_set_nodelay(fd); - if (ret < 0) { - warn_report("can't set TCP_NODELAY: %s", strerror(errno)); - } - } - - if (fd >= 0) { - qemu_set_nonblock(fd); - } else { - fd = -EIO; - } - - return fd; -} - -/* Return 0 on success and -errno in case of error */ -static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, - unsigned int *wlen) -{ - int ret; - - ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); - if (ret != sizeof(*hdr)) { - error_report("failed to send a req, %s", strerror(errno)); - return -errno; - } - - ret = qemu_co_send(sockfd, data, *wlen); - if (ret != *wlen) { - error_report("failed to send a req, %s", strerror(errno)); - return -errno; - } - - return ret; -} - -typedef struct SheepdogReqCo { - int sockfd; - BlockDriverState *bs; - AioContext *aio_context; - SheepdogReq *hdr; - void *data; - unsigned int *wlen; - unsigned int *rlen; - int ret; - bool finished; - Coroutine *co; -} SheepdogReqCo; - -static void restart_co_req(void *opaque) -{ - 
SheepdogReqCo *srco = opaque; - - aio_co_wake(srco->co); -} - -static coroutine_fn void do_co_req(void *opaque) -{ - int ret; - SheepdogReqCo *srco = opaque; - int sockfd = srco->sockfd; - SheepdogReq *hdr = srco->hdr; - void *data = srco->data; - unsigned int *wlen = srco->wlen; - unsigned int *rlen = srco->rlen; - - srco->co = qemu_coroutine_self(); - aio_set_fd_handler(srco->aio_context, sockfd, false, - NULL, restart_co_req, NULL, srco); - - ret = send_co_req(sockfd, hdr, data, wlen); - if (ret < 0) { - goto out; - } - - aio_set_fd_handler(srco->aio_context, sockfd, false, - restart_co_req, NULL, NULL, srco); - - ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); - if (ret != sizeof(*hdr)) { - error_report("failed to get a rsp, %s", strerror(errno)); - ret = -errno; - goto out; - } - - if (*rlen > hdr->data_length) { - *rlen = hdr->data_length; - } - - if (*rlen) { - ret = qemu_co_recv(sockfd, data, *rlen); - if (ret != *rlen) { - error_report("failed to get the data, %s", strerror(errno)); - ret = -errno; - goto out; - } - } - ret = 0; -out: - /* there is at most one request for this sockfd, so it is safe to - * set each handler to NULL. */ - aio_set_fd_handler(srco->aio_context, sockfd, false, - NULL, NULL, NULL, NULL); - - srco->co = NULL; - srco->ret = ret; - /* Set srco->finished before reading bs->wakeup. */ - qatomic_mb_set(&srco->finished, true); - if (srco->bs) { - bdrv_wakeup(srco->bs); - } -} - -/* - * Send the request to the sheep in a synchronous manner. - * - * Return 0 on success, -errno in case of error. - */ -static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr, - void *data, unsigned int *wlen, unsigned int *rlen) -{ - Coroutine *co; - SheepdogReqCo srco = { - .sockfd = sockfd, - .aio_context = bs ? bdrv_get_aio_context(bs) : qemu_get_aio_context(), - .bs = bs, - .hdr = hdr, - .data = data, - .wlen = wlen, - .rlen = rlen, - .ret = 0, - .finished = false, - }; - - if (qemu_in_coroutine()) { - do_co_req(&srco); - } else { - co = qemu_coroutine_create(do_co_req, &srco); - if (bs) { - bdrv_coroutine_enter(bs, co); - BDRV_POLL_WHILE(bs, !srco.finished); - } else { - qemu_coroutine_enter(co); - while (!srco.finished) { - aio_poll(qemu_get_aio_context(), true); - } - } - } - - return srco.ret; -} - -static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, - enum AIOCBState aiocb_type); -static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req); -static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag); -static int get_sheep_fd(BDRVSheepdogState *s, Error **errp); -static void co_write_request(void *opaque); - -static coroutine_fn void reconnect_to_sdog(void *opaque) -{ - BDRVSheepdogState *s = opaque; - AIOReq *aio_req, *next; - - aio_set_fd_handler(s->aio_context, s->fd, false, NULL, - NULL, NULL, NULL); - close(s->fd); - s->fd = -1; - - /* Wait for outstanding write requests to be completed. */ - while (s->co_send != NULL) { - co_write_request(opaque); - } - - /* Try to reconnect the sheepdog server every one second. */ - while (s->fd < 0) { - Error *local_err = NULL; - s->fd = get_sheep_fd(s, &local_err); - if (s->fd < 0) { - trace_sheepdog_reconnect_to_sdog(); - error_report_err(local_err); - qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, NANOSECONDS_PER_SECOND); - } - }; - - /* - * Now we have to resend all the request in the inflight queue. 
However, - * resend_aioreq() can yield and newly created requests can be added to the - * inflight queue before the coroutine is resumed. To avoid mixing them, we - * have to move all the inflight requests to the failed queue before - * resend_aioreq() is called. - */ - qemu_co_mutex_lock(&s->queue_lock); - QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) { - QLIST_REMOVE(aio_req, aio_siblings); - QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings); - } - - /* Resend all the failed aio requests. */ - while (!QLIST_EMPTY(&s->failed_aio_head)) { - aio_req = QLIST_FIRST(&s->failed_aio_head); - QLIST_REMOVE(aio_req, aio_siblings); - qemu_co_mutex_unlock(&s->queue_lock); - resend_aioreq(s, aio_req); - qemu_co_mutex_lock(&s->queue_lock); - } - qemu_co_mutex_unlock(&s->queue_lock); -} - -/* - * Receive responses of the I/O requests. - * - * This function is registered as a fd handler, and called from the - * main loop when s->fd is ready for reading responses. - */ -static void coroutine_fn aio_read_response(void *opaque) -{ - SheepdogObjRsp rsp; - BDRVSheepdogState *s = opaque; - int fd = s->fd; - int ret; - AIOReq *aio_req = NULL; - SheepdogAIOCB *acb; - uint64_t idx; - - /* read a header */ - ret = qemu_co_recv(fd, &rsp, sizeof(rsp)); - if (ret != sizeof(rsp)) { - error_report("failed to get the header, %s", strerror(errno)); - goto err; - } - - /* find the right aio_req from the inflight aio list */ - QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) { - if (aio_req->id == rsp.id) { - break; - } - } - if (!aio_req) { - error_report("cannot find aio_req %x", rsp.id); - goto err; - } - - acb = aio_req->aiocb; - - switch (acb->aiocb_type) { - case AIOCB_WRITE_UDATA: - if (!is_data_obj(aio_req->oid)) { - break; - } - idx = data_oid_to_idx(aio_req->oid); - - if (aio_req->create) { - /* - * If the object is newly created one, we need to update - * the vdi object (metadata object). min_dirty_data_idx - * and max_dirty_data_idx are changed to include updated - * index between them. - */ - if (rsp.result == SD_RES_SUCCESS) { - s->inode.data_vdi_id[idx] = s->inode.vdi_id; - acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx); - acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx); - } - } - break; - case AIOCB_READ_UDATA: - ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, - aio_req->iov_offset, rsp.data_length); - if (ret != rsp.data_length) { - error_report("failed to get the data, %s", strerror(errno)); - goto err; - } - break; - case AIOCB_FLUSH_CACHE: - if (rsp.result == SD_RES_INVALID_PARMS) { - trace_sheepdog_aio_read_response(); - s->cache_flags = SD_FLAG_CMD_DIRECT; - rsp.result = SD_RES_SUCCESS; - } - break; - case AIOCB_DISCARD_OBJ: - switch (rsp.result) { - case SD_RES_INVALID_PARMS: - error_report("server doesn't support discard command"); - rsp.result = SD_RES_SUCCESS; - s->discard_supported = false; - break; - default: - break; - } - } - - /* No more data for this aio_req (reload_inode below uses its own file - * descriptor handler which doesn't use co_recv). 
- */ - s->co_recv = NULL; - - qemu_co_mutex_lock(&s->queue_lock); - QLIST_REMOVE(aio_req, aio_siblings); - qemu_co_mutex_unlock(&s->queue_lock); - - switch (rsp.result) { - case SD_RES_SUCCESS: - break; - case SD_RES_READONLY: - if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) { - ret = reload_inode(s, 0, ""); - if (ret < 0) { - goto err; - } - } - if (is_data_obj(aio_req->oid)) { - aio_req->oid = vid_to_data_oid(s->inode.vdi_id, - data_oid_to_idx(aio_req->oid)); - } else { - aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id); - } - resend_aioreq(s, aio_req); - return; - default: - acb->ret = -EIO; - error_report("%s", sd_strerror(rsp.result)); - break; - } - - g_free(aio_req); - - if (!--acb->nr_pending) { - /* - * We've finished all requests which belong to the AIOCB, so - * we can switch back to sd_co_readv/writev now. - */ - aio_co_wake(acb->coroutine); - } - - return; - -err: - reconnect_to_sdog(opaque); -} - -static void co_read_response(void *opaque) -{ - BDRVSheepdogState *s = opaque; - - if (!s->co_recv) { - s->co_recv = qemu_coroutine_create(aio_read_response, opaque); - } - - aio_co_enter(s->aio_context, s->co_recv); -} - -static void co_write_request(void *opaque) -{ - BDRVSheepdogState *s = opaque; - - aio_co_wake(s->co_send); -} - -/* - * Return a socket descriptor to read/write objects. - * - * We cannot use this descriptor for other operations because - * the block driver may be on waiting response from the server. - */ -static int get_sheep_fd(BDRVSheepdogState *s, Error **errp) -{ - int fd; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - aio_set_fd_handler(s->aio_context, fd, false, - co_read_response, NULL, NULL, s); - return fd; -} - -/* - * Parse numeric snapshot ID in @str - * If @str can't be parsed as number, return false. - * Else, if the number is zero or too large, set *@snapid to zero and - * return true. - * Else, set *@snapid to the number and return true. 
- */ -static bool sd_parse_snapid(const char *str, uint32_t *snapid) -{ - unsigned long ul; - int ret; - - ret = qemu_strtoul(str, NULL, 10, &ul); - if (ret == -ERANGE) { - ul = ret = 0; - } - if (ret) { - return false; - } - if (ul > UINT32_MAX) { - ul = 0; - } - - *snapid = ul; - return true; -} - -static bool sd_parse_snapid_or_tag(const char *str, - uint32_t *snapid, char tag[]) -{ - if (!sd_parse_snapid(str, snapid)) { - *snapid = 0; - if (g_strlcpy(tag, str, SD_MAX_VDI_TAG_LEN) >= SD_MAX_VDI_TAG_LEN) { - return false; - } - } else if (!*snapid) { - return false; - } else { - tag[0] = 0; - } - return true; -} - -typedef struct { - const char *path; /* non-null iff transport is tcp */ - const char *host; /* valid when transport is tcp */ - int port; /* valid when transport is tcp */ - char vdi[SD_MAX_VDI_LEN]; - char tag[SD_MAX_VDI_TAG_LEN]; - uint32_t snap_id; - /* Remainder is only for sd_config_done() */ - URI *uri; - QueryParams *qp; -} SheepdogConfig; - -static void sd_config_done(SheepdogConfig *cfg) -{ - if (cfg->qp) { - query_params_free(cfg->qp); - } - uri_free(cfg->uri); -} - -static void sd_parse_uri(SheepdogConfig *cfg, const char *filename, - Error **errp) -{ - Error *err = NULL; - QueryParams *qp = NULL; - bool is_unix; - URI *uri; - - memset(cfg, 0, sizeof(*cfg)); - - cfg->uri = uri = uri_parse(filename); - if (!uri) { - error_setg(&err, "invalid URI '%s'", filename); - goto out; - } - - /* transport */ - if (!g_strcmp0(uri->scheme, "sheepdog")) { - is_unix = false; - } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) { - is_unix = false; - } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) { - is_unix = true; - } else { - error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp'," - " or 'sheepdog+unix'"); - goto out; - } - - if (uri->path == NULL || !strcmp(uri->path, "/")) { - error_setg(&err, "missing file path in URI"); - goto out; - } - if (g_strlcpy(cfg->vdi, uri->path + 1, SD_MAX_VDI_LEN) - >= SD_MAX_VDI_LEN) { - error_setg(&err, "VDI name is too long"); - goto out; - } - - cfg->qp = qp = query_params_parse(uri->query); - - if (is_unix) { - /* sheepdog+unix:///vdiname?socket=path */ - if (uri->server || uri->port) { - error_setg(&err, "URI scheme %s doesn't accept a server address", - uri->scheme); - goto out; - } - if (!qp->n) { - error_setg(&err, - "URI scheme %s requires query parameter 'socket'", - uri->scheme); - goto out; - } - if (qp->n != 1 || strcmp(qp->p[0].name, "socket")) { - error_setg(&err, "unexpected query parameters"); - goto out; - } - cfg->path = qp->p[0].value; - } else { - /* sheepdog[+tcp]://[host:port]/vdiname */ - if (qp->n) { - error_setg(&err, "unexpected query parameters"); - goto out; - } - cfg->host = uri->server; - cfg->port = uri->port; - } - - /* snapshot tag */ - if (uri->fragment) { - if (!sd_parse_snapid_or_tag(uri->fragment, - &cfg->snap_id, cfg->tag)) { - error_setg(&err, "'%s' is not a valid snapshot ID", - uri->fragment); - goto out; - } - } else { - cfg->snap_id = CURRENT_VDI_ID; /* search current vdi */ - } - -out: - if (err) { - error_propagate(errp, err); - sd_config_done(cfg); - } -} - -/* - * Parse a filename (old syntax) - * - * filename must be one of the following formats: - * 1. [vdiname] - * 2. [vdiname]:[snapid] - * 3. [vdiname]:[tag] - * 4. [hostname]:[port]:[vdiname] - * 5. [hostname]:[port]:[vdiname]:[snapid] - * 6. [hostname]:[port]:[vdiname]:[tag] - * - * You can boot from the snapshot images by specifying `snapid` or - * `tag'. 
- * - * You can run VMs outside the Sheepdog cluster by specifying - * `hostname' and `port' (experimental). - */ -static void parse_vdiname(SheepdogConfig *cfg, const char *filename, - Error **errp) -{ - Error *err = NULL; - char *p, *q, *uri; - const char *host_spec, *vdi_spec; - int nr_sep; - - strstart(filename, "sheepdog:", &filename); - p = q = g_strdup(filename); - - /* count the number of separators */ - nr_sep = 0; - while (*p) { - if (*p == ':') { - nr_sep++; - } - p++; - } - p = q; - - /* use the first two tokens as host_spec. */ - if (nr_sep >= 2) { - host_spec = p; - p = strchr(p, ':'); - p++; - p = strchr(p, ':'); - *p++ = '\0'; - } else { - host_spec = ""; - } - - vdi_spec = p; - - p = strchr(vdi_spec, ':'); - if (p) { - *p++ = '#'; - } - - uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec); - - /* - * FIXME We to escape URI meta-characters, e.g. "x?y=z" - * produces "sheepdog://x?y=z". Because of that ... - */ - sd_parse_uri(cfg, uri, &err); - if (err) { - /* - * ... this can fail, but the error message is misleading. - * Replace it by the traditional useless one until the - * escaping is fixed. - */ - error_free(err); - error_setg(errp, "Can't parse filename"); - } - - g_free(q); - g_free(uri); -} - -static void sd_parse_filename(const char *filename, QDict *options, - Error **errp) -{ - Error *err = NULL; - SheepdogConfig cfg; - char buf[32]; - - if (strstr(filename, "://")) { - sd_parse_uri(&cfg, filename, &err); - } else { - parse_vdiname(&cfg, filename, &err); - } - if (err) { - error_propagate(errp, err); - return; - } - - if (cfg.path) { - qdict_set_default_str(options, "server.path", cfg.path); - qdict_set_default_str(options, "server.type", "unix"); - } else { - qdict_set_default_str(options, "server.type", "inet"); - qdict_set_default_str(options, "server.host", - cfg.host ?: SD_DEFAULT_ADDR); - snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT); - qdict_set_default_str(options, "server.port", buf); - } - qdict_set_default_str(options, "vdi", cfg.vdi); - qdict_set_default_str(options, "tag", cfg.tag); - if (cfg.snap_id) { - snprintf(buf, sizeof(buf), "%d", cfg.snap_id); - qdict_set_default_str(options, "snap-id", buf); - } - - sd_config_done(&cfg); -} - -static int find_vdi_name(BDRVSheepdogState *s, const char *filename, - uint32_t snapid, const char *tag, uint32_t *vid, - bool lock, Error **errp) -{ - int ret, fd; - SheepdogVdiReq hdr; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - unsigned int wlen, rlen = 0; - char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN] QEMU_NONSTRING; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - /* This pair of strncpy calls ensures that the buffer is zero-filled, - * which is desirable since we'll soon be sending those bytes, and - * don't want the send_req to read uninitialized data. 
- */ - strncpy(buf, filename, SD_MAX_VDI_LEN); - strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); - - memset(&hdr, 0, sizeof(hdr)); - if (lock) { - hdr.opcode = SD_OP_LOCK_VDI; - hdr.type = LOCK_TYPE_NORMAL; - } else { - hdr.opcode = SD_OP_GET_VDI_INFO; - } - wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN; - hdr.proto_ver = SD_PROTO_VER; - hdr.data_length = wlen; - hdr.snapid = snapid; - hdr.flags = SD_FLAG_CMD_WRITE; - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); - if (ret) { - error_setg_errno(errp, -ret, "cannot get vdi info"); - goto out; - } - - if (rsp->result != SD_RES_SUCCESS) { - error_setg(errp, "cannot get vdi info, %s, %s %" PRIu32 " %s", - sd_strerror(rsp->result), filename, snapid, tag); - if (rsp->result == SD_RES_NO_VDI) { - ret = -ENOENT; - } else if (rsp->result == SD_RES_VDI_LOCKED) { - ret = -EBUSY; - } else { - ret = -EIO; - } - goto out; - } - *vid = rsp->vdi_id; - - ret = 0; -out: - closesocket(fd); - return ret; -} - -static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, - struct iovec *iov, int niov, - enum AIOCBState aiocb_type) -{ - int nr_copies = s->inode.nr_copies; - SheepdogObjReq hdr; - unsigned int wlen = 0; - int ret; - uint64_t oid = aio_req->oid; - unsigned int datalen = aio_req->data_len; - uint64_t offset = aio_req->offset; - uint8_t flags = aio_req->flags; - uint64_t old_oid = aio_req->base_oid; - bool create = aio_req->create; - - qemu_co_mutex_lock(&s->queue_lock); - QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); - qemu_co_mutex_unlock(&s->queue_lock); - - if (!nr_copies) { - error_report("bug"); - } - - memset(&hdr, 0, sizeof(hdr)); - - switch (aiocb_type) { - case AIOCB_FLUSH_CACHE: - hdr.opcode = SD_OP_FLUSH_VDI; - break; - case AIOCB_READ_UDATA: - hdr.opcode = SD_OP_READ_OBJ; - hdr.flags = flags; - break; - case AIOCB_WRITE_UDATA: - if (create) { - hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; - } else { - hdr.opcode = SD_OP_WRITE_OBJ; - } - wlen = datalen; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - break; - case AIOCB_DISCARD_OBJ: - hdr.opcode = SD_OP_WRITE_OBJ; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0; - offset = offsetof(SheepdogInode, - data_vdi_id[data_oid_to_idx(oid)]); - oid = vid_to_vdi_oid(s->inode.vdi_id); - wlen = datalen = sizeof(uint32_t); - break; - } - - if (s->cache_flags) { - hdr.flags |= s->cache_flags; - } - - hdr.oid = oid; - hdr.cow_oid = old_oid; - hdr.copies = s->inode.nr_copies; - - hdr.data_length = datalen; - hdr.offset = offset; - - hdr.id = aio_req->id; - - qemu_co_mutex_lock(&s->lock); - s->co_send = qemu_coroutine_self(); - aio_set_fd_handler(s->aio_context, s->fd, false, - co_read_response, co_write_request, NULL, s); - socket_set_cork(s->fd, 1); - - /* send a header */ - ret = qemu_co_send(s->fd, &hdr, sizeof(hdr)); - if (ret != sizeof(hdr)) { - error_report("failed to send a req, %s", strerror(errno)); - goto out; - } - - if (wlen) { - ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); - if (ret != wlen) { - error_report("failed to send a data, %s", strerror(errno)); - } - } -out: - socket_set_cork(s->fd, 0); - aio_set_fd_handler(s->aio_context, s->fd, false, - co_read_response, NULL, NULL, s); - s->co_send = NULL; - qemu_co_mutex_unlock(&s->lock); -} - -static int read_write_object(int fd, BlockDriverState *bs, char *buf, - uint64_t oid, uint8_t copies, - unsigned int datalen, uint64_t offset, - bool write, bool create, uint32_t cache_flags) -{ - SheepdogObjReq hdr; - SheepdogObjRsp 
*rsp = (SheepdogObjRsp *)&hdr; - unsigned int wlen, rlen; - int ret; - - memset(&hdr, 0, sizeof(hdr)); - - if (write) { - wlen = datalen; - rlen = 0; - hdr.flags = SD_FLAG_CMD_WRITE; - if (create) { - hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; - } else { - hdr.opcode = SD_OP_WRITE_OBJ; - } - } else { - wlen = 0; - rlen = datalen; - hdr.opcode = SD_OP_READ_OBJ; - } - - hdr.flags |= cache_flags; - - hdr.oid = oid; - hdr.data_length = datalen; - hdr.offset = offset; - hdr.copies = copies; - - ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); - if (ret) { - error_report("failed to send a request to the sheep"); - return ret; - } - - switch (rsp->result) { - case SD_RES_SUCCESS: - return 0; - default: - error_report("%s", sd_strerror(rsp->result)); - return -EIO; - } -} - -static int read_object(int fd, BlockDriverState *bs, char *buf, - uint64_t oid, uint8_t copies, - unsigned int datalen, uint64_t offset, - uint32_t cache_flags) -{ - return read_write_object(fd, bs, buf, oid, copies, - datalen, offset, false, - false, cache_flags); -} - -static int write_object(int fd, BlockDriverState *bs, char *buf, - uint64_t oid, uint8_t copies, - unsigned int datalen, uint64_t offset, bool create, - uint32_t cache_flags) -{ - return read_write_object(fd, bs, buf, oid, copies, - datalen, offset, true, - create, cache_flags); -} - -/* update inode with the latest state */ -static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) -{ - Error *local_err = NULL; - SheepdogInode *inode; - int ret = 0, fd; - uint32_t vid = 0; - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return -EIO; - } - - inode = g_malloc(SD_INODE_HEADER_SIZE); - - ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err); - if (ret) { - error_report_err(local_err); - goto out; - } - - ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid), - s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0, - s->cache_flags); - if (ret < 0) { - goto out; - } - - if (inode->vdi_id != s->inode.vdi_id) { - memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE); - } - -out: - g_free(inode); - closesocket(fd); - - return ret; -} - -static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req) -{ - SheepdogAIOCB *acb = aio_req->aiocb; - - aio_req->create = false; - - /* check whether this request becomes a CoW one */ - if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) { - int idx = data_oid_to_idx(aio_req->oid); - - if (is_data_obj_writable(&s->inode, idx)) { - goto out; - } - - if (s->inode.data_vdi_id[idx]) { - aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx); - aio_req->flags |= SD_FLAG_CMD_COW; - } - aio_req->create = true; - } -out: - if (is_data_obj(aio_req->oid)) { - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - acb->aiocb_type); - } else { - struct iovec iov; - iov.iov_base = &s->inode; - iov.iov_len = sizeof(s->inode); - add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); - } -} - -static void sd_detach_aio_context(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - - aio_set_fd_handler(s->aio_context, s->fd, false, NULL, - NULL, NULL, NULL); -} - -static void sd_attach_aio_context(BlockDriverState *bs, - AioContext *new_context) -{ - BDRVSheepdogState *s = bs->opaque; - - s->aio_context = new_context; - aio_set_fd_handler(new_context, s->fd, false, - co_read_response, NULL, NULL, s); -} - -static QemuOptsList runtime_opts = { - .name = "sheepdog", - .head = 
QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = "vdi", - .type = QEMU_OPT_STRING, - }, - { - .name = "snap-id", - .type = QEMU_OPT_NUMBER, - }, - { - .name = "tag", - .type = QEMU_OPT_STRING, - }, - { /* end of list */ } - }, -}; - -static int sd_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) -{ - int ret, fd; - uint32_t vid = 0; - BDRVSheepdogState *s = bs->opaque; - const char *vdi, *snap_id_str, *tag; - uint64_t snap_id; - char *buf = NULL; - QemuOpts *opts; - - deprecation_warning(); - - s->bs = bs; - s->aio_context = bdrv_get_aio_context(bs); - - opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - if (!qemu_opts_absorb_qdict(opts, options, errp)) { - ret = -EINVAL; - goto err_no_fd; - } - - s->addr = sd_server_config(options, errp); - if (!s->addr) { - ret = -EINVAL; - goto err_no_fd; - } - - vdi = qemu_opt_get(opts, "vdi"); - snap_id_str = qemu_opt_get(opts, "snap-id"); - snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID); - tag = qemu_opt_get(opts, "tag"); - - if (!vdi) { - error_setg(errp, "parameter 'vdi' is missing"); - ret = -EINVAL; - goto err_no_fd; - } - if (strlen(vdi) >= SD_MAX_VDI_LEN) { - error_setg(errp, "value of parameter 'vdi' is too long"); - ret = -EINVAL; - goto err_no_fd; - } - - if (snap_id > UINT32_MAX) { - snap_id = 0; - } - if (snap_id_str && !snap_id) { - error_setg(errp, "'snap-id=%s' is not a valid snapshot ID", - snap_id_str); - ret = -EINVAL; - goto err_no_fd; - } - - if (!tag) { - tag = ""; - } - if (strlen(tag) >= SD_MAX_VDI_TAG_LEN) { - error_setg(errp, "value of parameter 'tag' is too long"); - ret = -EINVAL; - goto err_no_fd; - } - - QLIST_INIT(&s->inflight_aio_head); - QLIST_INIT(&s->failed_aio_head); - QLIST_INIT(&s->inflight_aiocb_head); - - s->fd = get_sheep_fd(s, errp); - if (s->fd < 0) { - ret = s->fd; - goto err_no_fd; - } - - ret = find_vdi_name(s, vdi, (uint32_t)snap_id, tag, &vid, true, errp); - if (ret) { - goto err; - } - - /* - * QEMU block layer emulates writethrough cache as 'writeback + flush', so - * we always set SD_FLAG_CMD_CACHE (writeback cache) as default. 
- */ - s->cache_flags = SD_FLAG_CMD_CACHE; - if (flags & BDRV_O_NOCACHE) { - s->cache_flags = SD_FLAG_CMD_DIRECT; - } - s->discard_supported = true; - - if (snap_id || tag[0]) { - trace_sheepdog_open(vid); - s->is_snapshot = true; - } - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - ret = fd; - goto err; - } - - buf = g_malloc(SD_INODE_SIZE); - ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), - 0, SD_INODE_SIZE, 0, s->cache_flags); - - closesocket(fd); - - if (ret) { - error_setg(errp, "Can't read snapshot inode"); - goto err; - } - - memcpy(&s->inode, buf, sizeof(s->inode)); - - bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; - bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; - pstrcpy(s->name, sizeof(s->name), vdi); - qemu_co_mutex_init(&s->lock); - qemu_co_mutex_init(&s->queue_lock); - qemu_co_queue_init(&s->overlapping_queue); - qemu_opts_del(opts); - g_free(buf); - return 0; - -err: - aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, - false, NULL, NULL, NULL, NULL); - closesocket(s->fd); -err_no_fd: - qemu_opts_del(opts); - g_free(buf); - return ret; -} - -static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, - Error **errp) -{ - BDRVSheepdogState *s = state->bs->opaque; - BDRVSheepdogReopenState *re_s; - int ret = 0; - - re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1); - - re_s->cache_flags = SD_FLAG_CMD_CACHE; - if (state->flags & BDRV_O_NOCACHE) { - re_s->cache_flags = SD_FLAG_CMD_DIRECT; - } - - re_s->fd = get_sheep_fd(s, errp); - if (re_s->fd < 0) { - ret = re_s->fd; - return ret; - } - - return ret; -} - -static void sd_reopen_commit(BDRVReopenState *state) -{ - BDRVSheepdogReopenState *re_s = state->opaque; - BDRVSheepdogState *s = state->bs->opaque; - - if (s->fd) { - aio_set_fd_handler(s->aio_context, s->fd, false, - NULL, NULL, NULL, NULL); - closesocket(s->fd); - } - - s->fd = re_s->fd; - s->cache_flags = re_s->cache_flags; - - g_free(state->opaque); - state->opaque = NULL; - - return; -} - -static void sd_reopen_abort(BDRVReopenState *state) -{ - BDRVSheepdogReopenState *re_s = state->opaque; - BDRVSheepdogState *s = state->bs->opaque; - - if (re_s == NULL) { - return; - } - - if (re_s->fd) { - aio_set_fd_handler(s->aio_context, re_s->fd, false, - NULL, NULL, NULL, NULL); - closesocket(re_s->fd); - } - - g_free(state->opaque); - state->opaque = NULL; - - return; -} - -static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, - Error **errp) -{ - SheepdogVdiReq hdr; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - int fd, ret; - unsigned int wlen, rlen = 0; - char buf[SD_MAX_VDI_LEN]; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - /* FIXME: would it be better to fail (e.g., return -EIO) when filename - * does not fit in buf? For now, just truncate and avoid buffer overrun. 
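Both this FIXME and the strncpy comment in find_vdi_name() come down to the same C pitfall: strncpy() zero-fills the destination but does not guarantee NUL termination, whereas QEMU's pstrcpy() always terminates and silently truncates. A minimal illustration (my_pstrcpy() is a local stand-in, not QEMU's implementation):

    #include <stdio.h>
    #include <string.h>

    static void my_pstrcpy(char *dest, size_t dest_len, const char *src)
    {
        if (dest_len == 0) {
            return;
        }
        strncpy(dest, src, dest_len - 1);
        dest[dest_len - 1] = '\0';          /* truncate, but always terminate */
    }

    int main(void)
    {
        char a[4], b[4];

        strncpy(a, "abcdef", sizeof(a));    /* a = "abcd", no terminating NUL */
        my_pstrcpy(b, sizeof(b), "abcdef"); /* b = "abc" plus NUL */
        printf("%.4s / %s\n", a, b);
        return 0;
    }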
- */ - memset(buf, 0, sizeof(buf)); - pstrcpy(buf, sizeof(buf), s->name); - - memset(&hdr, 0, sizeof(hdr)); - hdr.opcode = SD_OP_NEW_VDI; - hdr.base_vdi_id = s->inode.vdi_id; - - wlen = SD_MAX_VDI_LEN; - - hdr.flags = SD_FLAG_CMD_WRITE; - hdr.snapid = snapshot; - - hdr.data_length = wlen; - hdr.vdi_size = s->inode.vdi_size; - hdr.copy_policy = s->inode.copy_policy; - hdr.copies = s->inode.nr_copies; - hdr.block_size_shift = s->inode.block_size_shift; - - ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen); - - closesocket(fd); - - if (ret) { - error_setg_errno(errp, -ret, "create failed"); - return ret; - } - - if (rsp->result != SD_RES_SUCCESS) { - error_setg(errp, "%s, %s", sd_strerror(rsp->result), s->inode.name); - return -EIO; - } - - if (vdi_id) { - *vdi_id = rsp->vdi_id; - } - - return 0; -} - -static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size, - Error **errp) -{ - BlockBackend *blk = NULL; - BDRVSheepdogState *base = bs->opaque; - unsigned long buf_size; - uint32_t idx, max_idx; - uint32_t object_size; - void *buf = NULL; - int ret; - - blk = blk_new_with_bs(bs, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, - BLK_PERM_ALL, errp); - - if (!blk) { - ret = -EPERM; - goto out_with_err_set; - } - - blk_set_allow_write_beyond_eof(blk, true); - - object_size = (UINT32_C(1) << base->inode.block_size_shift); - buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); - buf = g_malloc0(buf_size); - - max_idx = DIV_ROUND_UP(new_size, buf_size); - - for (idx = old_size / buf_size; idx < max_idx; idx++) { - /* - * The created image can be a cloned image, so we need to read - * a data from the source image. - */ - ret = blk_pread(blk, idx * buf_size, buf, buf_size); - if (ret < 0) { - goto out; - } - ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0); - if (ret < 0) { - goto out; - } - } - - ret = 0; -out: - if (ret < 0) { - error_setg_errno(errp, -ret, "Can't pre-allocate"); - } -out_with_err_set: - blk_unref(blk); - g_free(buf); - - return ret; -} - -static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size, - Error **errp) -{ - BlockDriverState *bs; - Visitor *v; - QObject *obj = NULL; - QDict *qdict; - int ret; - - v = qobject_output_visitor_new(&obj); - visit_type_BlockdevOptionsSheepdog(v, NULL, &location, &error_abort); - visit_free(v); - - qdict = qobject_to(QDict, obj); - qdict_flatten(qdict); - - qdict_put_str(qdict, "driver", "sheepdog"); - - bs = bdrv_open(NULL, NULL, qdict, BDRV_O_PROTOCOL | BDRV_O_RDWR, errp); - if (bs == NULL) { - ret = -EIO; - goto fail; - } - - ret = sd_prealloc(bs, 0, size, errp); -fail: - bdrv_unref(bs); - qobject_unref(qdict); - return ret; -} - -static int parse_redundancy(BDRVSheepdogState *s, SheepdogRedundancy *opt) -{ - struct SheepdogInode *inode = &s->inode; - - switch (opt->type) { - case SHEEPDOG_REDUNDANCY_TYPE_FULL: - if (opt->u.full.copies > SD_MAX_COPIES || opt->u.full.copies < 1) { - return -EINVAL; - } - inode->copy_policy = 0; - inode->nr_copies = opt->u.full.copies; - return 0; - - case SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED: - { - int64_t copy = opt->u.erasure_coded.data_strips; - int64_t parity = opt->u.erasure_coded.parity_strips; - - if (copy != 2 && copy != 4 && copy != 8 && copy != 16) { - return -EINVAL; - } - - if (parity >= SD_EC_MAX_STRIP || parity < 1) { - return -EINVAL; - } - - /* - * 4 bits for parity and 4 bits for data. 
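A worked example of the copy_policy packing described above and computed just below: the data-strip count (always a power of two between 2 and 16) is halved so it fits in the upper four bits, and the parity count occupies the lower four. The decode step here is hypothetical; the driver itself only ever encodes.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int data_strips = 8, parity_strips = 3;        /* "-o redundancy=8:3" */
        uint8_t copy_policy = ((data_strips / 2) << 4) + parity_strips;

        /* decode again, e.g. for display purposes */
        int decoded_data = (copy_policy >> 4) * 2;
        int decoded_parity = copy_policy & 0x0f;

        printf("copy_policy=0x%02x data=%d parity=%d copies=%d\n",
               copy_policy, decoded_data, decoded_parity,
               decoded_data + decoded_parity);         /* 0x43, 8, 3, 11 */
        return 0;
    }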
- * We have to compress upper data bits because it can't represent 16 - */ - inode->copy_policy = ((copy / 2) << 4) + parity; - inode->nr_copies = copy + parity; - return 0; - } - - default: - g_assert_not_reached(); - } - - return -EINVAL; -} - -/* - * Sheepdog support two kinds of redundancy, full replication and erasure - * coding. - * - * # create a fully replicated vdi with x copies - * -o redundancy=x (1 <= x <= SD_MAX_COPIES) - * - * # create a erasure coded vdi with x data strips and y parity strips - * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP) - */ -static SheepdogRedundancy *parse_redundancy_str(const char *opt) -{ - SheepdogRedundancy *redundancy; - const char *n1, *n2; - long copy, parity; - char p[10]; - int ret; - - pstrcpy(p, sizeof(p), opt); - n1 = strtok(p, ":"); - n2 = strtok(NULL, ":"); - - if (!n1) { - return NULL; - } - - ret = qemu_strtol(n1, NULL, 10, ©); - if (ret < 0) { - return NULL; - } - - redundancy = g_new0(SheepdogRedundancy, 1); - if (!n2) { - *redundancy = (SheepdogRedundancy) { - .type = SHEEPDOG_REDUNDANCY_TYPE_FULL, - .u.full.copies = copy, - }; - } else { - ret = qemu_strtol(n2, NULL, 10, &parity); - if (ret < 0) { - g_free(redundancy); - return NULL; - } - - *redundancy = (SheepdogRedundancy) { - .type = SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED, - .u.erasure_coded = { - .data_strips = copy, - .parity_strips = parity, - }, - }; - } - - return redundancy; -} - -static int parse_block_size_shift(BDRVSheepdogState *s, - BlockdevCreateOptionsSheepdog *opts) -{ - struct SheepdogInode *inode = &s->inode; - uint64_t object_size; - int obj_order; - - if (opts->has_object_size) { - object_size = opts->object_size; - - if ((object_size - 1) & object_size) { /* not a power of 2? */ - return -EINVAL; - } - obj_order = ctz32(object_size); - if (obj_order < 20 || obj_order > 31) { - return -EINVAL; - } - inode->block_size_shift = (uint8_t)obj_order; - } - - return 0; -} - -static int sd_co_create(BlockdevCreateOptions *options, Error **errp) -{ - BlockdevCreateOptionsSheepdog *opts = &options->u.sheepdog; - int ret = 0; - uint32_t vid = 0; - char *backing_file = NULL; - char *buf = NULL; - BDRVSheepdogState *s; - uint64_t max_vdi_size; - bool prealloc = false; - - assert(options->driver == BLOCKDEV_DRIVER_SHEEPDOG); - - deprecation_warning(); - - s = g_new0(BDRVSheepdogState, 1); - - /* Steal SocketAddress from QAPI, set NULL to prevent double free */ - s->addr = opts->location->server; - opts->location->server = NULL; - - if (strlen(opts->location->vdi) >= sizeof(s->name)) { - error_setg(errp, "'vdi' string too long"); - ret = -EINVAL; - goto out; - } - pstrcpy(s->name, sizeof(s->name), opts->location->vdi); - - s->inode.vdi_size = opts->size; - backing_file = opts->backing_file; - - if (!opts->has_preallocation) { - opts->preallocation = PREALLOC_MODE_OFF; - } - switch (opts->preallocation) { - case PREALLOC_MODE_OFF: - prealloc = false; - break; - case PREALLOC_MODE_FULL: - prealloc = true; - break; - default: - error_setg(errp, "Preallocation mode not supported for Sheepdog"); - ret = -EINVAL; - goto out; - } - - if (opts->has_redundancy) { - ret = parse_redundancy(s, opts->redundancy); - if (ret < 0) { - error_setg(errp, "Invalid redundancy mode"); - goto out; - } - } - ret = parse_block_size_shift(s, opts); - if (ret < 0) { - error_setg(errp, "Invalid object_size." 
- " obect_size needs to be power of 2" - " and be limited from 2^20 to 2^31"); - goto out; - } - - if (opts->has_backing_file) { - BlockBackend *blk; - BDRVSheepdogState *base; - BlockDriver *drv; - - /* Currently, only Sheepdog backing image is supported. */ - drv = bdrv_find_protocol(opts->backing_file, true, NULL); - if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { - error_setg(errp, "backing_file must be a sheepdog image"); - ret = -EINVAL; - goto out; - } - - blk = blk_new_open(opts->backing_file, NULL, NULL, - BDRV_O_PROTOCOL, errp); - if (blk == NULL) { - ret = -EIO; - goto out; - } - - base = blk_bs(blk)->opaque; - - if (!is_snapshot(&base->inode)) { - error_setg(errp, "cannot clone from a non snapshot vdi"); - blk_unref(blk); - ret = -EINVAL; - goto out; - } - s->inode.vdi_id = base->inode.vdi_id; - blk_unref(blk); - } - - s->aio_context = qemu_get_aio_context(); - - /* if block_size_shift is not specified, get cluster default value */ - if (s->inode.block_size_shift == 0) { - SheepdogVdiReq hdr; - SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr; - int fd; - unsigned int wlen = 0, rlen = 0; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - ret = fd; - goto out; - } - - memset(&hdr, 0, sizeof(hdr)); - hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; - hdr.proto_ver = SD_PROTO_VER; - - ret = do_req(fd, NULL, (SheepdogReq *)&hdr, - NULL, &wlen, &rlen); - closesocket(fd); - if (ret) { - error_setg_errno(errp, -ret, "failed to get cluster default"); - goto out; - } - if (rsp->result == SD_RES_SUCCESS) { - s->inode.block_size_shift = rsp->block_size_shift; - } else { - s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT; - } - } - - max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; - - if (s->inode.vdi_size > max_vdi_size) { - error_setg(errp, "An image is too large." 
- " The maximum image size is %"PRIu64 "GB", - max_vdi_size / 1024 / 1024 / 1024); - ret = -EINVAL; - goto out; - } - - ret = do_sd_create(s, &vid, 0, errp); - if (ret) { - goto out; - } - - if (prealloc) { - ret = sd_create_prealloc(opts->location, opts->size, errp); - } -out: - g_free(backing_file); - g_free(buf); - g_free(s->addr); - g_free(s); - return ret; -} - -static int coroutine_fn sd_co_create_opts(BlockDriver *drv, - const char *filename, - QemuOpts *opts, - Error **errp) -{ - BlockdevCreateOptions *create_options = NULL; - QDict *qdict = NULL, *location_qdict; - Visitor *v; - char *redundancy = NULL; - Error *local_err = NULL; - int ret; - char *backing_fmt = NULL; - - redundancy = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY); - backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); - - if (backing_fmt && strcmp(backing_fmt, "sheepdog") != 0) { - error_setg(errp, "backing_file must be a sheepdog image"); - ret = -EINVAL; - goto fail; - } - - qdict = qemu_opts_to_qdict(opts, NULL); - qdict_put_str(qdict, "driver", "sheepdog"); - - location_qdict = qdict_new(); - qdict_put(qdict, "location", location_qdict); - - sd_parse_filename(filename, location_qdict, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; - } - - qdict_flatten(qdict); - - /* Change legacy command line options into QMP ones */ - static const QDictRenames opt_renames[] = { - { BLOCK_OPT_BACKING_FILE, "backing-file" }, - { BLOCK_OPT_OBJECT_SIZE, "object-size" }, - { NULL, NULL }, - }; - - if (!qdict_rename_keys(qdict, opt_renames, errp)) { - ret = -EINVAL; - goto fail; - } - - /* Get the QAPI object */ - v = qobject_input_visitor_new_flat_confused(qdict, errp); - if (!v) { - ret = -EINVAL; - goto fail; - } - - visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); - visit_free(v); - if (!create_options) { - ret = -EINVAL; - goto fail; - } - - assert(create_options->driver == BLOCKDEV_DRIVER_SHEEPDOG); - create_options->u.sheepdog.size = - ROUND_UP(create_options->u.sheepdog.size, BDRV_SECTOR_SIZE); - - if (redundancy) { - create_options->u.sheepdog.has_redundancy = true; - create_options->u.sheepdog.redundancy = - parse_redundancy_str(redundancy); - if (create_options->u.sheepdog.redundancy == NULL) { - error_setg(errp, "Invalid redundancy mode"); - ret = -EINVAL; - goto fail; - } - } - - ret = sd_co_create(create_options, errp); -fail: - qapi_free_BlockdevCreateOptions(create_options); - qobject_unref(qdict); - g_free(redundancy); - g_free(backing_fmt); - return ret; -} - -static void sd_close(BlockDriverState *bs) -{ - Error *local_err = NULL; - BDRVSheepdogState *s = bs->opaque; - SheepdogVdiReq hdr; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - unsigned int wlen, rlen = 0; - int fd, ret; - - trace_sheepdog_close(s->name); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return; - } - - memset(&hdr, 0, sizeof(hdr)); - - hdr.opcode = SD_OP_RELEASE_VDI; - hdr.type = LOCK_TYPE_NORMAL; - hdr.base_vdi_id = s->inode.vdi_id; - wlen = strlen(s->name) + 1; - hdr.data_length = wlen; - hdr.flags = SD_FLAG_CMD_WRITE; - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, - s->name, &wlen, &rlen); - - closesocket(fd); - - if (!ret && rsp->result != SD_RES_SUCCESS && - rsp->result != SD_RES_VDI_NOT_LOCKED) { - error_report("%s, %s", sd_strerror(rsp->result), s->name); - } - - aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, - false, NULL, NULL, NULL, NULL); - closesocket(s->fd); - qapi_free_SocketAddress(s->addr); -} - 
-static int64_t sd_getlength(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - - return s->inode.vdi_size; -} - -static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, - BdrvRequestFlags flags, Error **errp) -{ - BDRVSheepdogState *s = bs->opaque; - int ret, fd; - unsigned int datalen; - uint64_t max_vdi_size; - int64_t old_size = s->inode.vdi_size; - - if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) { - error_setg(errp, "Unsupported preallocation mode '%s'", - PreallocMode_str(prealloc)); - return -ENOTSUP; - } - - max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; - if (offset < old_size) { - error_setg(errp, "shrinking is not supported"); - return -EINVAL; - } else if (offset > max_vdi_size) { - error_setg(errp, "too big image size"); - return -EINVAL; - } - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - /* we don't need to update entire object */ - datalen = SD_INODE_HEADER_SIZE; - s->inode.vdi_size = offset; - ret = write_object(fd, s->bs, (char *)&s->inode, - vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, - datalen, 0, false, s->cache_flags); - close(fd); - - if (ret < 0) { - error_setg_errno(errp, -ret, "failed to update an inode"); - return ret; - } - - if (prealloc == PREALLOC_MODE_FULL) { - ret = sd_prealloc(bs, old_size, offset, errp); - if (ret < 0) { - return ret; - } - } - - return 0; -} - -/* - * This function is called after writing data objects. If we need to - * update metadata, this sends a write request to the vdi object. - */ -static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) -{ - BDRVSheepdogState *s = acb->s; - struct iovec iov; - AIOReq *aio_req; - uint32_t offset, data_len, mn, mx; - - mn = acb->min_dirty_data_idx; - mx = acb->max_dirty_data_idx; - if (mn <= mx) { - /* we need to update the vdi object. 
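The update window that sd_write_done() computes just below covers only the dirty slice of the data_vdi_id[] array, which sits at the end of the inode, so the write-back to the vdi object stays small. A toy inode makes the arithmetic concrete (the struct layout is illustrative, not SheepdogInode's real one):

    #include <stdint.h>
    #include <stdio.h>

    struct toy_inode {
        char     name[8];
        uint64_t vdi_size;
        uint32_t data_vdi_id[16];       /* last member, as in SheepdogInode */
    };

    int main(void)
    {
        uint32_t mn = 3, mx = 6;        /* dirty index range left by a write */
        size_t offset = sizeof(struct toy_inode)
                        - sizeof(((struct toy_inode *)0)->data_vdi_id)
                        + mn * sizeof(uint32_t);
        size_t data_len = (mx - mn + 1) * sizeof(uint32_t);

        /* only bytes [offset, offset + data_len) of the inode are rewritten */
        printf("offset=%zu data_len=%zu\n", offset, data_len);
        return 0;
    }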
*/ - ++acb->nr_pending; - offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + - mn * sizeof(s->inode.data_vdi_id[0]); - data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); - - acb->min_dirty_data_idx = UINT32_MAX; - acb->max_dirty_data_idx = 0; - - iov.iov_base = &s->inode; - iov.iov_len = sizeof(s->inode); - aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), - data_len, offset, 0, false, 0, offset); - add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA); - if (--acb->nr_pending) { - qemu_coroutine_yield(); - } - } -} - -/* Delete current working VDI on the snapshot chain */ -static bool sd_delete(BDRVSheepdogState *s) -{ - Error *local_err = NULL; - unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0; - SheepdogVdiReq hdr = { - .opcode = SD_OP_DEL_VDI, - .base_vdi_id = s->inode.vdi_id, - .data_length = wlen, - .flags = SD_FLAG_CMD_WRITE, - }; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - int fd, ret; - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return false; - } - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, - s->name, &wlen, &rlen); - closesocket(fd); - if (ret) { - return false; - } - switch (rsp->result) { - case SD_RES_NO_VDI: - error_report("%s was already deleted", s->name); - /* fall through */ - case SD_RES_SUCCESS: - break; - default: - error_report("%s, %s", sd_strerror(rsp->result), s->name); - return false; - } - - return true; -} - -/* - * Create a writable VDI from a snapshot - */ -static int sd_create_branch(BDRVSheepdogState *s) -{ - Error *local_err = NULL; - int ret, fd; - uint32_t vid; - char *buf; - bool deleted; - - trace_sheepdog_create_branch_snapshot(s->inode.vdi_id); - - buf = g_malloc(SD_INODE_SIZE); - - /* - * Even If deletion fails, we will just create extra snapshot based on - * the working VDI which was supposed to be deleted. So no need to - * false bail out. - */ - deleted = sd_delete(s); - ret = do_sd_create(s, &vid, !deleted, &local_err); - if (ret) { - error_report_err(local_err); - goto out; - } - - trace_sheepdog_create_branch_created(vid); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto out; - } - - ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), - s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags); - - closesocket(fd); - - if (ret < 0) { - goto out; - } - - memcpy(&s->inode, buf, sizeof(s->inode)); - - s->is_snapshot = false; - ret = 0; - trace_sheepdog_create_branch_new(s->inode.vdi_id); - -out: - g_free(buf); - - return ret; -} - -/* - * Send I/O requests to the server. - * - * This function sends requests to the server, links the requests to - * the inflight_list in BDRVSheepdogState, and exits without - * waiting the response. The responses are received in the - * `aio_read_response' function which is called from the main loop as - * a fd handler. - * - * Returns 1 when we need to wait a response, 0 when there is no sent - * request and -errno in error cases. - */ -static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb) -{ - int ret = 0; - unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE; - unsigned long idx; - uint32_t object_size; - uint64_t oid; - uint64_t offset; - BDRVSheepdogState *s = acb->s; - SheepdogInode *inode = &s->inode; - AIOReq *aio_req; - - if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) { - /* - * In the case we open the snapshot VDI, Sheepdog creates the - * writable VDI when we do a write operation first. 
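The splitting loop that follows is the core of sd_co_rw_vector(): a guest request is cut at object boundaries, the first piece may start mid-object, and every later piece starts at offset 0. A standalone sketch of the same arithmetic, using 4 KiB objects purely to keep the numbers small:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t object_size = 4096;            /* stand-in for 1 << block_size_shift */
        uint64_t start = 5000, total = 10000;   /* deliberately unaligned request */
        uint64_t idx = start / object_size;
        uint64_t offset = start % object_size;
        uint64_t done = 0;

        while (done != total) {
            uint64_t len = MIN(total - done, object_size - offset);

            printf("object #%" PRIu64 ": offset=%" PRIu64 " len=%" PRIu64 "\n",
                   idx, offset, len);
            offset = 0;                         /* later pieces are object-aligned */
            idx++;
            done += len;
        }
        return 0;
    }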
- */ - ret = sd_create_branch(s); - if (ret) { - acb->ret = -EIO; - return; - } - } - - object_size = (UINT32_C(1) << inode->block_size_shift); - idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; - offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size; - - /* - * Make sure we don't free the aiocb before we are done with all requests. - * This additional reference is dropped at the end of this function. - */ - acb->nr_pending++; - - while (done != total) { - uint8_t flags = 0; - uint64_t old_oid = 0; - bool create = false; - - oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); - - len = MIN(total - done, object_size - offset); - - switch (acb->aiocb_type) { - case AIOCB_READ_UDATA: - if (!inode->data_vdi_id[idx]) { - qemu_iovec_memset(acb->qiov, done, 0, len); - goto done; - } - break; - case AIOCB_WRITE_UDATA: - if (!inode->data_vdi_id[idx]) { - create = true; - } else if (!is_data_obj_writable(inode, idx)) { - /* Copy-On-Write */ - create = true; - old_oid = oid; - flags = SD_FLAG_CMD_COW; - } - break; - case AIOCB_DISCARD_OBJ: - /* - * We discard the object only when the whole object is - * 1) allocated 2) trimmed. Otherwise, simply skip it. - */ - if (len != object_size || inode->data_vdi_id[idx] == 0) { - goto done; - } - break; - default: - break; - } - - if (create) { - trace_sheepdog_co_rw_vector_update(inode->vdi_id, oid, - vid_to_data_oid(inode->data_vdi_id[idx], idx), - idx); - oid = vid_to_data_oid(inode->vdi_id, idx); - trace_sheepdog_co_rw_vector_new(oid); - } - - aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, - old_oid, - acb->aiocb_type == AIOCB_DISCARD_OBJ ? - 0 : done); - add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, - acb->aiocb_type); - done: - offset = 0; - idx++; - done += len; - } - if (--acb->nr_pending) { - qemu_coroutine_yield(); - } -} - -static void sd_aio_complete(SheepdogAIOCB *acb) -{ - BDRVSheepdogState *s; - if (acb->aiocb_type == AIOCB_FLUSH_CACHE) { - return; - } - - s = acb->s; - qemu_co_mutex_lock(&s->queue_lock); - QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overlapping_queue); - qemu_co_mutex_unlock(&s->queue_lock); -} - -static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int flags) -{ - SheepdogAIOCB acb; - int ret; - int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE; - BDRVSheepdogState *s = bs->opaque; - - assert(!flags); - if (offset > s->inode.vdi_size) { - ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - return ret; - } - } - - sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA); - sd_co_rw_vector(&acb); - sd_write_done(&acb); - sd_aio_complete(&acb); - - return acb.ret; -} - -static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - SheepdogAIOCB acb; - BDRVSheepdogState *s = bs->opaque; - - sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA); - sd_co_rw_vector(&acb); - sd_aio_complete(&acb); - - return acb.ret; -} - -static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - SheepdogAIOCB acb; - AIOReq *aio_req; - - if (s->cache_flags != SD_FLAG_CMD_CACHE) { - return 0; - } - - sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE); - - acb.nr_pending++; - aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id), - 0, 0, 0, false, 0, 0); - add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type); - - if 
(--acb.nr_pending) { - qemu_coroutine_yield(); - } - - sd_aio_complete(&acb); - return acb.ret; -} - -static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) -{ - Error *local_err = NULL; - BDRVSheepdogState *s = bs->opaque; - int ret, fd; - uint32_t new_vid; - SheepdogInode *inode; - unsigned int datalen; - - trace_sheepdog_snapshot_create_info(sn_info->name, sn_info->id_str, s->name, - sn_info->vm_state_size, s->is_snapshot); - - if (s->is_snapshot) { - error_report("You can't create a snapshot of a snapshot VDI, " - "%s (%" PRIu32 ").", s->name, s->inode.vdi_id); - - return -EINVAL; - } - - trace_sheepdog_snapshot_create(sn_info->name, sn_info->id_str); - - s->inode.vm_state_size = sn_info->vm_state_size; - s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; - /* It appears that inode.tag does not require a NUL terminator, - * which means this use of strncpy is ok. - */ - strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); - /* we don't need to update entire object */ - datalen = SD_INODE_HEADER_SIZE; - inode = g_malloc(datalen); - - /* refresh inode. */ - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto cleanup; - } - - ret = write_object(fd, s->bs, (char *)&s->inode, - vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, - datalen, 0, false, s->cache_flags); - if (ret < 0) { - error_report("failed to write snapshot's inode."); - goto cleanup; - } - - ret = do_sd_create(s, &new_vid, 1, &local_err); - if (ret < 0) { - error_reportf_err(local_err, - "failed to create inode for snapshot: "); - goto cleanup; - } - - ret = read_object(fd, s->bs, (char *)inode, - vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0, - s->cache_flags); - - if (ret < 0) { - error_report("failed to read new inode info. %s", strerror(errno)); - goto cleanup; - } - - memcpy(&s->inode, inode, datalen); - trace_sheepdog_snapshot_create_inode(s->inode.name, s->inode.snap_id, - s->inode.vdi_id); - -cleanup: - g_free(inode); - closesocket(fd); - return ret; -} - -/* - * We implement rollback(loadvm) operation to the specified snapshot by - * 1) switch to the snapshot - * 2) rely on sd_create_branch to delete working VDI and - * 3) create a new working VDI based on the specified snapshot - */ -static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) -{ - BDRVSheepdogState *s = bs->opaque; - BDRVSheepdogState *old_s; - char tag[SD_MAX_VDI_TAG_LEN]; - uint32_t snapid = 0; - int ret; - - if (!sd_parse_snapid_or_tag(snapshot_id, &snapid, tag)) { - return -EINVAL; - } - - old_s = g_new(BDRVSheepdogState, 1); - - memcpy(old_s, s, sizeof(BDRVSheepdogState)); - - ret = reload_inode(s, snapid, tag); - if (ret) { - goto out; - } - - ret = sd_create_branch(s); - if (ret) { - goto out; - } - - g_free(old_s); - - return 0; -out: - /* recover bdrv_sd_state */ - memcpy(s, old_s, sizeof(BDRVSheepdogState)); - g_free(old_s); - - error_report("failed to open. 
recover old bdrv_sd_state."); - - return ret; -} - -#define NR_BATCHED_DISCARD 128 - -static int remove_objects(BDRVSheepdogState *s, Error **errp) -{ - int fd, i = 0, nr_objs = 0; - int ret; - SheepdogInode *inode = &s->inode; - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - nr_objs = count_data_objs(inode); - while (i < nr_objs) { - int start_idx, nr_filled_idx; - - while (i < nr_objs && !inode->data_vdi_id[i]) { - i++; - } - start_idx = i; - - nr_filled_idx = 0; - while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) { - if (inode->data_vdi_id[i]) { - inode->data_vdi_id[i] = 0; - nr_filled_idx++; - } - - i++; - } - - ret = write_object(fd, s->bs, - (char *)&inode->data_vdi_id[start_idx], - vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies, - (i - start_idx) * sizeof(uint32_t), - offsetof(struct SheepdogInode, - data_vdi_id[start_idx]), - false, s->cache_flags); - if (ret < 0) { - error_setg(errp, "Failed to discard snapshot inode"); - goto out; - } - } - - ret = 0; -out: - closesocket(fd); - return ret; -} - -static int sd_snapshot_delete(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp) -{ - /* - * FIXME should delete the snapshot matching both @snapshot_id and - * @name, but @name not used here - */ - unsigned long snap_id = 0; - char snap_tag[SD_MAX_VDI_TAG_LEN]; - int fd, ret; - char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; - BDRVSheepdogState *s = bs->opaque; - unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0; - uint32_t vid; - SheepdogVdiReq hdr = { - .opcode = SD_OP_DEL_VDI, - .data_length = wlen, - .flags = SD_FLAG_CMD_WRITE, - }; - SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; - - ret = remove_objects(s, errp); - if (ret) { - return ret; - } - - memset(buf, 0, sizeof(buf)); - memset(snap_tag, 0, sizeof(snap_tag)); - pstrcpy(buf, SD_MAX_VDI_LEN, s->name); - /* TODO Use sd_parse_snapid() once this mess is cleaned up */ - ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id); - if (ret || snap_id > UINT32_MAX) { - /* - * FIXME Since qemu_strtoul() returns -EINVAL when - * @snapshot_id is null, @snapshot_id is mandatory. Correct - * would be to require at least one of @snapshot_id and @name. - */ - error_setg(errp, "Invalid snapshot ID: %s", - snapshot_id ? 
snapshot_id : ""); - return -EINVAL; - } - - if (snap_id) { - hdr.snapid = (uint32_t) snap_id; - } else { - /* FIXME I suspect we should use @name here */ - /* FIXME don't truncate silently */ - pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id); - pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag); - } - - ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, errp); - if (ret) { - return ret; - } - - fd = connect_to_sdog(s, errp); - if (fd < 0) { - return fd; - } - - ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, - buf, &wlen, &rlen); - closesocket(fd); - if (ret) { - error_setg_errno(errp, -ret, "Couldn't send request to server"); - return ret; - } - - switch (rsp->result) { - case SD_RES_NO_VDI: - error_setg(errp, "Can't find the snapshot"); - return -ENOENT; - case SD_RES_SUCCESS: - break; - default: - error_setg(errp, "%s", sd_strerror(rsp->result)); - return -EIO; - } - - return 0; -} - -static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) -{ - Error *local_err = NULL; - BDRVSheepdogState *s = bs->opaque; - SheepdogReq req; - int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long); - QEMUSnapshotInfo *sn_tab = NULL; - unsigned wlen, rlen; - int found = 0; - SheepdogInode *inode; - unsigned long *vdi_inuse; - unsigned int start_nr; - uint64_t hval; - uint32_t vid; - - vdi_inuse = g_malloc(max); - inode = g_malloc(SD_INODE_HEADER_SIZE); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto out; - } - - rlen = max; - wlen = 0; - - memset(&req, 0, sizeof(req)); - - req.opcode = SD_OP_READ_VDIS; - req.data_length = max; - - ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen); - - closesocket(fd); - if (ret) { - goto out; - } - - sn_tab = g_new0(QEMUSnapshotInfo, nr); - - /* calculate a vdi id with hash function */ - hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); - start_nr = hval & (SD_NR_VDIS - 1); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - ret = fd; - goto out; - } - - for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) { - if (!test_bit(vid, vdi_inuse)) { - break; - } - - /* we don't need to read entire object */ - ret = read_object(fd, s->bs, (char *)inode, - vid_to_vdi_oid(vid), - 0, SD_INODE_HEADER_SIZE, 0, - s->cache_flags); - - if (ret) { - continue; - } - - if (!strcmp(inode->name, s->name) && is_snapshot(inode)) { - sn_tab[found].date_sec = inode->snap_ctime >> 32; - sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff; - sn_tab[found].vm_state_size = inode->vm_state_size; - sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec; - - snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), - "%" PRIu32, inode->snap_id); - pstrcpy(sn_tab[found].name, - MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)), - inode->tag); - found++; - } - } - - closesocket(fd); -out: - *psn_tab = sn_tab; - - g_free(vdi_inuse); - g_free(inode); - - if (ret < 0) { - return ret; - } - - return found; -} - -static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, - int64_t pos, int size, int load) -{ - Error *local_err = NULL; - bool create; - int fd, ret = 0, remaining = size; - unsigned int data_len; - uint64_t vmstate_oid; - uint64_t offset; - uint32_t vdi_index; - uint32_t vdi_id = load ? 
s->inode.parent_vdi_id : s->inode.vdi_id; - uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift); - - fd = connect_to_sdog(s, &local_err); - if (fd < 0) { - error_report_err(local_err); - return fd; - } - - while (remaining) { - vdi_index = pos / object_size; - offset = pos % object_size; - - data_len = MIN(remaining, object_size - offset); - - vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index); - - create = (offset == 0); - if (load) { - ret = read_object(fd, s->bs, (char *)data, vmstate_oid, - s->inode.nr_copies, data_len, offset, - s->cache_flags); - } else { - ret = write_object(fd, s->bs, (char *)data, vmstate_oid, - s->inode.nr_copies, data_len, offset, create, - s->cache_flags); - } - - if (ret < 0) { - error_report("failed to save vmstate %s", strerror(errno)); - goto cleanup; - } - - pos += data_len; - data += data_len; - remaining -= data_len; - } - ret = size; -cleanup: - closesocket(fd); - return ret; -} - -static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos) -{ - BDRVSheepdogState *s = bs->opaque; - void *buf; - int ret; - - buf = qemu_blockalign(bs, qiov->size); - qemu_iovec_to_buf(qiov, 0, buf, qiov->size); - ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0); - qemu_vfree(buf); - - return ret; -} - -static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t pos) -{ - BDRVSheepdogState *s = bs->opaque; - void *buf; - int ret; - - buf = qemu_blockalign(bs, qiov->size); - ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1); - qemu_iovec_from_buf(qiov, 0, buf, qiov->size); - qemu_vfree(buf); - - return ret; -} - - -static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, - int bytes) -{ - SheepdogAIOCB acb; - BDRVSheepdogState *s = bs->opaque; - QEMUIOVector discard_iov; - struct iovec iov; - uint32_t zero = 0; - - if (!s->discard_supported) { - return 0; - } - - memset(&discard_iov, 0, sizeof(discard_iov)); - memset(&iov, 0, sizeof(iov)); - iov.iov_base = &zero; - iov.iov_len = sizeof(zero); - discard_iov.iov = &iov; - discard_iov.niov = 1; - if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) { - return -ENOTSUP; - } - sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ); - sd_co_rw_vector(&acb); - sd_aio_complete(&acb); - - return acb.ret; -} - -static coroutine_fn int -sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file) -{ - BDRVSheepdogState *s = bs->opaque; - SheepdogInode *inode = &s->inode; - uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); - unsigned long start = offset / object_size, - end = DIV_ROUND_UP(offset + bytes, object_size); - unsigned long idx; - *map = offset; - int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; - - for (idx = start; idx < end; idx++) { - if (inode->data_vdi_id[idx] == 0) { - break; - } - } - if (idx == start) { - /* Get the longest length of unallocated sectors */ - ret = 0; - for (idx = start + 1; idx < end; idx++) { - if (inode->data_vdi_id[idx] != 0) { - break; - } - } - } - - *pnum = (idx - start) * object_size; - if (*pnum > bytes) { - *pnum = bytes; - } - if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { - *file = bs; - } - return ret; -} - -static int64_t sd_get_allocated_file_size(BlockDriverState *bs) -{ - BDRVSheepdogState *s = bs->opaque; - SheepdogInode *inode = &s->inode; - uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); - unsigned long 
i, last = DIV_ROUND_UP(inode->vdi_size, object_size); - uint64_t size = 0; - - for (i = 0; i < last; i++) { - if (inode->data_vdi_id[i] == 0) { - continue; - } - size += object_size; - } - return size; -} - -static QemuOptsList sd_create_opts = { - .name = "sheepdog-create-opts", - .head = QTAILQ_HEAD_INITIALIZER(sd_create_opts.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Virtual disk size" - }, - { - .name = BLOCK_OPT_BACKING_FILE, - .type = QEMU_OPT_STRING, - .help = "File name of a base image" - }, - { - .name = BLOCK_OPT_BACKING_FMT, - .type = QEMU_OPT_STRING, - .help = "Must be 'sheepdog' if present", - }, - { - .name = BLOCK_OPT_PREALLOC, - .type = QEMU_OPT_STRING, - .help = "Preallocation mode (allowed values: off, full)" - }, - { - .name = BLOCK_OPT_REDUNDANCY, - .type = QEMU_OPT_STRING, - .help = "Redundancy of the image" - }, - { - .name = BLOCK_OPT_OBJECT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Object size of the image" - }, - { /* end of list */ } - } -}; - -static const char *const sd_strong_runtime_opts[] = { - "vdi", - "snap-id", - "tag", - "server.", - - NULL -}; - -static BlockDriver bdrv_sheepdog = { - .format_name = "sheepdog", - .protocol_name = "sheepdog", - .instance_size = sizeof(BDRVSheepdogState), - .bdrv_parse_filename = sd_parse_filename, - .bdrv_file_open = sd_open, - .bdrv_reopen_prepare = sd_reopen_prepare, - .bdrv_reopen_commit = sd_reopen_commit, - .bdrv_reopen_abort = sd_reopen_abort, - .bdrv_close = sd_close, - .bdrv_co_create = sd_co_create, - .bdrv_co_create_opts = sd_co_create_opts, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_getlength = sd_getlength, - .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_co_truncate = sd_co_truncate, - - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, - .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_block_status = sd_co_block_status, - - .bdrv_snapshot_create = sd_snapshot_create, - .bdrv_snapshot_goto = sd_snapshot_goto, - .bdrv_snapshot_delete = sd_snapshot_delete, - .bdrv_snapshot_list = sd_snapshot_list, - - .bdrv_save_vmstate = sd_save_vmstate, - .bdrv_load_vmstate = sd_load_vmstate, - - .bdrv_detach_aio_context = sd_detach_aio_context, - .bdrv_attach_aio_context = sd_attach_aio_context, - - .create_opts = &sd_create_opts, - .strong_runtime_opts = sd_strong_runtime_opts, -}; - -static BlockDriver bdrv_sheepdog_tcp = { - .format_name = "sheepdog", - .protocol_name = "sheepdog+tcp", - .instance_size = sizeof(BDRVSheepdogState), - .bdrv_parse_filename = sd_parse_filename, - .bdrv_file_open = sd_open, - .bdrv_reopen_prepare = sd_reopen_prepare, - .bdrv_reopen_commit = sd_reopen_commit, - .bdrv_reopen_abort = sd_reopen_abort, - .bdrv_close = sd_close, - .bdrv_co_create = sd_co_create, - .bdrv_co_create_opts = sd_co_create_opts, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_getlength = sd_getlength, - .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_co_truncate = sd_co_truncate, - - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, - .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_block_status = sd_co_block_status, - - .bdrv_snapshot_create = sd_snapshot_create, - .bdrv_snapshot_goto = sd_snapshot_goto, - .bdrv_snapshot_delete = sd_snapshot_delete, - .bdrv_snapshot_list = sd_snapshot_list, - - .bdrv_save_vmstate = sd_save_vmstate, - .bdrv_load_vmstate = sd_load_vmstate, - - .bdrv_detach_aio_context = 
sd_detach_aio_context, - .bdrv_attach_aio_context = sd_attach_aio_context, - - .create_opts = &sd_create_opts, - .strong_runtime_opts = sd_strong_runtime_opts, -}; - -static BlockDriver bdrv_sheepdog_unix = { - .format_name = "sheepdog", - .protocol_name = "sheepdog+unix", - .instance_size = sizeof(BDRVSheepdogState), - .bdrv_parse_filename = sd_parse_filename, - .bdrv_file_open = sd_open, - .bdrv_reopen_prepare = sd_reopen_prepare, - .bdrv_reopen_commit = sd_reopen_commit, - .bdrv_reopen_abort = sd_reopen_abort, - .bdrv_close = sd_close, - .bdrv_co_create = sd_co_create, - .bdrv_co_create_opts = sd_co_create_opts, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_getlength = sd_getlength, - .bdrv_get_allocated_file_size = sd_get_allocated_file_size, - .bdrv_co_truncate = sd_co_truncate, - - .bdrv_co_readv = sd_co_readv, - .bdrv_co_writev = sd_co_writev, - .bdrv_co_flush_to_disk = sd_co_flush_to_disk, - .bdrv_co_pdiscard = sd_co_pdiscard, - .bdrv_co_block_status = sd_co_block_status, - - .bdrv_snapshot_create = sd_snapshot_create, - .bdrv_snapshot_goto = sd_snapshot_goto, - .bdrv_snapshot_delete = sd_snapshot_delete, - .bdrv_snapshot_list = sd_snapshot_list, - - .bdrv_save_vmstate = sd_save_vmstate, - .bdrv_load_vmstate = sd_load_vmstate, - - .bdrv_detach_aio_context = sd_detach_aio_context, - .bdrv_attach_aio_context = sd_attach_aio_context, - - .create_opts = &sd_create_opts, - .strong_runtime_opts = sd_strong_runtime_opts, -}; - -static void bdrv_sheepdog_init(void) -{ - bdrv_register(&bdrv_sheepdog); - bdrv_register(&bdrv_sheepdog_tcp); - bdrv_register(&bdrv_sheepdog_unix); -} -block_init(bdrv_sheepdog_init); diff --git a/block/snapshot.c b/block/snapshot.c index e8ae9a28c11..ccacda8bd59 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -275,13 +275,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs, qobject_unref(file_options); g_free(subqdict_prefix); + /* Force .bdrv_open() below to re-attach fallback_bs on *fallback_ptr */ qdict_put_str(options, (*fallback_ptr)->name, bdrv_get_node_name(fallback_bs)); + /* Now close bs, apply the snapshot on fallback_bs, and re-open bs */ if (drv->bdrv_close) { drv->bdrv_close(bs); } + /* .bdrv_open() will re-attach it */ bdrv_unref_child(bs, *fallback_ptr); *fallback_ptr = NULL; @@ -296,7 +299,16 @@ int bdrv_snapshot_goto(BlockDriverState *bs, return ret < 0 ? ret : open_ret; } - assert(fallback_bs == (*fallback_ptr)->bs); + /* + * fallback_ptr is &bs->file or &bs->backing. *fallback_ptr + * was closed above and set to NULL, but the .bdrv_open() call + * has opened it again, because we set the respective option + * (with the qdict_put_str() call above). + * Assert that .bdrv_open() has attached some child on + * *fallback_ptr, and that it has attached the one we wanted + * it to (i.e., fallback_bs). 
+ */ + assert(*fallback_ptr && fallback_bs == (*fallback_ptr)->bs); bdrv_unref(fallback_bs); return ret; } @@ -415,7 +427,7 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, error_setg(errp, "snapshot_id and name are both NULL"); return -EINVAL; } - if (!bs->read_only) { + if (!bdrv_is_read_only(bs)) { error_setg(errp, "Device is not readonly"); return -EINVAL; } diff --git a/block/ssh.c b/block/ssh.c index ebe3d8b631f..e0fbb4934ba 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -237,9 +237,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) return 0; err: - if (uri) { - uri_free(uri); - } + uri_free(uri); return -EINVAL; } @@ -277,7 +275,6 @@ static void ssh_parse_filename(const char *filename, QDict *options, static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp) { int ret; -#ifdef HAVE_LIBSSH_0_8 enum ssh_known_hosts_e state; int r; ssh_key pubkey; @@ -343,46 +340,6 @@ static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp) error_setg(errp, "error while checking for known server (%d)", state); goto out; } -#else /* !HAVE_LIBSSH_0_8 */ - int state; - - state = ssh_is_server_known(s->session); - trace_ssh_server_status(state); - - switch (state) { - case SSH_SERVER_KNOWN_OK: - /* OK */ - trace_ssh_check_host_key_knownhosts(); - break; - case SSH_SERVER_KNOWN_CHANGED: - ret = -EINVAL; - error_setg(errp, - "host key does not match the one in known_hosts; this " - "may be a possible attack"); - goto out; - case SSH_SERVER_FOUND_OTHER: - ret = -EINVAL; - error_setg(errp, - "host key for this server not found, another type exists"); - goto out; - case SSH_SERVER_FILE_NOT_FOUND: - ret = -ENOENT; - error_setg(errp, "known_hosts file not found"); - goto out; - case SSH_SERVER_NOT_KNOWN: - ret = -EINVAL; - error_setg(errp, "no host key was found in known_hosts"); - goto out; - case SSH_SERVER_ERROR: - ret = -EINVAL; - error_setg(errp, "server error"); - goto out; - default: - ret = -EINVAL; - error_setg(errp, "error while checking for known server (%d)", state); - goto out; - } -#endif /* !HAVE_LIBSSH_0_8 */ /* known_hosts checking successful. 
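With the #else branch above removed, libssh 0.8 becomes a hard requirement and the known-hosts verification always goes through ssh_session_is_known_server(). A compressed sketch of handling that API's result, assuming an already-connected ssh_session (the driver's error-propagation plumbing is omitted):

    #include <libssh/libssh.h>
    #include <stdio.h>

    static int known_hosts_ok(ssh_session session)
    {
        switch (ssh_session_is_known_server(session)) {
        case SSH_KNOWN_HOSTS_OK:
            return 1;                       /* host key matches known_hosts */
        case SSH_KNOWN_HOSTS_CHANGED:
            fprintf(stderr, "host key changed - possible attack\n");
            return 0;
        case SSH_KNOWN_HOSTS_OTHER:
            fprintf(stderr, "a key of a different type is recorded for this host\n");
            return 0;
        case SSH_KNOWN_HOSTS_NOT_FOUND:     /* no known_hosts file at all */
        case SSH_KNOWN_HOSTS_UNKNOWN:       /* host not listed */
        default:
            return 0;
        }
    }

The SHA256 hash-based check added further down in this file goes through the same ssh_get_server_publickey() path instead of known_hosts.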
*/ ret = 0; @@ -438,11 +395,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash, unsigned char *server_hash; size_t server_hash_len; -#ifdef HAVE_LIBSSH_0_8 r = ssh_get_server_publickey(s->session, &pubkey); -#else - r = ssh_get_publickey(s->session, &pubkey); -#endif if (r != SSH_OK) { session_error_setg(errp, s, "failed to read remote host key"); return -EINVAL; @@ -487,6 +440,9 @@ static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp) } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) { return check_host_key_hash(s, hkc->u.hash.hash, SSH_PUBLICKEY_HASH_SHA1, errp); + } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA256) { + return check_host_key_hash(s, hkc->u.hash.hash, + SSH_PUBLICKEY_HASH_SHA256, errp); } g_assert_not_reached(); break; @@ -1233,8 +1189,6 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what) } } -#ifdef HAVE_LIBSSH_0_8 - static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs) { int r; @@ -1271,18 +1225,6 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs) return ret; } -#else /* !HAVE_LIBSSH_0_8 */ - -static coroutine_fn int ssh_co_flush(BlockDriverState *bs) -{ - BDRVSSHState *s = bs->opaque; - - unsafe_flush_warning(s, "libssh >= 0.8.0"); - return 0; -} - -#endif /* !HAVE_LIBSSH_0_8 */ - static int64_t ssh_getlength(BlockDriverState *bs) { BDRVSSHState *s = bs->opaque; diff --git a/block/stream.c b/block/stream.c index 97bee482dce..e45113aed6a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,8 +54,8 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); + BlockDriverState *base; + BlockDriverState *unfiltered_base; Error *local_err = NULL; int ret = 0; @@ -63,6 +63,9 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; + base = bdrv_filter_or_cow_bs(s->above_base); + unfiltered_base = bdrv_skip_filters(base); + if (bdrv_cow_child(unfiltered_bs)) { const char *base_id = NULL, *base_fmt = NULL; if (unfiltered_base) { diff --git a/block/throttle.c b/block/throttle.c index b685166ad4a..6e8d52fa245 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -112,8 +112,9 @@ static int64_t throttle_getlength(BlockDriverState *bs) } static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; @@ -123,8 +124,9 @@ static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, } static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; throttle_group_co_io_limits_intercept(tgm, bytes, true); @@ -133,7 +135,7 @@ static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, } static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, + int64_t offset, int64_t bytes, BdrvRequestFlags flags) { ThrottleGroupMember *tgm = bs->opaque; @@ -143,7 +145,7 @@ static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs, } static int coroutine_fn 
throttle_co_pdiscard(BlockDriverState *bs, - int64_t offset, int bytes) + int64_t offset, int64_t bytes) { ThrottleGroupMember *tgm = bs->opaque; throttle_group_co_io_limits_intercept(tgm, bytes, true); @@ -152,8 +154,8 @@ static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs, } static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs, - uint64_t offset, - uint64_t bytes, + int64_t offset, + int64_t bytes, QEMUIOVector *qiov) { return throttle_co_pwritev(bs, offset, bytes, qiov, diff --git a/block/trace-events b/block/trace-events index 1a12d634e2e..549090d453e 100644 --- a/block/trace-events +++ b/block/trace-events @@ -1,12 +1,12 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # ../block.c bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags 0x%x format_name \"%s\"" bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d" # block-backend.c -blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" -blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x" +blk_co_preadv(void *blk, void *bs, int64_t offset, int64_t bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %" PRId64 " flags 0x%x" +blk_co_pwritev(void *blk, void *bs, int64_t offset, int64_t bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %" PRId64 " flags 0x%x" blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p" blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p" @@ -75,13 +75,13 @@ luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p l # qcow2.c qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t host_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu" -qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" +qcow2_writev_start_req(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64 qcow2_writev_done_req(void *co, int ret) "co %p ret %d" qcow2_writev_start_part(void *co) "co %p" qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d" qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64 -qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" -qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%" PRIx64 " count %d" +qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64 +qcow2_pwrite_zeroes(void *co, int64_t offset, int64_t bytes) "co %p offset 0x%" PRIx64 " bytes %" PRId64 qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset 0x%" PRIx64 " nb_clusters %d" # qcow2-cluster.c @@ -152,12 +152,12 @@ nvme_write_zeroes(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p off nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x" nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset 0x%"PRIx64" bytes %"PRId64" niov %d is_write %d" nvme_rw_done(void *s, int is_write, uint64_t offset, 
uint64_t bytes, int ret) "s %p is_write %d offset 0x%"PRIx64" bytes %"PRId64" ret %d" -nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" -nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" +nvme_dsm(void *s, int64_t offset, int64_t bytes) "s %p offset 0x%"PRIx64" bytes %"PRId64"" +nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%"PRIx64" bytes %"PRId64" ret %d" nvme_dma_map_flush(void *s) "s %p" nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u" -nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void *aio_context, int fd) "index %u q %p size %u aioctx %p fd %d" -nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p" +nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d" +nvme_free_queue_pair(unsigned q_index, void *q, void *cq, void *sq) "index %u q %p cq %p sq %p" nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d" nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64 nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d" @@ -206,20 +206,7 @@ file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_of file_FindEjectableOpticalMedia(const char *media) "Matching using %s" file_setup_cdrom(const char *partition) "Using %s as optical disc" file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d" - -# sheepdog.c -sheepdog_reconnect_to_sdog(void) "Wait for connection to be established" -sheepdog_aio_read_response(void) "disable cache since the server doesn't support it" -sheepdog_open(uint32_t vid) "0x%" PRIx32 " snapshot inode was open" -sheepdog_close(const char *name) "%s" -sheepdog_create_branch_snapshot(uint32_t vdi) "0x%" PRIx32 " is snapshot" -sheepdog_create_branch_created(uint32_t vdi) "0x%" PRIx32 " is created" -sheepdog_create_branch_new(uint32_t vdi) "0x%" PRIx32 " was newly created" -sheepdog_co_rw_vector_update(uint32_t vdi, uint64_t oid, uint64_t data, long idx) "update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld" -sheepdog_co_rw_vector_new(uint64_t oid) "new oid 0x%" PRIx64 -sheepdog_snapshot_create_info(const char *sn_name, const char *id, const char *name, int64_t size, int is_snapshot) "sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " " "is_snapshot %d" -sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s" -sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32 +file_flush_fdatasync_failed(int err) "errno %d" # ssh.c sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)" diff --git a/block/vdi.c b/block/vdi.c index 548f8a057b8..bdc58d726ee 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -544,8 +544,8 @@ static int coroutine_fn vdi_co_block_status(BlockDriverState *bs, } static int coroutine_fn -vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vdi_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVdiState *s = bs->opaque; QEMUIOVector local_qiov; @@ -600,8 +600,8 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int 
coroutine_fn -vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vdi_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVdiState *s = bs->opaque; QEMUIOVector local_qiov; diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 404fb5f3cb0..7672161d955 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -801,7 +801,7 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, } if (logs.valid) { - if (bs->read_only) { + if (bdrv_is_read_only(bs)) { bdrv_refresh_filename(bs); ret = -EPERM; error_setg(errp, diff --git a/block/vmdk.c b/block/vmdk.c index 4499f136bdf..0dfab6e9413 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -60,6 +60,7 @@ #define VMDK_ZEROED (-3) #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain" +#define BLOCK_OPT_TOOLSVERSION "toolsversion" typedef struct { uint32_t version; @@ -1888,8 +1889,8 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, } static int coroutine_fn -vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVmdkState *s = bs->opaque; int ret; @@ -2068,8 +2069,8 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, } static int coroutine_fn -vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BDRVVmdkState *s = bs->opaque; @@ -2080,8 +2081,8 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov) +vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov) { if (bytes == 0) { /* The caller will write bytes 0 to signal EOF. 
@@ -2109,7 +2110,7 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, - int bytes, + int64_t bytes, BdrvRequestFlags flags) { int ret; @@ -2344,6 +2345,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, BlockdevVmdkAdapterType adapter_type, const char *backing_file, const char *hw_version, + const char *toolsversion, bool compat6, bool zeroed_grain, vmdk_create_extent_fn extent_fn, @@ -2384,7 +2386,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, "ddb.geometry.cylinders = \"%" PRId64 "\"\n" "ddb.geometry.heads = \"%" PRIu32 "\"\n" "ddb.geometry.sectors = \"63\"\n" - "ddb.adapterType = \"%s\"\n"; + "ddb.adapterType = \"%s\"\n" + "ddb.toolsVersion = \"%s\"\n"; ext_desc_lines = g_string_new(NULL); @@ -2401,6 +2404,9 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, if (!hw_version) { hw_version = "4"; } + if (!toolsversion) { + toolsversion = "2147483647"; + } if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) { /* that's the number of heads with which vmware operates when @@ -2525,7 +2531,8 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, size / (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE), number_heads, - BlockdevVmdkAdapterType_str(adapter_type)); + BlockdevVmdkAdapterType_str(adapter_type), + toolsversion); desc_len = strlen(desc); /* the descriptor offset = 0x200 */ if (!split && !flat) { @@ -2617,6 +2624,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, BlockdevVmdkAdapterType adapter_type_enum; char *backing_file = NULL; char *hw_version = NULL; + char *toolsversion = NULL; char *fmt = NULL; BlockdevVmdkSubformat subformat; int ret = 0; @@ -2649,6 +2657,7 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE); backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION); + toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION); compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false); if (strcmp(hw_version, "undefined") == 0) { g_free(hw_version); @@ -2692,14 +2701,15 @@ static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, .opts = opts, }; ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum, - backing_file, hw_version, compat6, zeroed_grain, - vmdk_co_create_opts_cb, &data, errp); + backing_file, hw_version, toolsversion, compat6, + zeroed_grain, vmdk_co_create_opts_cb, &data, errp); exit: g_free(backing_fmt); g_free(adapter_type); g_free(backing_file); g_free(hw_version); + g_free(toolsversion); g_free(fmt); g_free(desc); g_free(path); @@ -2782,6 +2792,7 @@ static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options, opts->adapter_type, opts->backing_file, opts->hwversion, + opts->toolsversion, false, opts->zeroed_grain, vmdk_co_create_cb, @@ -3031,6 +3042,11 @@ static QemuOptsList vmdk_create_opts = { .help = "VMDK hardware version", .def_value_str = "undefined" }, + { + .name = BLOCK_OPT_TOOLSVERSION, + .type = QEMU_OPT_STRING, + .help = "VMware guest tools version", + }, { .name = BLOCK_OPT_SUBFMT, .type = QEMU_OPT_STRING, diff --git a/block/vpc.c b/block/vpc.c index 17a705b482a..297a26262ab 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -276,7 +276,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, if (ret < 0) { goto fail; } - if (strncmp(footer->creator, "conectix", 8)) { + if (strncmp(footer->creator, "conectix", 
8) || + be32_to_cpu(footer->type) != VHD_FIXED) { error_setg(errp, "invalid VPC image"); ret = -EINVAL; goto fail; @@ -608,8 +609,8 @@ static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) } static int coroutine_fn -vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVPCState *s = bs->opaque; int ret; @@ -658,8 +659,8 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, } static int coroutine_fn -vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { BDRVVPCState *s = bs->opaque; int64_t image_offset; diff --git a/block/vvfat.c b/block/vvfat.c index 54807f82ca1..5dacc6cfac4 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1279,8 +1279,18 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(&s->lock); - ret = 0; + qemu_opts_del(opts); + + return 0; + fail: + g_free(s->qcow_filename); + s->qcow_filename = NULL; + g_free(s->cluster_buffer); + s->cluster_buffer = NULL; + g_free(s->used_clusters); + s->used_clusters = NULL; + qemu_opts_del(opts); return ret; } @@ -1522,8 +1532,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, } static int coroutine_fn -vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BDRVVVFATState *s = bs->opaque; @@ -3061,8 +3071,8 @@ DLOG(checkpoint()); } static int coroutine_fn -vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) +vvfat_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; BDRVVVFATState *s = bs->opaque; @@ -3098,26 +3108,6 @@ static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs, return BDRV_BLOCK_DATA; } -static int coroutine_fn -write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, int flags) -{ - int ret; - - BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); - qemu_co_mutex_lock(&s->lock); - ret = try_commit(s); - qemu_co_mutex_unlock(&s->lock); - - return ret; -} - -static BlockDriver vvfat_write_target = { - .format_name = "vvfat_write_target", - .instance_size = sizeof(void*), - .bdrv_co_pwritev = write_target_commit, -}; - static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) @@ -3127,22 +3117,18 @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format, qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); } -static const BdrvChildClass child_vvfat_qcow = { - .parent_is_bds = true, - .inherit_options = vvfat_qcow_options, -}; +static BdrvChildClass child_vvfat_qcow; static int enable_write_target(BlockDriverState *bs, Error **errp) { BDRVVVFATState *s = bs->opaque; BlockDriver *bdrv_qcow = NULL; - BlockDriverState *backing; QemuOpts *opts = NULL; int ret; int size = sector2cluster(s, s->sector_count); QDict *options; - s->used_clusters = calloc(size, 1); + s->used_clusters = g_malloc0(size); array_init(&(s->commits), sizeof(commit_t)); @@ 
-3187,18 +3173,9 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) unlink(s->qcow_filename); #endif - backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR, - &error_abort); - *(void**) backing->opaque = s; - - bdrv_set_backing_hd(s->bs, backing, &error_abort); - bdrv_unref(backing); - return 0; err: - g_free(s->qcow_filename); - s->qcow_filename = NULL; return ret; } @@ -3208,20 +3185,10 @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { - BDRVVVFATState *s = bs->opaque; - - assert(c == s->qcow || (role & BDRV_CHILD_COW)); - - if (c == s->qcow) { - /* This is a private node, nobody should try to attach to it */ - *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; - *nshared = BLK_PERM_WRITE_UNCHANGED; - } else { - /* The backing file is there so 'commit' can use it. vvfat doesn't - * access it in any way. */ - *nperm = 0; - *nshared = BLK_PERM_ALL; - } + assert(role & BDRV_CHILD_DATA); + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; } static void vvfat_close(BlockDriverState *bs) @@ -3270,6 +3237,8 @@ static BlockDriver bdrv_vvfat = { static void bdrv_vvfat_init(void) { + child_vvfat_qcow = child_of_bds; + child_vvfat_qcow.inherit_options = vvfat_qcow_options; bdrv_register(&bdrv_vvfat); } diff --git a/block/write-threshold.c b/block/write-threshold.c index 85b78dc2a9b..35cafbc22d9 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -12,9 +12,7 @@ #include "qemu/osdep.h" #include "block/block_int.h" -#include "qemu/coroutine.h" #include "block/write-threshold.h" -#include "qemu/notify.h" #include "qapi/error.h" #include "qapi/qapi-commands-block-core.h" #include "qapi/qapi-events-block-core.h" @@ -24,82 +22,9 @@ uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) return bs->write_threshold_offset; } -bool bdrv_write_threshold_is_set(const BlockDriverState *bs) -{ - return bs->write_threshold_offset > 0; -} - -static void write_threshold_disable(BlockDriverState *bs) -{ - if (bdrv_write_threshold_is_set(bs)) { - notifier_with_return_remove(&bs->write_threshold_notifier); - bs->write_threshold_offset = 0; - } -} - -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, - const BdrvTrackedRequest *req) -{ - if (bdrv_write_threshold_is_set(bs)) { - if (req->offset > bs->write_threshold_offset) { - return (req->offset - bs->write_threshold_offset) + req->bytes; - } - if ((req->offset + req->bytes) > bs->write_threshold_offset) { - return (req->offset + req->bytes) - bs->write_threshold_offset; - } - } - return 0; -} - -static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, - void *opaque) -{ - BdrvTrackedRequest *req = opaque; - BlockDriverState *bs = req->bs; - uint64_t amount = 0; - - amount = bdrv_write_threshold_exceeded(bs, req); - if (amount > 0) { - qapi_event_send_block_write_threshold( - bs->node_name, - amount, - bs->write_threshold_offset); - - /* autodisable to avoid flooding the monitor */ - write_threshold_disable(bs); - } - - return 0; /* should always let other notifiers run */ -} - -static void write_threshold_register_notifier(BlockDriverState *bs) -{ - bs->write_threshold_notifier.notify = before_write_notify; - bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier); -} - -static void write_threshold_update(BlockDriverState *bs, - int64_t threshold_bytes) -{ - 
bs->write_threshold_offset = threshold_bytes; -} - void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) { - if (bdrv_write_threshold_is_set(bs)) { - if (threshold_bytes > 0) { - write_threshold_update(bs, threshold_bytes); - } else { - write_threshold_disable(bs); - } - } else { - if (threshold_bytes > 0) { - /* avoid multiple registration */ - write_threshold_register_notifier(bs); - write_threshold_update(bs, threshold_bytes); - } - /* discard bogus disable request */ - } + bs->write_threshold_offset = threshold_bytes; } void qmp_block_set_write_threshold(const char *node_name, @@ -122,3 +47,17 @@ void qmp_block_set_write_threshold(const char *node_name, aio_context_release(aio_context); } + +void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, + int64_t bytes) +{ + int64_t end = offset + bytes; + uint64_t wtr = bs->write_threshold_offset; + + if (wtr > 0 && end > wtr) { + qapi_event_send_block_write_threshold(bs->node_name, end - wtr, wtr); + + /* autodisable to avoid flooding the monitor */ + bdrv_write_threshold_set(bs, 0); + } +} diff --git a/blockdev-nbd.c b/blockdev-nbd.c index b264620b98d..bdfa7ed3a5a 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -108,9 +108,9 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) return NULL; } - if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { - error_setg(errp, - "Expecting TLS credentials with a server endpoint"); + if (!qcrypto_tls_creds_check_endpoint(creds, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + errp)) { return NULL; } object_ref(obj); diff --git a/blockdev.c b/blockdev.c index a57590aae40..b35072644eb 100644 --- a/blockdev.c +++ b/blockdev.c @@ -56,7 +56,6 @@ #include "sysemu/iothread.h" #include "block/block_int.h" #include "block/trace.h" -#include "sysemu/arch_init.h" #include "sysemu/runstate.h" #include "sysemu/replay.h" #include "qemu/cutils.h" @@ -583,8 +582,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); - blk_rs->open_flags = bdrv_flags; - blk_rs->read_only = read_only; + blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR); blk_rs->detect_zeroes = detect_zeroes; qobject_unref(bs_opts); @@ -1576,10 +1574,6 @@ static void external_snapshot_prepare(BlkActionState *common, goto out; } - /* This removes our old bs and adds the new bs. This is an operation that - * can fail, so we need to do it in .prepare; undoing it for abort is - * always possible. 
*/ - bdrv_ref(state->new_bs); ret = bdrv_append(state->new_bs, state->old_bs, errp); if (ret < 0) { goto out; @@ -1719,6 +1713,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + state->bs = bs; /* Paired with .clean() */ bdrv_drained_begin(bs); @@ -1818,8 +1813,6 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) } } - state->bs = bs; - state->job = do_backup_common(qapi_DriveBackup_base(backup), bs, target_bs, aio_context, common->block_job_txn, errp); @@ -1854,7 +1847,7 @@ static void drive_backup_abort(BlkActionState *common) aio_context = bdrv_get_aio_context(state->bs); aio_context_acquire(aio_context); - job_cancel_sync(&state->job->job); + job_cancel_sync(&state->job->job, true); aio_context_release(aio_context); } @@ -1955,7 +1948,7 @@ static void blockdev_backup_abort(BlkActionState *common) aio_context = bdrv_get_aio_context(state->bs); aio_context_acquire(aio_context); - job_cancel_sync(&state->job->job); + job_cancel_sync(&state->job->job, true); aio_context_release(aio_context); } @@ -3565,46 +3558,60 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) visit_free(v); } -void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp) +void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) { - BlockDriverState *bs; - AioContext *ctx; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - BlockReopenQueue *queue; - QDict *qdict; + BlockReopenQueue *queue = NULL; + GSList *drained = NULL; - /* Check for the selected node name */ - if (!options->has_node_name) { - error_setg(errp, "node-name not specified"); - goto fail; - } + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { + BlockdevOptions *options = reopen_list->value; + BlockDriverState *bs; + AioContext *ctx; + QObject *obj; + Visitor *v; + QDict *qdict; - bs = bdrv_find_node(options->node_name); - if (!bs) { - error_setg(errp, "Failed to find node with node-name='%s'", - options->node_name); - goto fail; - } + /* Check for the selected node name */ + if (!options->has_node_name) { + error_setg(errp, "node-name not specified"); + goto fail; + } - /* Put all options in a QDict and flatten it */ - visit_type_BlockdevOptions(v, NULL, &options, &error_abort); - visit_complete(v, &obj); - qdict = qobject_to(QDict, obj); + bs = bdrv_find_node(options->node_name); + if (!bs) { + error_setg(errp, "Failed to find node with node-name='%s'", + options->node_name); + goto fail; + } - qdict_flatten(qdict); + /* Put all options in a QDict and flatten it */ + v = qobject_output_visitor_new(&obj); + visit_type_BlockdevOptions(v, NULL, &options, &error_abort); + visit_complete(v, &obj); + visit_free(v); + + qdict = qobject_to(QDict, obj); + + qdict_flatten(qdict); + + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + + bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(queue, bs, qdict, false); + drained = g_slist_prepend(drained, bs); + + aio_context_release(ctx); + } /* Perform the reopen operation */ - ctx = bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, qdict, false); bdrv_reopen_multiple(queue, errp); - bdrv_subtree_drained_end(bs); - aio_context_release(ctx); + queue = NULL; fail: - visit_free(v); + bdrv_reopen_queue_free(queue); + g_slist_free_full(drained, (GDestroyNotify) 
bdrv_subtree_drained_end); } void qmp_blockdev_del(const char *node_name, Error **errp) diff --git a/blockjob.c b/blockjob.c index 207e8c7fd91..4bad1408cb5 100644 --- a/blockjob.c +++ b/blockjob.c @@ -87,6 +87,7 @@ void block_job_free(Job *job) block_job_remove_all_bdrv(bjob); blk_unref(bjob->blk); + ratelimit_destroy(&bjob->limit); error_free(bjob->blocker); } @@ -163,6 +164,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx, job->job.aio_context = ctx; } +static AioContext *child_job_get_parent_aio_context(BdrvChild *c) +{ + BlockJob *job = c->opaque; + + return job->job.aio_context; +} + static const BdrvChildClass child_job = { .get_parent_desc = child_job_get_parent_desc, .drained_begin = child_job_drained_begin, @@ -171,6 +179,7 @@ static const BdrvChildClass child_job = { .can_set_aio_ctx = child_job_can_set_aio_ctx, .set_aio_ctx = child_job_set_aio_ctx, .stay_at_node = true, + .get_parent_aio_context = child_job_get_parent_aio_context, }; void block_job_remove_all_bdrv(BlockJob *job) @@ -221,8 +230,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { aio_context_release(job->job.aio_context); } - c = bdrv_root_attach_child(bs, name, &child_job, 0, - job->job.aio_context, perm, shared_perm, job, + c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job, errp); if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { aio_context_acquire(job->job.aio_context); @@ -292,28 +300,29 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n) { - if (!job->speed) { - return 0; - } - return ratelimit_calculate_delay(&job->limit, n); } BlockJobInfo *block_job_query(BlockJob *job, Error **errp) { BlockJobInfo *info; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { error_setg(errp, "Cannot query QEMU internal jobs"); return NULL; } + + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + info = g_new0(BlockJobInfo, 1); info->type = g_strdup(job_type_str(&job->job)); info->device = g_strdup(job->job.id); info->busy = qatomic_read(&job->job.busy); info->paused = job->job.pause_count > 0; - info->offset = job->job.progress.current; - info->len = job->job.progress.total; + info->offset = progress_current; + info->len = progress_total; info->speed = job->speed; info->io_status = job->iostatus; info->ready = job_is_ready(&job->job), @@ -340,15 +349,19 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) static void block_job_event_cancelled(Notifier *n, void *opaque) { BlockJob *job = opaque; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { return; } + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + qapi_event_send_block_job_cancelled(job_type(&job->job), job->job.id, - job->job.progress.total, - job->job.progress.current, + progress_total, + progress_current, job->speed); } @@ -356,6 +369,7 @@ static void block_job_event_completed(Notifier *n, void *opaque) { BlockJob *job = opaque; const char *msg = NULL; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { return; @@ -365,10 +379,13 @@ static void block_job_event_completed(Notifier *n, void *opaque) msg = error_get_pretty(job->job.err); } + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + 
qapi_event_send_block_job_completed(job_type(&job->job), job->job.id, - job->job.progress.total, - job->job.progress.current, + progress_total, + progress_current, job->speed, !!msg, msg); @@ -389,15 +406,19 @@ static void block_job_event_pending(Notifier *n, void *opaque) static void block_job_event_ready(Notifier *n, void *opaque) { BlockJob *job = opaque; + uint64_t progress_current, progress_total; if (block_job_is_internal(job)) { return; } + progress_get_snapshot(&job->job.progress, &progress_current, + &progress_total); + qapi_event_send_block_job_ready(job_type(&job->job), job->job.id, - job->job.progress.total, - job->job.progress.current, + progress_total, + progress_current, job->speed); } @@ -435,6 +456,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, assert(job->job.driver->free == &block_job_free); assert(job->job.driver->user_resume == &block_job_user_resume); + ratelimit_init(&job->limit); + job->blk = blk; job->finalize_cancelled_notifier.notify = block_job_event_cancelled; @@ -462,12 +485,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, blk_set_disable_request_queuing(blk, true); blk_set_allow_aio_context_change(blk, true); - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - if (!block_job_set_speed(job, speed, errp)) { - job_early_fail(&job->job); - return NULL; - } + if (!block_job_set_speed(job, speed, errp)) { + job_early_fail(&job->job); + return NULL; } return job; diff --git a/bsd-user/bsd-mman.h b/bsd-user/bsd-mman.h deleted file mode 100644 index 910e8c19210..00000000000 --- a/bsd-user/bsd-mman.h +++ /dev/null @@ -1,121 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)mman.h 8.2 (Berkeley) 1/9/95 - * $FreeBSD: src/sys/sys/mman.h,v 1.42 2008/03/28 04:29:27 ps Exp $ - */ - -#define TARGET_FREEBSD_MAP_RESERVED0080 0x0080 /* previously misimplemented MAP_INHERIT */ -#define TARGET_FREEBSD_MAP_RESERVED0100 0x0100 /* previously unimplemented MAP_NOEXTEND */ -#define TARGET_FREEBSD_MAP_STACK 0x0400 /* region grows down, like a stack */ -#define TARGET_FREEBSD_MAP_NOSYNC 0x0800 /* page to but do not sync underlying file */ - -#define TARGET_FREEBSD_MAP_FLAGMASK 0x1ff7 - -/* $NetBSD: mman.h,v 1.42 2008/11/18 22:13:49 ad Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)mman.h 8.2 (Berkeley) 1/9/95 - */ -#define TARGET_NETBSD_MAP_INHERIT 0x0080 /* region is retained after exec */ -#define TARGET_NETBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, even within break */ -#define TARGET_NETBSD_MAP_WIRED 0x0800 /* mlock() mapping when it is established */ - -#define TARGET_NETBSD_MAP_STACK 0x2000 /* allocated from memory, swap space (stack) */ - -#define TARGET_NETBSD_MAP_FLAGMASK 0x3ff7 - -/* $OpenBSD: mman.h,v 1.18 2003/07/21 22:52:19 tedu Exp $ */ -/* $NetBSD: mman.h,v 1.11 1995/03/26 20:24:23 jtc Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)mman.h 8.1 (Berkeley) 6/2/93 - */ - -#define TARGET_OPENBSD_MAP_INHERIT 0x0080 /* region is retained after exec */ -#define TARGET_OPENBSD_MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change file size */ -#define TARGET_OPENBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, even within heap */ - -#define TARGET_OPENBSD_MAP_FLAGMASK 0x17f7 - -// XXX -#define TARGET_BSD_MAP_FLAGMASK 0x3ff7 diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c index f38c4faacf8..5b3c061a452 100644 --- a/bsd-user/bsdload.c +++ b/bsd-user/bsdload.c @@ -1,11 +1,24 @@ -/* Code for loading BSD executables. Mostly linux kernel code. */ +/* + * Load BSD executables. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ #include "qemu/osdep.h" #include "qemu.h" -#define TARGET_NGROUPS 32 - /* ??? This should really be somewhere else. */ abi_long memcpy_to_target(abi_ulong dest, const void *src, unsigned long len) @@ -13,47 +26,48 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src, void *host_ptr; host_ptr = lock_user(VERIFY_WRITE, dest, len, 0); - if (!host_ptr) + if (!host_ptr) { return -TARGET_EFAULT; + } memcpy(host_ptr, src, len); unlock_user(host_ptr, dest, 1); return 0; } -static int count(char ** vec) +static int count(char **vec) { int i; - for(i = 0; *vec; i++) { + for (i = 0; *vec; i++) { vec++; } - return(i); + return i; } -static int prepare_binprm(struct linux_binprm *bprm) +static int prepare_binprm(struct bsd_binprm *bprm) { struct stat st; int mode; int retval; - if(fstat(bprm->fd, &st) < 0) { - return(-errno); + if (fstat(bprm->fd, &st) < 0) { + return -errno; } mode = st.st_mode; - if(!S_ISREG(mode)) { /* Must be regular file */ - return(-EACCES); + if (!S_ISREG(mode)) { /* Must be regular file */ + return -EACCES; } - if(!(mode & 0111)) { /* Must have at least one execute bit set */ - return(-EACCES); + if (!(mode & 0111)) { /* Must have at least one execute bit set */ + return -EACCES; } bprm->e_uid = geteuid(); bprm->e_gid = getegid(); /* Set-uid? 
*/ - if(mode & S_ISUID) { + if (mode & S_ISUID) { bprm->e_uid = st.st_uid; } @@ -69,22 +83,20 @@ static int prepare_binprm(struct linux_binprm *bprm) memset(bprm->buf, 0, sizeof(bprm->buf)); retval = lseek(bprm->fd, 0L, SEEK_SET); - if(retval >= 0) { + if (retval >= 0) { retval = read(bprm->fd, bprm->buf, 128); } - if(retval < 0) { + if (retval < 0) { perror("prepare_binprm"); exit(-1); - /* return(-errno); */ - } - else { - return(retval); + } else { + return retval; } } /* Construct the envp and argv tables on the target stack. */ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp, - abi_ulong stringp, int push_ptr) + abi_ulong stringp) { int n = sizeof(abi_ulong); abi_ulong envp; @@ -94,13 +106,6 @@ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp, envp = sp; sp -= (argc + 1) * n; argv = sp; - if (push_ptr) { - /* FIXME - handle put_user() failures */ - sp -= n; - put_user_ual(envp, sp); - sp -= n; - put_user_ual(argv, sp); - } sp -= n; /* FIXME - handle put_user() failures */ put_user_ual(argc, sp); @@ -125,49 +130,85 @@ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp, return sp; } -int loader_exec(const char * filename, char ** argv, char ** envp, - struct target_pt_regs * regs, struct image_info *infop) +static bool is_there(const char *candidate) { - struct linux_binprm bprm; - int retval; - int i; + struct stat fin; + + /* XXX work around access(2) false positives for superuser */ + if (access(candidate, X_OK) == 0 && stat(candidate, &fin) == 0 && + S_ISREG(fin.st_mode) && (getuid() != 0 || + (fin.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)) { + return true; + } + + return false; +} + +int loader_exec(const char *filename, char **argv, char **envp, + struct target_pt_regs *regs, struct image_info *infop, + struct bsd_binprm *bprm) +{ + char *path, fullpath[PATH_MAX]; + int retval, i; - bprm.p = TARGET_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int); - for (i=0 ; ip = TARGET_PAGE_SIZE * MAX_ARG_PAGES; + for (i = 0; i < MAX_ARG_PAGES; i++) { /* clear page-table */ + bprm->page[i] = NULL; + } + + if (strchr(filename, '/') != NULL) { + path = realpath(filename, fullpath); + if (path == NULL) { + /* Failed to resolve. */ + return -1; + } + if (!is_there(path)) { + return -1; + } + } else { + path = g_find_program_in_path(filename); + if (path == NULL) { + return -1; + } + } + + retval = open(path, O_RDONLY); + if (retval < 0) { + g_free(path); return retval; - bprm.fd = retval; - bprm.filename = (char *)filename; - bprm.argc = count(argv); - bprm.argv = argv; - bprm.envc = count(envp); - bprm.envp = envp; - - retval = prepare_binprm(&bprm); - - if(retval>=0) { - if (bprm.buf[0] == 0x7f - && bprm.buf[1] == 'E' - && bprm.buf[2] == 'L' - && bprm.buf[3] == 'F') { - retval = load_elf_binary(&bprm,regs,infop); + } + + bprm->fullpath = path; + bprm->fd = retval; + bprm->filename = (char *)filename; + bprm->argc = count(argv); + bprm->argv = argv; + bprm->envc = count(envp); + bprm->envp = envp; + + retval = prepare_binprm(bprm); + + if (retval >= 0) { + if (bprm->buf[0] == 0x7f + && bprm->buf[1] == 'E' + && bprm->buf[2] == 'L' + && bprm->buf[3] == 'F') { + retval = load_elf_binary(bprm, regs, infop); } else { fprintf(stderr, "Unknown binary format\n"); return -1; } } - if(retval>=0) { + if (retval >= 0) { /* success. 
Initialize important registers */ do_init_thread(regs, infop); return retval; } /* Something went wrong, return the inode and free the argument pages*/ - for (i=0 ; ipage[i]); } - return(retval); + return retval; } diff --git a/bsd-user/elfcore.c b/bsd-user/elfcore.c new file mode 100644 index 00000000000..c49d9280e2d --- /dev/null +++ b/bsd-user/elfcore.c @@ -0,0 +1,10 @@ +/* Stubbed out version of core dump support, explicitly in public domain */ + +static int elf_core_dump(int signr, CPUArchState *env) +{ + struct elf_note en = { 0 }; + + bswap_note(&en); + + return 0; +} diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index 5f4d824d78f..142a5bfac26 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -1,563 +1,53 @@ -/* This is the Linux kernel elf-loading code, ported into user space */ - -#include "qemu/osdep.h" - -#include "qemu.h" -#include "disas/disas.h" -#include "qemu/path.h" - -#ifdef _ARCH_PPC64 -#undef ARCH_DLINFO -#undef ELF_PLATFORM -#undef ELF_HWCAP -#undef ELF_CLASS -#undef ELF_DATA -#undef ELF_ARCH -#endif - -/* from personality.h */ - /* - * Flags for bug emulation. + * ELF loading code * - * These occupy the top three bytes. - */ -enum { - ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization of VA space */ - FDPIC_FUNCPTRS = 0x0080000, /* userspace function ptrs point to descriptors - * (signal handling) - */ - MMAP_PAGE_ZERO = 0x0100000, - ADDR_COMPAT_LAYOUT = 0x0200000, - READ_IMPLIES_EXEC = 0x0400000, - ADDR_LIMIT_32BIT = 0x0800000, - SHORT_INODE = 0x1000000, - WHOLE_SECONDS = 0x2000000, - STICKY_TIMEOUTS = 0x4000000, - ADDR_LIMIT_3GB = 0x8000000, -}; - -/* - * Personality types. + * Copyright (c) 2013 Stacey D. Son * - * These go in the low byte. Avoid using the top bit, it will - * conflict with error returns. - */ -enum { - PER_LINUX = 0x0000, - PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT, - PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS, - PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, - PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE, - PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS | - WHOLE_SECONDS | SHORT_INODE, - PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS, - PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE, - PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS, - PER_BSD = 0x0006, - PER_SUNOS = 0x0006 | STICKY_TIMEOUTS, - PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE, - PER_LINUX32 = 0x0008, - PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB, - PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */ - PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */ - PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */ - PER_RISCOS = 0x000c, - PER_SOLARIS = 0x000d | STICKY_TIMEOUTS, - PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO, - PER_OSF4 = 0x000f, /* OSF/1 v4 */ - PER_HPUX = 0x0010, - PER_MASK = 0x00ff, -}; - -/* - * Return the base personality without flags. 
- */ -#define personality(pers) (pers & PER_MASK) - -/* this flag is uneffective under linux too, should be deleted */ -#ifndef MAP_DENYWRITE -#define MAP_DENYWRITE 0 -#endif - -/* should probably go in elf.h */ -#ifndef ELIBBAD -#define ELIBBAD 80 -#endif - -#ifdef TARGET_I386 - -#define ELF_PLATFORM get_elf_platform() - -static const char *get_elf_platform(void) -{ - static char elf_platform[] = "i386"; - int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL); - if (family > 6) - family = 6; - if (family >= 3) - elf_platform[1] = '0' + family; - return elf_platform; -} - -#define ELF_HWCAP get_elf_hwcap() - -static uint32_t get_elf_hwcap(void) -{ - X86CPU *cpu = X86_CPU(thread_cpu); - - return cpu->env.features[FEAT_1_EDX]; -} - -#ifdef TARGET_X86_64 -#define ELF_START_MMAP 0x2aaaaab000ULL -#define elf_check_arch(x) ( ((x) == ELF_ARCH) ) - -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_X86_64 - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->rax = 0; - regs->rsp = infop->start_stack; - regs->rip = infop->entry; - if (bsd_type == target_freebsd) { - regs->rdi = infop->start_stack; - } -} - -#else - -#define ELF_START_MMAP 0x80000000 - -/* - * This is used to ensure we don't load something for the wrong architecture. - */ -#define elf_check_arch(x) ( ((x) == EM_386) || ((x) == EM_486) ) - -/* - * These are used to set parameters in the core dumps. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . */ -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_386 - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->esp = infop->start_stack; - regs->eip = infop->entry; - - /* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program - starts %edx contains a pointer to a function which might be - registered using `atexit'. This provides a mean for the - dynamic linker to call DT_FINI functions for shared libraries - that have been loaded before the code runs. - - A value of 0 tells we have no such handler. */ - regs->edx = 0; -} -#endif - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif - -#ifdef TARGET_ARM - -#define ELF_START_MMAP 0x80000000 -#define elf_check_arch(x) ( (x) == EM_ARM ) - -#define ELF_CLASS ELFCLASS32 -#ifdef TARGET_WORDS_BIGENDIAN -#define ELF_DATA ELFDATA2MSB -#else -#define ELF_DATA ELFDATA2LSB -#endif -#define ELF_ARCH EM_ARM - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - abi_long stack = infop->start_stack; - memset(regs, 0, sizeof(*regs)); - regs->ARM_cpsr = 0x10; - if (infop->entry & 1) - regs->ARM_cpsr |= CPSR_T; - regs->ARM_pc = infop->entry & 0xfffffffe; - regs->ARM_sp = infop->start_stack; - /* FIXME - what to for failure of get_user()? 
*/ - get_user_ual(regs->ARM_r2, stack + 8); /* envp */ - get_user_ual(regs->ARM_r1, stack + 4); /* envp */ - /* XXX: it seems that r0 is zeroed after ! */ - regs->ARM_r0 = 0; - /* For uClinux PIC binaries. */ - /* XXX: Linux does this only on ARM with no MMU (do we care ?) */ - regs->ARM_r10 = infop->start_data; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -enum -{ - ARM_HWCAP_ARM_SWP = 1 << 0, - ARM_HWCAP_ARM_HALF = 1 << 1, - ARM_HWCAP_ARM_THUMB = 1 << 2, - ARM_HWCAP_ARM_26BIT = 1 << 3, - ARM_HWCAP_ARM_FAST_MULT = 1 << 4, - ARM_HWCAP_ARM_FPA = 1 << 5, - ARM_HWCAP_ARM_VFP = 1 << 6, - ARM_HWCAP_ARM_EDSP = 1 << 7, -}; - -#define ELF_HWCAP (ARM_HWCAP_ARM_SWP | ARM_HWCAP_ARM_HALF \ - | ARM_HWCAP_ARM_THUMB | ARM_HWCAP_ARM_FAST_MULT \ - | ARM_HWCAP_ARM_FPA | ARM_HWCAP_ARM_VFP) - -#endif - -#ifdef TARGET_SPARC -#ifdef TARGET_SPARC64 - -#define ELF_START_MMAP 0x80000000 - -#ifndef TARGET_ABI32 -#define elf_check_arch(x) ( (x) == EM_SPARCV9 || (x) == EM_SPARC32PLUS ) -#else -#define elf_check_arch(x) ( (x) == EM_SPARC32PLUS || (x) == EM_SPARC ) -#endif - -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_SPARCV9 - -#define STACK_BIAS 2047 - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ -#ifndef TARGET_ABI32 - regs->tstate = 0; -#endif - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; -#ifdef TARGET_ABI32 - regs->u_regs[14] = infop->start_stack - 16 * 4; -#else - if (personality(infop->personality) == PER_LINUX32) - regs->u_regs[14] = infop->start_stack - 16 * 4; - else { - regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS; - if (bsd_type == target_freebsd) { - regs->u_regs[8] = infop->start_stack; - regs->u_regs[11] = infop->start_stack; - } - } -#endif -} - -#else -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_SPARC ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_SPARC - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->psr = 0; - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; - regs->u_regs[14] = infop->start_stack - 16 * 4; -} - -#endif -#endif - -#ifdef TARGET_PPC - -#define ELF_START_MMAP 0x80000000 - -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) - -#define elf_check_arch(x) ( (x) == EM_PPC64 ) - -#define ELF_CLASS ELFCLASS64 - -#else - -#define elf_check_arch(x) ( (x) == EM_PPC ) - -#define ELF_CLASS ELFCLASS32 - -#endif - -#ifdef TARGET_WORDS_BIGENDIAN -#define ELF_DATA ELFDATA2MSB -#else -#define ELF_DATA ELFDATA2LSB -#endif -#define ELF_ARCH EM_PPC - -/* - * We need to put in some extra aux table entries to tell glibc what - * the cache block size is, so it can use the dcbz instruction safely. - */ -#define AT_DCACHEBSIZE 19 -#define AT_ICACHEBSIZE 20 -#define AT_UCACHEBSIZE 21 -/* A special ignored type value for PPC, for glibc compatibility. */ -#define AT_IGNOREPPC 22 -/* - * The requirements here are: - * - keep the final alignment of sp (sp & 0xf) - * - make sure the 32-bit value at the first 16 byte aligned position of - * AUXV is greater than 16 for glibc compatibility. - * AT_IGNOREPPC is used for that. - * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC, - * even if DLINFO_ARCH_ITEMS goes to zero or is undefined. 
- */ -#define DLINFO_ARCH_ITEMS 5 -#define ARCH_DLINFO \ -do { \ - NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20); \ - NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20); \ - NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \ - /* \ - * Now handle glibc compatibility. \ - */ \ - NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ - NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \ - } while (0) - -static inline void init_thread(struct target_pt_regs *_regs, struct image_info *infop) -{ - abi_ulong pos = infop->start_stack; - abi_ulong tmp; -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) - abi_ulong entry, toc; -#endif - - _regs->gpr[1] = infop->start_stack; -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) - get_user_u64(entry, infop->entry); - entry += infop->load_addr; - get_user_u64(toc, infop->entry + 8); - toc += infop->load_addr; - _regs->gpr[2] = toc; - infop->entry = entry; -#endif - _regs->nip = infop->entry; - /* Note that isn't exactly what regular kernel does - * but this is what the ABI wants and is needed to allow - * execution of PPC BSD programs. - */ - /* FIXME - what to for failure of get_user()? */ - get_user_ual(_regs->gpr[3], pos); - pos += sizeof(abi_ulong); - _regs->gpr[4] = pos; - for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong)) { - get_user_ual(tmp, pos); - } - _regs->gpr[5] = pos; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif - -#ifdef TARGET_MIPS - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_MIPS ) - -#ifdef TARGET_MIPS64 -#define ELF_CLASS ELFCLASS64 -#else -#define ELF_CLASS ELFCLASS32 -#endif -#ifdef TARGET_WORDS_BIGENDIAN -#define ELF_DATA ELFDATA2MSB -#else -#define ELF_DATA ELFDATA2LSB -#endif -#define ELF_ARCH EM_MIPS - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->cp0_status = 2 << CP0St_KSU; - regs->cp0_epc = infop->entry; - regs->regs[29] = infop->start_stack; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif /* TARGET_MIPS */ - -#ifdef TARGET_SH4 - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_SH ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_SH - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - /* Check other registers XXXXX */ - regs->pc = infop->entry; - regs->regs[15] = infop->start_stack; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 4096 - -#endif - -#ifdef TARGET_CRIS - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_CRIS ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_CRIS - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->erp = infop->entry; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 8192 - -#endif - -#ifdef TARGET_M68K - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ( (x) == EM_68K ) - -#define ELF_CLASS ELFCLASS32 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_68K - -/* ??? Does this need to do anything? 
-#define ELF_PLAT_INIT(_r) */ - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->usp = infop->start_stack; - regs->sr = 0; - regs->pc = infop->entry; -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 8192 - -#endif - -#ifdef TARGET_ALPHA - -#define ELF_START_MMAP (0x30000000000ULL) - -#define elf_check_arch(x) ( (x) == ELF_ARCH ) - -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2MSB -#define ELF_ARCH EM_ALPHA - -static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->pc = infop->entry; - regs->ps = 8; - regs->usp = infop->start_stack; - regs->unique = infop->start_data; /* ? */ - printf("Set unique value to " TARGET_FMT_lx " (" TARGET_FMT_lx ")\n", - regs->unique, infop->start_data); -} - -#define USE_ELF_CORE_DUMP -#define ELF_EXEC_PAGESIZE 8192 - -#endif /* TARGET_ALPHA */ - -#ifndef ELF_PLATFORM -#define ELF_PLATFORM (NULL) -#endif - -#ifndef ELF_HWCAP -#define ELF_HWCAP 0 -#endif - -#ifdef TARGET_ABI32 -#undef ELF_CLASS -#define ELF_CLASS ELFCLASS32 -#undef bswaptls -#define bswaptls(ptr) bswap32s(ptr) -#endif - -#include "elf.h" - -struct exec -{ - unsigned int a_info; /* Use macros N_MAGIC, etc for access */ - unsigned int a_text; /* length of text, in bytes */ - unsigned int a_data; /* length of data, in bytes */ - unsigned int a_bss; /* length of uninitialized data area, in bytes */ - unsigned int a_syms; /* length of symbol table data in file, in bytes */ - unsigned int a_entry; /* start address */ - unsigned int a_trsize; /* length of relocation info for text, in bytes */ - unsigned int a_drsize; /* length of relocation info for data, in bytes */ -}; - - -#define N_MAGIC(exec) ((exec).a_info & 0xffff) -#define OMAGIC 0407 -#define NMAGIC 0410 -#define ZMAGIC 0413 -#define QMAGIC 0314 +#include "qemu/osdep.h" -/* max code+data+bss space allocated to elf interpreter */ -#define INTERP_MAP_SIZE (32 * 1024 * 1024) +#include "qemu.h" +#include "disas/disas.h" +#include "qemu/path.h" -/* max code+data+bss+brk space allocated to ET_DYN executables */ -#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) +static abi_ulong target_auxents; /* Where the AUX entries are in target */ +static size_t target_auxents_sz; /* Size of AUX entries including AT_NULL */ -/* Necessary parameters */ -#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE -#define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE-1)) -#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1)) +#include "target_arch_reg.h" +#include "target_os_elf.h" +#include "target_os_stack.h" +#include "target_os_thread.h" +#include "target_os_user.h" -#define INTERPRETER_NONE 0 -#define INTERPRETER_AOUT 1 -#define INTERPRETER_ELF 2 +abi_ulong target_stksiz; +abi_ulong target_stkbas; -#define DLINFO_ITEMS 12 +static int elf_core_dump(int signr, CPUArchState *env); +static int load_elf_sections(const struct elfhdr *hdr, struct elf_phdr *phdr, + int fd, abi_ulong rbase, abi_ulong *baddrp); -static inline void memcpy_fromfs(void * to, const void * from, unsigned long n) +static inline void memcpy_fromfs(void *to, const void *from, unsigned long n) { - memcpy(to, from, n); + memcpy(to, from, n); } -static int load_aout_interp(void * exptr, int interp_fd); - #ifdef BSWAP_NEEDED static void bswap_ehdr(struct elfhdr *ehdr) { - bswap16s(&ehdr->e_type); /* Object file type */ + bswap16s(&ehdr->e_type); /* Object file type */ bswap16s(&ehdr->e_machine); /* Architecture */ bswap32s(&ehdr->e_version); /* Object file 
version */ bswaptls(&ehdr->e_entry); /* Entry point virtual address */ @@ -565,37 +55,45 @@ static void bswap_ehdr(struct elfhdr *ehdr) bswaptls(&ehdr->e_shoff); /* Section header table file offset */ bswap32s(&ehdr->e_flags); /* Processor-specific flags */ bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */ - bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ + bswap16s(&ehdr->e_phentsize); /* Program header table entry size */ bswap16s(&ehdr->e_phnum); /* Program header table entry count */ - bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ + bswap16s(&ehdr->e_shentsize); /* Section header table entry size */ bswap16s(&ehdr->e_shnum); /* Section header table entry count */ - bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ + bswap16s(&ehdr->e_shstrndx); /* Section header string table index */ } -static void bswap_phdr(struct elf_phdr *phdr) +static void bswap_phdr(struct elf_phdr *phdr, int phnum) { - bswap32s(&phdr->p_type); /* Segment type */ - bswaptls(&phdr->p_offset); /* Segment file offset */ - bswaptls(&phdr->p_vaddr); /* Segment virtual address */ - bswaptls(&phdr->p_paddr); /* Segment physical address */ - bswaptls(&phdr->p_filesz); /* Segment size in file */ - bswaptls(&phdr->p_memsz); /* Segment size in memory */ - bswap32s(&phdr->p_flags); /* Segment flags */ - bswaptls(&phdr->p_align); /* Segment alignment */ + int i; + + for (i = 0; i < phnum; i++, phdr++) { + bswap32s(&phdr->p_type); /* Segment type */ + bswap32s(&phdr->p_flags); /* Segment flags */ + bswaptls(&phdr->p_offset); /* Segment file offset */ + bswaptls(&phdr->p_vaddr); /* Segment virtual address */ + bswaptls(&phdr->p_paddr); /* Segment physical address */ + bswaptls(&phdr->p_filesz); /* Segment size in file */ + bswaptls(&phdr->p_memsz); /* Segment size in memory */ + bswaptls(&phdr->p_align); /* Segment alignment */ + } } -static void bswap_shdr(struct elf_shdr *shdr) +static void bswap_shdr(struct elf_shdr *shdr, int shnum) { - bswap32s(&shdr->sh_name); - bswap32s(&shdr->sh_type); - bswaptls(&shdr->sh_flags); - bswaptls(&shdr->sh_addr); - bswaptls(&shdr->sh_offset); - bswaptls(&shdr->sh_size); - bswap32s(&shdr->sh_link); - bswap32s(&shdr->sh_info); - bswaptls(&shdr->sh_addralign); - bswaptls(&shdr->sh_entsize); + int i; + + for (i = 0; i < shnum; i++, shdr++) { + bswap32s(&shdr->sh_name); + bswap32s(&shdr->sh_type); + bswaptls(&shdr->sh_flags); + bswaptls(&shdr->sh_addr); + bswaptls(&shdr->sh_offset); + bswaptls(&shdr->sh_size); + bswap32s(&shdr->sh_link); + bswap32s(&shdr->sh_info); + bswaptls(&shdr->sh_addralign); + bswaptls(&shdr->sh_entsize); + } } static void bswap_sym(struct elf_sym *sym) @@ -605,7 +103,25 @@ static void bswap_sym(struct elf_sym *sym) bswaptls(&sym->st_size); bswap16s(&sym->st_shndx); } -#endif + +static void bswap_note(struct elf_note *en) +{ + bswap32s(&en->n_namesz); + bswap32s(&en->n_descsz); + bswap32s(&en->n_type); +} + +#else /* ! BSWAP_NEEDED */ + +static void bswap_ehdr(struct elfhdr *ehdr) { } +static void bswap_phdr(struct elf_phdr *phdr, int phnum) { } +static void bswap_shdr(struct elf_shdr *shdr, int shnum) { } +static void bswap_sym(struct elf_sym *sym) { } +static void bswap_note(struct elf_note *en) { } + +#endif /* ! BSWAP_NEEDED */ + +#include "elfcore.c" /* * 'copy_elf_strings()' copies argument/envelope strings from user @@ -613,7 +129,7 @@ static void bswap_sym(struct elf_sym *sym) * to be put directly into the top of new user memory. 
* */ -static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, +static abi_ulong copy_elf_strings(int argc, char **argv, void **page, abi_ulong p) { char *tmp, *tmp1, *pag = NULL; @@ -629,27 +145,29 @@ static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, exit(-1); } tmp1 = tmp; - while (*tmp++); + while (*tmp++) { + continue; + } len = tmp - tmp1; if (p < len) { /* this shouldn't happen - 128kB */ - return 0; + return 0; } while (len) { --p; --tmp; --len; if (--offset < 0) { offset = p % TARGET_PAGE_SIZE; - pag = (char *)page[p/TARGET_PAGE_SIZE]; + pag = (char *)page[p / TARGET_PAGE_SIZE]; if (!pag) { pag = g_try_malloc0(TARGET_PAGE_SIZE); - page[p/TARGET_PAGE_SIZE] = pag; - if (!pag) + page[p / TARGET_PAGE_SIZE] = pag; + if (!pag) { return 0; + } } } if (len == 0 || offset == 0) { *(pag + offset) = *tmp; - } - else { + } else { int bytes_to_copy = (len > offset) ? offset : len; tmp -= bytes_to_copy; p -= bytes_to_copy; @@ -662,331 +180,174 @@ static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, return p; } -static abi_ulong setup_arg_pages(abi_ulong p, struct linux_binprm *bprm, - struct image_info *info) +static void setup_arg_pages(struct bsd_binprm *bprm, struct image_info *info, + abi_ulong *stackp, abi_ulong *stringp) { - abi_ulong stack_base, size, error; - int i; + abi_ulong stack_base, size; + abi_long addr; - /* Create enough stack to hold everything. If we don't use - * it for args, we'll use it for something else... + /* + * Create enough stack to hold everything. If we don't use it for args, + * we'll use it for something else... */ - size = x86_stack_size; - if (size < MAX_ARG_PAGES*TARGET_PAGE_SIZE) - size = MAX_ARG_PAGES*TARGET_PAGE_SIZE; - error = target_mmap(0, - size + qemu_host_page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, - -1, 0); - if (error == -1) { + size = target_dflssiz; + stack_base = TARGET_USRSTACK - size; + addr = target_mmap(stack_base , size + qemu_host_page_size, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (addr == -1) { perror("stk mmap"); exit(-1); } /* we reserve one extra page at the top of the stack as guard */ - target_mprotect(error + size, qemu_host_page_size, PROT_NONE); + target_mprotect(addr + size, qemu_host_page_size, PROT_NONE); - stack_base = error + size - MAX_ARG_PAGES*TARGET_PAGE_SIZE; - p += stack_base; + target_stksiz = size; + target_stkbas = addr; - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - if (bprm->page[i]) { - info->rss++; - /* FIXME - check return value of memcpy_to_target() for failure */ - memcpy_to_target(stack_base, bprm->page[i], TARGET_PAGE_SIZE); - g_free(bprm->page[i]); - } - stack_base += TARGET_PAGE_SIZE; + if (setup_initial_stack(bprm, stackp, stringp) != 0) { + perror("stk setup"); + exit(-1); } - return p; } static void set_brk(abi_ulong start, abi_ulong end) { - /* page-align the start and end addresses... */ - start = HOST_PAGE_ALIGN(start); - end = HOST_PAGE_ALIGN(end); - if (end <= start) - return; - if(target_mmap(start, end - start, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { - perror("cannot mmap brk"); - exit(-1); - } + /* page-align the start and end addresses... 
*/ + start = HOST_PAGE_ALIGN(start); + end = HOST_PAGE_ALIGN(end); + if (end <= start) { + return; + } + if (target_mmap(start, end - start, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { + perror("cannot mmap brk"); + exit(-1); + } } -/* We need to explicitly zero any fractional pages after the data - section (i.e. bss). This would contain the junk from the file that - should not be in memory. */ +/* + * We need to explicitly zero any fractional pages after the data + * section (i.e. bss). This would contain the junk from the file that + * should not be in memory. + */ static void padzero(abi_ulong elf_bss, abi_ulong last_bss) { - abi_ulong nbyte; - - if (elf_bss >= last_bss) - return; - - /* XXX: this is really a hack : if the real host page size is - smaller than the target page size, some pages after the end - of the file may not be mapped. A better fix would be to - patch target_mmap(), but it is more complicated as the file - size must be known */ - if (qemu_real_host_page_size < qemu_host_page_size) { - abi_ulong end_addr, end_addr1; - end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss); - end_addr = HOST_PAGE_ALIGN(elf_bss); - if (end_addr1 < end_addr) { - mmap((void *)g2h_untagged(end_addr1), end_addr - end_addr1, - PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE|MAP_ANON, -1, 0); - } - } - - nbyte = elf_bss & (qemu_host_page_size-1); - if (nbyte) { - nbyte = qemu_host_page_size - nbyte; - do { - /* FIXME - what to do if put_user() fails? */ - put_user_u8(0, elf_bss); - elf_bss++; - } while (--nbyte); - } -} + abi_ulong nbyte; + if (elf_bss >= last_bss) { + return; + } -static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, - struct elfhdr * exec, - abi_ulong load_addr, - abi_ulong load_bias, - abi_ulong interp_load_addr, int ibcs, - struct image_info *info) -{ - abi_ulong sp; - int size; - abi_ulong u_platform; - const char *k_platform; - const int n = sizeof(elf_addr_t); - - sp = p; - u_platform = 0; - k_platform = ELF_PLATFORM; - if (k_platform) { - size_t len = strlen(k_platform) + 1; - sp -= (len + n - 1) & ~(n - 1); - u_platform = sp; - /* FIXME - check return value of memcpy_to_target() for failure */ - memcpy_to_target(sp, k_platform, len); + /* + * XXX: this is really a hack : if the real host page size is + * smaller than the target page size, some pages after the end + * of the file may not be mapped. A better fix would be to + * patch target_mmap(), but it is more complicated as the file + * size must be known. + */ + if (qemu_real_host_page_size < qemu_host_page_size) { + abi_ulong end_addr, end_addr1; + end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss); + end_addr = HOST_PAGE_ALIGN(elf_bss); + if (end_addr1 < end_addr) { + mmap((void *)g2h_untagged(end_addr1), end_addr - end_addr1, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0); } - /* - * Force 16 byte _final_ alignment here for generality. - */ - sp = sp &~ (abi_ulong)15; - size = (DLINFO_ITEMS + 1) * 2; - if (k_platform) - size += 2; -#ifdef DLINFO_ARCH_ITEMS - size += DLINFO_ARCH_ITEMS * 2; -#endif - size += envc + argc + 2; - size += (!ibcs ? 3 : 1); /* argc itself */ - size *= n; - if (size & 15) - sp -= 16 - (size & 15); - - /* This is correct because Linux defines - * elf_addr_t as Elf32_Off / Elf64_Off - */ -#define NEW_AUX_ENT(id, val) do { \ - sp -= n; put_user_ual(val, sp); \ - sp -= n; put_user_ual(id, sp); \ - } while(0) - - NEW_AUX_ENT (AT_NULL, 0); - - /* There must be exactly DLINFO_ITEMS entries here. 
*/ - NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); - NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); - NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); - NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); - NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); - NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); - NEW_AUX_ENT(AT_UID, (abi_ulong) getuid()); - NEW_AUX_ENT(AT_EUID, (abi_ulong) geteuid()); - NEW_AUX_ENT(AT_GID, (abi_ulong) getgid()); - NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid()); - NEW_AUX_ENT(AT_HWCAP, (abi_ulong) ELF_HWCAP); - NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK)); - if (k_platform) - NEW_AUX_ENT(AT_PLATFORM, u_platform); -#ifdef ARCH_DLINFO - /* - * ARCH_DLINFO must come last so platform specific code can enforce - * special alignment requirements on the AUXV if necessary (eg. PPC). - */ - ARCH_DLINFO; -#endif -#undef NEW_AUX_ENT + } - sp = loader_build_argptr(envc, argc, sp, p, !ibcs); - return sp; + nbyte = elf_bss & (qemu_host_page_size - 1); + if (nbyte) { + nbyte = qemu_host_page_size - nbyte; + do { + /* FIXME - what to do if put_user() fails? */ + put_user_u8(0, elf_bss); + elf_bss++; + } while (--nbyte); + } } - -static abi_ulong load_elf_interp(struct elfhdr * interp_elf_ex, +static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex, int interpreter_fd, abi_ulong *interp_load_addr) { - struct elf_phdr *elf_phdata = NULL; - struct elf_phdr *eppnt; - abi_ulong load_addr = 0; - int load_addr_set = 0; - int retval; - abi_ulong last_bss, elf_bss; - abi_ulong error; - int i; + struct elf_phdr *elf_phdata = NULL; + abi_ulong rbase; + int retval; + abi_ulong baddr, error; - elf_bss = 0; - last_bss = 0; - error = 0; + error = 0; -#ifdef BSWAP_NEEDED - bswap_ehdr(interp_elf_ex); -#endif - /* First of all, some simple consistency checks */ - if ((interp_elf_ex->e_type != ET_EXEC && - interp_elf_ex->e_type != ET_DYN) || - !elf_check_arch(interp_elf_ex->e_machine)) { - return ~((abi_ulong)0UL); - } + bswap_ehdr(interp_elf_ex); + /* First of all, some simple consistency checks */ + if ((interp_elf_ex->e_type != ET_EXEC && interp_elf_ex->e_type != ET_DYN) || + !elf_check_arch(interp_elf_ex->e_machine)) { + return ~((abi_ulong)0UL); + } - /* Now read in all of the header information */ + /* Now read in all of the header information */ + if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > TARGET_PAGE_SIZE) { + return ~(abi_ulong)0UL; + } - if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > TARGET_PAGE_SIZE) - return ~(abi_ulong)0UL; + elf_phdata = (struct elf_phdr *) malloc(sizeof(struct elf_phdr) * + interp_elf_ex->e_phnum); - elf_phdata = (struct elf_phdr *) - malloc(sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); + if (!elf_phdata) { + return ~((abi_ulong)0UL); + } - if (!elf_phdata) - return ~((abi_ulong)0UL); + /* + * If the size of this structure has changed, then punt, since + * we will be doing the wrong thing. 
+ */ + if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr)) { + free(elf_phdata); + return ~((abi_ulong)0UL); + } + + retval = lseek(interpreter_fd, interp_elf_ex->e_phoff, SEEK_SET); + if (retval >= 0) { + retval = read(interpreter_fd, (char *) elf_phdata, + sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); + } + if (retval < 0) { + perror("load_elf_interp"); + exit(-1); + free(elf_phdata); + return retval; + } + bswap_phdr(elf_phdata, interp_elf_ex->e_phnum); + rbase = 0; + if (interp_elf_ex->e_type == ET_DYN) { /* - * If the size of this structure has changed, then punt, since - * we will be doing the wrong thing. + * In order to avoid hardcoding the interpreter load + * address in qemu, we allocate a big enough memory zone. */ - if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr)) { - free(elf_phdata); - return ~((abi_ulong)0UL); - } - - retval = lseek(interpreter_fd, interp_elf_ex->e_phoff, SEEK_SET); - if(retval >= 0) { - retval = read(interpreter_fd, - (char *) elf_phdata, - sizeof(struct elf_phdr) * interp_elf_ex->e_phnum); - } - if (retval < 0) { - perror("load_elf_interp"); - exit(-1); - free (elf_phdata); - return retval; - } -#ifdef BSWAP_NEEDED - eppnt = elf_phdata; - for (i=0; ie_phnum; i++, eppnt++) { - bswap_phdr(eppnt); - } -#endif - - if (interp_elf_ex->e_type == ET_DYN) { - /* in order to avoid hardcoding the interpreter load - address in qemu, we allocate a big enough memory zone */ - error = target_mmap(0, INTERP_MAP_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANON, - -1, 0); - if (error == -1) { - perror("mmap"); - exit(-1); - } - load_addr = error; - load_addr_set = 1; + rbase = target_mmap(0, INTERP_MAP_SIZE, PROT_NONE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (rbase == -1) { + perror("mmap"); + exit(-1); } + } - eppnt = elf_phdata; - for(i=0; ie_phnum; i++, eppnt++) - if (eppnt->p_type == PT_LOAD) { - int elf_type = MAP_PRIVATE | MAP_DENYWRITE; - int elf_prot = 0; - abi_ulong vaddr = 0; - abi_ulong k; - - if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) { - elf_type |= MAP_FIXED; - vaddr = eppnt->p_vaddr; - } - error = target_mmap(load_addr+TARGET_ELF_PAGESTART(vaddr), - eppnt->p_filesz + TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr), - elf_prot, - elf_type, - interpreter_fd, - eppnt->p_offset - TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr)); - - if (error == -1) { - /* Real error */ - close(interpreter_fd); - free(elf_phdata); - return ~((abi_ulong)0UL); - } - - if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) { - load_addr = error; - load_addr_set = 1; - } - - /* - * Find the end of the file mapping for this phdr, and keep - * track of the largest address we see for this. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; - if (k > elf_bss) elf_bss = k; - - /* - * Do the same thing for the memory mapping - between - * elf_bss and last_bss is the bss section. - */ - k = load_addr + eppnt->p_memsz + eppnt->p_vaddr; - if (k > last_bss) last_bss = k; - } - - /* Now use mmap to map the library into memory. */ - - close(interpreter_fd); + error = load_elf_sections(interp_elf_ex, elf_phdata, interpreter_fd, rbase, + &baddr); + if (error != 0) { + perror("load_elf_sections"); + exit(-1); + } - /* - * Now fill out the bss section. First pad the last page up - * to the page boundary, and then perform a mmap to make sure - * that there are zeromapped pages up to and including the last - * bss page. 
- */ - padzero(elf_bss, last_bss); - elf_bss = TARGET_ELF_PAGESTART(elf_bss + qemu_host_page_size - 1); /* What we have mapped so far */ - - /* Map the last of the bss segment */ - if (last_bss > elf_bss) { - target_mmap(elf_bss, last_bss-elf_bss, - PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE|MAP_ANON, -1, 0); - } - free(elf_phdata); + /* Now use mmap to map the library into memory. */ + close(interpreter_fd); + free(elf_phdata); - *interp_load_addr = load_addr; - return ((abi_ulong) interp_elf_ex->e_entry) + load_addr; + *interp_load_addr = baddr; + return ((abi_ulong) interp_elf_ex->e_entry) + rbase; } static int symfind(const void *s0, const void *s1) @@ -1010,7 +371,7 @@ static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) struct elf_sym *syms = s->disas_symtab.elf64; #endif - // binary search + /* binary search */ struct elf_sym *sym; sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms), symfind); @@ -1026,9 +387,8 @@ static int symcmp(const void *s0, const void *s1) { struct elf_sym *sym0 = (struct elf_sym *)s0; struct elf_sym *sym1 = (struct elf_sym *)s1; - return (sym0->st_value < sym1->st_value) - ? -1 - : ((sym0->st_value > sym1->st_value) ? 1 : 0); + return (sym0->st_value < sym1->st_value) ? -1 : + ((sym0->st_value > sym1->st_value) ? 1 : 0); } /* Best attempt to load symbols from this ELF object. */ @@ -1042,27 +402,24 @@ static void load_symbols(struct elfhdr *hdr, int fd) lseek(fd, hdr->e_shoff, SEEK_SET); for (i = 0; i < hdr->e_shnum; i++) { - if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr)) + if (read(fd, &sechdr, sizeof(sechdr)) != sizeof(sechdr)) { return; -#ifdef BSWAP_NEEDED - bswap_shdr(&sechdr); -#endif + } + bswap_shdr(&sechdr, 1); if (sechdr.sh_type == SHT_SYMTAB) { symtab = sechdr; - lseek(fd, hdr->e_shoff - + sizeof(sechdr) * sechdr.sh_link, SEEK_SET); - if (read(fd, &strtab, sizeof(strtab)) - != sizeof(strtab)) + lseek(fd, hdr->e_shoff + sizeof(sechdr) * sechdr.sh_link, + SEEK_SET); + if (read(fd, &strtab, sizeof(strtab)) != sizeof(strtab)) { return; -#ifdef BSWAP_NEEDED - bswap_shdr(&strtab); -#endif + } + bswap_shdr(&strtab, 1); goto found; } } return; /* Shouldn't happen... */ - found: +found: /* Now know where the strtab and symtab are. Snarf them. */ s = malloc(sizeof(*s)); syms = malloc(symtab.sh_size); @@ -1089,10 +446,8 @@ static void load_symbols(struct elfhdr *hdr, int fd) i = 0; while (i < nsyms) { -#ifdef BSWAP_NEEDED bswap_sym(syms + i); -#endif - // Throw away entries which we do not need. + /* Throw away entries which we do not need. */ if (syms[i].st_shndx == SHN_UNDEF || syms[i].st_shndx >= SHN_LORESERVE || ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) { @@ -1102,17 +457,19 @@ static void load_symbols(struct elfhdr *hdr, int fd) } continue; } -#if defined(TARGET_ARM) || defined (TARGET_MIPS) +#if defined(TARGET_ARM) || defined(TARGET_MIPS) /* The bottom address bit marks a Thumb or MIPS16 symbol. */ syms[i].st_value &= ~(target_ulong)1; #endif i++; } - /* Attempt to free the storage associated with the local symbols - that we threw away. Whether or not this has any effect on the - memory allocation depends on the malloc implementation and how - many symbols we managed to discard. */ + /* + * Attempt to free the storage associated with the local symbols + * that we threw away. Whether or not this has any effect on the + * memory allocation depends on the malloc implementation and how + * many symbols we managed to discard. 
+ */ new_syms = realloc(syms, nsyms * sizeof(*syms)); if (new_syms == NULL) { free(s); @@ -1143,142 +500,191 @@ static void load_symbols(struct elfhdr *hdr, int fd) syminfos = s; } -int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, - struct image_info * info) +/* Check the elf header and see if this a target elf binary. */ +int is_target_elf_binary(int fd) +{ + uint8_t buf[128]; + struct elfhdr elf_ex; + + if (lseek(fd, 0L, SEEK_SET) < 0) { + return 0; + } + if (read(fd, buf, sizeof(buf)) < 0) { + return 0; + } + + elf_ex = *((struct elfhdr *)buf); + bswap_ehdr(&elf_ex); + + if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || + (!elf_check_arch(elf_ex.e_machine))) { + return 0; + } else { + return 1; + } +} + +static int +load_elf_sections(const struct elfhdr *hdr, struct elf_phdr *phdr, int fd, + abi_ulong rbase, abi_ulong *baddrp) +{ + struct elf_phdr *elf_ppnt; + abi_ulong baddr; + int i; + bool first; + + /* + * Now we do a little grungy work by mmaping the ELF image into + * the correct location in memory. At this point, we assume that + * the image should be loaded at fixed address, not at a variable + * address. + */ + first = true; + for (i = 0, elf_ppnt = phdr; i < hdr->e_phnum; i++, elf_ppnt++) { + int elf_prot = 0; + abi_ulong error; + + /* XXX Skip memsz == 0. */ + if (elf_ppnt->p_type != PT_LOAD) { + continue; + } + + if (elf_ppnt->p_flags & PF_R) { + elf_prot |= PROT_READ; + } + if (elf_ppnt->p_flags & PF_W) { + elf_prot |= PROT_WRITE; + } + if (elf_ppnt->p_flags & PF_X) { + elf_prot |= PROT_EXEC; + } + + error = target_mmap(TARGET_ELF_PAGESTART(rbase + elf_ppnt->p_vaddr), + (elf_ppnt->p_filesz + + TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), + elf_prot, + (MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE), + fd, + (elf_ppnt->p_offset - + TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); + if (error == -1) { + perror("mmap"); + exit(-1); + } else if (elf_ppnt->p_memsz != elf_ppnt->p_filesz) { + abi_ulong start_bss, end_bss; + + start_bss = rbase + elf_ppnt->p_vaddr + elf_ppnt->p_filesz; + end_bss = rbase + elf_ppnt->p_vaddr + elf_ppnt->p_memsz; + + /* + * Calling set_brk effectively mmaps the pages that we need for the + * bss and break sections. 
+ */ + set_brk(start_bss, end_bss); + padzero(start_bss, end_bss); + } + + if (first) { + baddr = TARGET_ELF_PAGESTART(rbase + elf_ppnt->p_vaddr); + first = false; + } + } + + if (baddrp != NULL) { + *baddrp = baddr; + } + return 0; +} + +int load_elf_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs, + struct image_info *info) { struct elfhdr elf_ex; struct elfhdr interp_elf_ex; - struct exec interp_ex; int interpreter_fd = -1; /* avoid warning */ - abi_ulong load_addr, load_bias; - int load_addr_set = 0; - unsigned int interpreter_type = INTERPRETER_NONE; - unsigned char ibcs2_interpreter; + abi_ulong load_addr; int i; - struct elf_phdr * elf_ppnt; + struct elf_phdr *elf_ppnt; struct elf_phdr *elf_phdata; - abi_ulong elf_bss, k, elf_brk; - int retval; - char * elf_interpreter; - abi_ulong elf_entry, interp_load_addr = 0; - abi_ulong start_code, end_code, start_data, end_data; + abi_ulong elf_brk; + int error, retval; + char *elf_interpreter; + abi_ulong baddr, elf_entry, et_dyn_addr, interp_load_addr = 0; abi_ulong reloc_func_desc = 0; -#ifdef LOW_ELF_STACK - abi_ulong elf_stack = ~((abi_ulong)0UL); -#endif - char passed_fileno[6]; - ibcs2_interpreter = 0; load_addr = 0; - load_bias = 0; elf_ex = *((struct elfhdr *) bprm->buf); /* exec-header */ -#ifdef BSWAP_NEEDED bswap_ehdr(&elf_ex); -#endif /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || - (! elf_check_arch(elf_ex.e_machine))) { + (!elf_check_arch(elf_ex.e_machine))) { return -ENOEXEC; } bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p); - bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p); - bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p); + bprm->p = copy_elf_strings(bprm->envc, bprm->envp, bprm->page, bprm->p); + bprm->p = copy_elf_strings(bprm->argc, bprm->argv, bprm->page, bprm->p); if (!bprm->p) { retval = -E2BIG; } /* Now read in all of the header information */ - elf_phdata = (struct elf_phdr *)malloc(elf_ex.e_phentsize*elf_ex.e_phnum); + elf_phdata = (struct elf_phdr *)malloc(elf_ex.e_phentsize * elf_ex.e_phnum); if (elf_phdata == NULL) { return -ENOMEM; } retval = lseek(bprm->fd, elf_ex.e_phoff, SEEK_SET); - if(retval > 0) { - retval = read(bprm->fd, (char *) elf_phdata, + if (retval > 0) { + retval = read(bprm->fd, (char *)elf_phdata, elf_ex.e_phentsize * elf_ex.e_phnum); } if (retval < 0) { perror("load_elf_binary"); exit(-1); - free (elf_phdata); + free(elf_phdata); return -errno; } -#ifdef BSWAP_NEEDED - elf_ppnt = elf_phdata; - for (i=0; ip_type == PT_INTERP) { - if ( elf_interpreter != NULL ) - { - free (elf_phdata); + if (elf_interpreter != NULL) { + free(elf_phdata); free(elf_interpreter); close(bprm->fd); return -EINVAL; } - /* This is the program interpreter used for - * shared libraries - for now assume that this - * is an a.out format binary - */ - elf_interpreter = (char *)malloc(elf_ppnt->p_filesz); - if (elf_interpreter == NULL) { - free (elf_phdata); + free(elf_phdata); close(bprm->fd); return -ENOMEM; } retval = lseek(bprm->fd, elf_ppnt->p_offset, SEEK_SET); - if(retval >= 0) { + if (retval >= 0) { retval = read(bprm->fd, elf_interpreter, elf_ppnt->p_filesz); } - if(retval < 0) { + if (retval < 0) { perror("load_elf_binary2"); exit(-1); } - /* If the program interpreter is one of these two, - then assume an iBCS2 image. Otherwise assume - a native linux image. */ - - /* JRP - Need to add X86 lib dir stuff here... 
*/ - - if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || - strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) { - ibcs2_interpreter = 1; - } - -#if 0 - printf("Using ELF interpreter %s\n", path(elf_interpreter)); -#endif if (retval >= 0) { retval = open(path(elf_interpreter), O_RDONLY); - if(retval >= 0) { + if (retval >= 0) { interpreter_fd = retval; - } - else { + } else { perror(elf_interpreter); exit(-1); /* retval = -errno; */ @@ -1287,18 +693,17 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, if (retval >= 0) { retval = lseek(interpreter_fd, 0, SEEK_SET); - if(retval >= 0) { - retval = read(interpreter_fd,bprm->buf,128); + if (retval >= 0) { + retval = read(interpreter_fd, bprm->buf, 128); } } if (retval >= 0) { - interp_ex = *((struct exec *) bprm->buf); /* aout exec-header */ - interp_elf_ex = *((struct elfhdr *) bprm->buf); /* elf exec-header */ + interp_elf_ex = *((struct elfhdr *) bprm->buf); } if (retval < 0) { perror("load_elf_binary3"); exit(-1); - free (elf_phdata); + free(elf_phdata); free(elf_interpreter); close(bprm->fd); return retval; @@ -1308,21 +713,9 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, } /* Some simple consistency checks for the interpreter */ - if (elf_interpreter){ - interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; - - /* Now figure out which format our binary is */ - if ((N_MAGIC(interp_ex) != OMAGIC) && (N_MAGIC(interp_ex) != ZMAGIC) && - (N_MAGIC(interp_ex) != QMAGIC)) { - interpreter_type = INTERPRETER_ELF; - } - + if (elf_interpreter) { if (interp_elf_ex.e_ident[0] != 0x7f || - strncmp((char *)&interp_elf_ex.e_ident[1], "ELF",3) != 0) { - interpreter_type &= ~INTERPRETER_ELF; - } - - if (!interpreter_type) { + strncmp((char *)&interp_elf_ex.e_ident[1], "ELF", 3) != 0) { free(elf_interpreter); free(elf_phdata); close(bprm->fd); @@ -1330,27 +723,15 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, } } - /* OK, we are done with that, now set up the arg stuff, - and then start this sucker up */ - - { - char * passed_p; - - if (interpreter_type == INTERPRETER_AOUT) { - snprintf(passed_fileno, sizeof(passed_fileno), "%d", bprm->fd); - passed_p = passed_fileno; - - if (elf_interpreter) { - bprm->p = copy_elf_strings(1,&passed_p,bprm->page,bprm->p); - bprm->argc++; - } - } - if (!bprm->p) { - free(elf_interpreter); - free (elf_phdata); - close(bprm->fd); - return -E2BIG; - } + /* + * OK, we are done with that, now set up the arg stuff, and then start this + * sucker up + */ + if (!bprm->p) { + free(elf_interpreter); + free(elf_phdata); + close(bprm->fd); + return -E2BIG; } /* OK, This is the point of no return */ @@ -1360,129 +741,49 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, info->mmap = 0; elf_entry = (abi_ulong) elf_ex.e_entry; - /* - * In case where user has not explicitly set the guest_base, we - * probe here that should we set it automatically. - */ - if (!have_guest_base) { - /* - * Go through ELF program header table and find out whether - * any of the segments drop below our current mmap_min_addr and - * in that case set guest_base to corresponding address. - */ - for (i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; - i++, elf_ppnt++) { - if (elf_ppnt->p_type != PT_LOAD) - continue; - if (HOST_PAGE_ALIGN(elf_ppnt->p_vaddr) < mmap_min_addr) { - guest_base = HOST_PAGE_ALIGN(mmap_min_addr); - break; - } + /* XXX Join this with PT_INTERP search? 
*/ + baddr = 0; + for (i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { + if (elf_ppnt->p_type != PT_LOAD) { + continue; } + baddr = elf_ppnt->p_vaddr; + break; } - /* Do this so that we can load the interpreter, if need be. We will - change some of these later */ - info->rss = 0; - bprm->p = setup_arg_pages(bprm->p, bprm, info); - info->start_stack = bprm->p; + et_dyn_addr = 0; + if (elf_ex.e_type == ET_DYN && baddr == 0) { + et_dyn_addr = ELF_ET_DYN_LOAD_ADDR; + } - /* Now we do a little grungy work by mmaping the ELF image into - * the correct location in memory. At this point, we assume that - * the image should be loaded at fixed address, not at a variable - * address. + /* + * Do this so that we can load the interpreter, if need be. We will + * change some of these later */ + info->rss = 0; + setup_arg_pages(bprm, info, &bprm->p, &bprm->stringp); + info->start_stack = bprm->p; - for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot = 0; - int elf_flags = 0; - abi_ulong error; + info->elf_flags = elf_ex.e_flags; - if (elf_ppnt->p_type != PT_LOAD) + error = load_elf_sections(&elf_ex, elf_phdata, bprm->fd, et_dyn_addr, + &load_addr); + for (i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { + if (elf_ppnt->p_type != PT_LOAD) { continue; - - if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ; - if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - elf_flags = MAP_PRIVATE | MAP_DENYWRITE; - if (elf_ex.e_type == ET_EXEC || load_addr_set) { - elf_flags |= MAP_FIXED; - } else if (elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the default mmap - base, as well as whatever program they might try to exec. This - is because the brk will follow the loader, and is not movable. 
*/ - /* NOTE: for qemu, we do a big mmap to get enough space - without hardcoding any address */ - error = target_mmap(0, ET_DYN_MAP_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANON, - -1, 0); - if (error == -1) { - perror("mmap"); - exit(-1); - } - load_bias = TARGET_ELF_PAGESTART(error - elf_ppnt->p_vaddr); } - - error = target_mmap(TARGET_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr), - (elf_ppnt->p_filesz + - TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), - elf_prot, - (MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE), - bprm->fd, - (elf_ppnt->p_offset - - TARGET_ELF_PAGEOFFSET(elf_ppnt->p_vaddr))); - if (error == -1) { - perror("mmap"); - exit(-1); - } - -#ifdef LOW_ELF_STACK - if (TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack) - elf_stack = TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr); -#endif - - if (!load_addr_set) { - load_addr_set = 1; - load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset; - if (elf_ex.e_type == ET_DYN) { - load_bias += error - - TARGET_ELF_PAGESTART(load_bias + elf_ppnt->p_vaddr); - load_addr += load_bias; - reloc_func_desc = load_bias; - } - } - k = elf_ppnt->p_vaddr; - if (k < start_code) - start_code = k; - if (start_data < k) - start_data = k; - k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; - if ((elf_ppnt->p_flags & PF_X) && end_code < k) - end_code = k; - if (end_data < k) - end_data = k; - k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; - if (k > elf_brk) elf_brk = k; + if (elf_ppnt->p_memsz > elf_ppnt->p_filesz) + elf_brk = MAX(elf_brk, et_dyn_addr + elf_ppnt->p_vaddr + + elf_ppnt->p_memsz); + } + if (error != 0) { + perror("load_elf_sections"); + exit(-1); } - - elf_entry += load_bias; - elf_bss += load_bias; - elf_brk += load_bias; - start_code += load_bias; - end_code += load_bias; - start_data += load_bias; - end_data += load_bias; if (elf_interpreter) { - if (interpreter_type & 1) { - elf_entry = load_aout_interp(&interp_ex, interpreter_fd); - } - else if (interpreter_type & 2) { - elf_entry = load_elf_interp(&interp_elf_ex, interpreter_fd, - &interp_load_addr); - } + elf_entry = load_elf_interp(&interp_elf_ex, interpreter_fd, + &interp_load_addr); reloc_func_desc = interp_load_addr; close(interpreter_fd); @@ -1494,72 +795,40 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, exit(-1); return 0; } + } else { + interp_load_addr = et_dyn_addr; + elf_entry += interp_load_addr; } free(elf_phdata); - if (qemu_log_enabled()) + if (qemu_log_enabled()) { load_symbols(&elf_ex, bprm->fd); + } - if (interpreter_type != INTERPRETER_AOUT) close(bprm->fd); - info->personality = (ibcs2_interpreter ? PER_SVR4 : PER_LINUX); + close(bprm->fd); -#ifdef LOW_ELF_STACK - info->start_stack = bprm->p = elf_stack - 4; -#endif - bprm->p = create_elf_tables(bprm->p, - bprm->argc, - bprm->envc, - &elf_ex, - load_addr, load_bias, - interp_load_addr, - (interpreter_type == INTERPRETER_AOUT ? 
0 : 1), - info); + bprm->p = target_create_elf_tables(bprm->p, bprm->argc, bprm->envc, + bprm->stringp, &elf_ex, load_addr, + et_dyn_addr, interp_load_addr, info); info->load_addr = reloc_func_desc; info->start_brk = info->brk = elf_brk; - info->end_code = end_code; - info->start_code = start_code; - info->start_data = start_data; - info->end_data = end_data; info->start_stack = bprm->p; + info->load_bias = 0; - /* Calling set_brk effectively mmaps the pages that we need for the bss and break - sections */ - set_brk(elf_bss, elf_brk); - - padzero(elf_bss, elf_brk); + info->entry = elf_entry; -#if 0 - printf("(start_brk) %x\n" , info->start_brk); - printf("(end_code) %x\n" , info->end_code); - printf("(start_code) %x\n" , info->start_code); - printf("(end_data) %x\n" , info->end_data); - printf("(start_stack) %x\n" , info->start_stack); - printf("(brk) %x\n" , info->brk); +#ifdef USE_ELF_CORE_DUMP + bprm->core_dump = &elf_core_dump; +#else + bprm->core_dump = NULL; #endif - if ( info->personality == PER_SVR4 ) - { - /* Why this, you ask??? Well SVr4 maps page 0 as read-only, - and some applications "depend" upon this behavior. - Since we do not have the power to recompile these, we - emulate the SVr4 behavior. Sigh. */ - target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE, -1, 0); - } - - info->entry = elf_entry; - return 0; } -static int load_aout_interp(void * exptr, int interp_fd) -{ - printf("a.out interpreter not yet supported\n"); - return(0); -} - void do_init_thread(struct target_pt_regs *regs, struct image_info *infop) { - init_thread(regs, infop); + + target_thread_init(regs, infop); } diff --git a/bsd-user/errno_defs.h b/bsd-user/errno_defs.h index 1efa502a129..832671354fd 100644 --- a/bsd-user/errno_defs.h +++ b/bsd-user/errno_defs.h @@ -1,6 +1,3 @@ -/* $OpenBSD: errno.h,v 1.20 2007/09/03 14:37:52 millert Exp $ */ -/* $NetBSD: errno.h,v 1.10 1996/01/20 01:33:53 jtc Exp $ */ - /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -37,6 +34,9 @@ * @(#)errno.h 8.5 (Berkeley) 1/21/94 */ +#ifndef _ERRNO_DEFS_H_ +#define _ERRNO_DEFS_H_ + #define TARGET_EPERM 1 /* Operation not permitted */ #define TARGET_ENOENT 2 /* No such file or directory */ #define TARGET_ESRCH 3 /* No such process */ @@ -147,3 +147,10 @@ #define TARGET_EIDRM 89 /* Identifier removed */ #define TARGET_ENOMSG 90 /* No message of desired type */ #define TARGET_ELAST 90 /* Must be equal largest errno */ + +/* Internal errors: */ +#define TARGET_EJUSTRETURN 254 /* Just return without modifing regs */ +#define TARGET_ERESTART 255 /* Restart syscall */ +#define TARGET_ERESTARTSYS TARGET_ERESTART /* Linux compat */ + +#endif /* ! _ERRNO_DEFS_H_ */ diff --git a/bsd-user/freebsd/host-os.h b/bsd-user/freebsd/host-os.h new file mode 100644 index 00000000000..dfb8344b7b6 --- /dev/null +++ b/bsd-user/freebsd/host-os.h @@ -0,0 +1,25 @@ +/* + * FreeBSD host dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _HOST_OS_H_ +#define _HOST_OS_H_ + +#define HOST_DEFAULT_BSD_TYPE target_freebsd + +#endif /*!_HOST_OS_H_ */ diff --git a/bsd-user/freebsd/meson.build b/bsd-user/freebsd/meson.build new file mode 100644 index 00000000000..4b69cca7b90 --- /dev/null +++ b/bsd-user/freebsd/meson.build @@ -0,0 +1,3 @@ +bsd_user_ss.add(files( + 'os-sys.c', +)) diff --git a/bsd-user/freebsd/os-sys.c b/bsd-user/freebsd/os-sys.c new file mode 100644 index 00000000000..309e27b9d63 --- /dev/null +++ b/bsd-user/freebsd/os-sys.c @@ -0,0 +1,27 @@ +/* + * FreeBSD sysctl() and sysarch() system call emulation + * + * Copyright (c) 2013-15 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu.h" +#include "target_arch_sysarch.h" + +/* sysarch() is architecture dependent. */ +abi_long do_freebsd_sysarch(void *cpu_env, abi_long arg1, abi_long arg2) +{ + return do_freebsd_arch_sysarch(cpu_env, arg1, arg2); +} diff --git a/bsd-user/freebsd/strace.list b/bsd-user/freebsd/strace.list index b01b5f36e88..275d2dbe274 100644 --- a/bsd-user/freebsd/strace.list +++ b/bsd-user/freebsd/strace.list @@ -33,10 +33,6 @@ { TARGET_FREEBSD_NR___syscall, "__syscall", NULL, NULL, NULL }, { TARGET_FREEBSD_NR___sysctl, "__sysctl", NULL, print_sysctl, NULL }, { TARGET_FREEBSD_NR__umtx_op, "_umtx_op", "%s(%#x, %d, %d, %#x, %#x)", NULL, NULL }, -#if defined(__FreeBSD_version) && __FreeBSD_version < 1000000 -{ TARGET_FREEBSD_NR__umtx_lock, "__umtx_lock", NULL, NULL, NULL }, -{ TARGET_FREEBSD_NR__umtx_unlock, "__umtx_unlock", NULL, NULL, NULL }, -#endif { TARGET_FREEBSD_NR_accept, "accept", "%s(%d,%#x,%#x)", NULL, NULL }, { TARGET_FREEBSD_NR_accept4, "accept4", "%s(%d,%d,%#x,%#x)", NULL, NULL }, { TARGET_FREEBSD_NR_access, "access", "%s(\"%s\",%#o)", NULL, NULL }, @@ -49,10 +45,6 @@ { TARGET_FREEBSD_NR_cap_fcntls_get, "cap_fcntls_get", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_fcntls_limit, "cap_fcntls_limit", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_getmode, "cap_getmode", NULL, NULL, NULL }, -#if defined(__FreeBSD_version) && __FreeBSD_version < 1000000 -{ TARGET_FREEBSD_NR_cap_getrights, "cap_getrights", NULL, NULL, NULL }, -{ TARGET_FREEBSD_NR_cap_new, "cap_new", NULL, NULL, NULL }, -#endif { TARGET_FREEBSD_NR_cap_ioctls_get, "cap_ioctls_get", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_ioctls_limit, "cap_ioctls_limit", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_cap_rights_limit, "cap_rights_limit", NULL, NULL, NULL }, @@ -146,9 +138,6 @@ { TARGET_FREEBSD_NR_freebsd11_kevent, "freebsd11_kevent", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_kevent, "kevent", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_kill, "kill", NULL, NULL, NULL }, -#if defined(__FreeBSD_version) && __FreeBSD_version < 1000000 -{ TARGET_FREEBSD_NR_killpg, "killpg", NULL, NULL, NULL }, -#endif { TARGET_FREEBSD_NR_kqueue, "kqueue", NULL, NULL, NULL }, { 
TARGET_FREEBSD_NR_ktrace, "ktrace", NULL, NULL, NULL }, { TARGET_FREEBSD_NR_lchown, "lchown", NULL, NULL, NULL }, diff --git a/bsd-user/freebsd/target_os_elf.h b/bsd-user/freebsd/target_os_elf.h new file mode 100644 index 00000000000..e5ac8e8e501 --- /dev/null +++ b/bsd-user/freebsd/target_os_elf.h @@ -0,0 +1,137 @@ +/* + * freebsd ELF definitions + * + * Copyright (c) 2013-15 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_OS_ELF_H_ +#define _TARGET_OS_ELF_H_ + +#include "target_arch_elf.h" +#include "elf.h" + +#define bsd_get_ncpu() 1 /* until we pull in bsd-proc.[hc] */ + +/* this flag is uneffective under linux too, should be deleted */ +#ifndef MAP_DENYWRITE +#define MAP_DENYWRITE 0 +#endif + +/* should probably go in elf.h */ +#ifndef ELIBBAD +#define ELIBBAD 80 +#endif + +#ifndef ELF_PLATFORM +#define ELF_PLATFORM (NULL) +#endif + +/* XXX Look at the other conflicting AT_* values. */ +#define FREEBSD_AT_NCPUS 19 +#define FREEBSD_AT_HWCAP 25 +#define FREEBSD_AT_HWCAP2 26 + +#ifdef TARGET_ABI32 +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#undef bswaptls +#define bswaptls(ptr) bswap32s(ptr) +#endif + +/* max code+data+bss space allocated to elf interpreter */ +#define INTERP_MAP_SIZE (32 * 1024 * 1024) + +/* max code+data+bss+brk space allocated to ET_DYN executables */ +#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) + +/* Necessary parameters */ +#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_PAGESTART(_v) ((_v) & \ + ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE - 1)) +#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1)) + +#define DLINFO_ITEMS 14 + +static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, + abi_ulong stringp, + struct elfhdr *exec, + abi_ulong load_addr, + abi_ulong load_bias, + abi_ulong interp_load_addr, + struct image_info *info) +{ + abi_ulong features, sp; + int size; + const int n = sizeof(elf_addr_t); + + target_auxents_sz = 0; + sp = p; + /* + * Force 16 byte _final_ alignment here for generality. + */ + sp = sp & ~(abi_ulong)15; + size = (DLINFO_ITEMS + 1) * 2; + size += envc + argc + 2; + size += 1; /* argc itself */ + size *= n; + if (size & 15) { + sp -= 16 - (size & 15); + } + + /* + * FreeBSD defines elf_addr_t as Elf32_Off / Elf64_Off + */ +#define NEW_AUX_ENT(id, val) do { \ + sp -= n; put_user_ual(val, sp); \ + sp -= n; put_user_ual(id, sp); \ + target_auxents_sz += 2 * n; \ + } while (0) + + NEW_AUX_ENT(AT_NULL, 0); + + /* There must be exactly DLINFO_ITEMS entries here. 
*/ + NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); + NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof(struct elf_phdr))); + NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); + NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); + NEW_AUX_ENT(FREEBSD_AT_NCPUS, (abi_ulong)bsd_get_ncpu()); + NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); + features = ELF_HWCAP; + NEW_AUX_ENT(FREEBSD_AT_HWCAP, features); +#ifdef ELF_HWCAP2 + features = ELF_HWCAP2; + NEW_AUX_ENT(FREEBSD_AT_HWCAP2, features); +#endif + NEW_AUX_ENT(AT_UID, (abi_ulong)getuid()); + NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid()); + NEW_AUX_ENT(AT_GID, (abi_ulong)getgid()); + NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid()); + target_auxents = sp; /* Note where the aux entries are in the target */ +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come last so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). + */ + ARCH_DLINFO; +#endif +#undef NEW_AUX_ENT + + sp = loader_build_argptr(envc, argc, sp, stringp); + return sp; +} + +#endif /* _TARGET_OS_ELF_H_ */ diff --git a/bsd-user/freebsd/target_os_siginfo.h b/bsd-user/freebsd/target_os_siginfo.h new file mode 100644 index 00000000000..84944faa4d3 --- /dev/null +++ b/bsd-user/freebsd/target_os_siginfo.h @@ -0,0 +1,145 @@ +/* + * FreeBSD siginfo related definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_OS_SIGINFO_H_ +#define _TARGET_OS_SIGINFO_H_ + +#define TARGET_NSIG 128 +#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +/* this struct defines a stack used during syscall handling */ +typedef struct target_sigaltstack { + abi_long ss_sp; + abi_ulong ss_size; + abi_long ss_flags; +} target_stack_t; + +typedef struct { + uint32_t __bits[TARGET_NSIG_WORDS]; +} target_sigset_t; + +struct target_sigaction { + abi_ulong _sa_handler; + int32_t sa_flags; + target_sigset_t sa_mask; +}; + +typedef union target_sigval { + int32_t sival_int; + abi_ulong sival_ptr; + int32_t sigval_int; + abi_ulong sigval_ptr; +} target_sigval_t; + +typedef struct target_siginfo { + int32_t si_signo; /* signal number */ + int32_t si_errno; /* errno association */ + int32_t si_code; /* signal code */ + int32_t si_pid; /* sending process */ + int32_t si_uid; /* sender's ruid */ + int32_t si_status; /* exit value */ + abi_ulong si_addr; /* faulting instruction */ + union target_sigval si_value; /* signal value */ + union { + struct { + int32_t _trapno; /* machine specific trap code */ + } _fault; + + /* POSIX.1b timers */ + struct { + int32_t _timerid; + int32_t _overrun; + } _timer; + + struct { + int32_t _mqd; + } _mesgp; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + } _poll; + + struct { + abi_long __spare1__; + int32_t __spare2_[7]; + } __spare__; + } _reason; +} target_siginfo_t; + +struct target_sigevent { + abi_int sigev_notify; + abi_int sigev_signo; + target_sigval_t sigev_value; + union { + abi_int _threadid; + + /* + * The kernel (and thus QEMU) never looks at these; + * they're only used as part of the ABI between a + * userspace program and libc. + */ + struct { + abi_ulong _function; + abi_ulong _attribute; + } _sigev_thread; + abi_ushort _kevent_flags; + abi_long _pad[8]; + } _sigev_un; +}; + +#define target_si_signo si_signo +#define target_si_code si_code +#define target_si_errno si_errno +#define target_si_addr si_addr + +/* SIGILL si_codes */ +#define TARGET_ILL_ILLOPC (1) /* Illegal opcode. */ +#define TARGET_ILL_ILLOPN (2) /* Illegal operand. */ +#define TARGET_ILL_ILLADR (3) /* Illegal addressing mode. */ +#define TARGET_ILL_ILLTRP (4) /* Illegal trap. */ +#define TARGET_ILL_PRVOPC (5) /* Privileged opcode. */ +#define TARGET_ILL_PRVREG (6) /* Privileged register. */ +#define TARGET_ILL_COPROC (7) /* Coprocessor error. */ +#define TARGET_ILL_BADSTK (8) /* Internal stack error. */ + +/* SIGSEGV si_codes */ +#define TARGET_SEGV_MAPERR (1) /* address not mapped to object */ +#define TARGET_SEGV_ACCERR (2) /* invalid permissions for mapped object */ + +/* SIGTRAP si_codes */ +#define TARGET_TRAP_BRKPT (1) /* process beakpoint */ +#define TARGET_TRAP_TRACE (2) /* process trace trap */ + +/* SIGBUS si_codes */ +#define TARGET_BUS_ADRALN (1) +#define TARGET_BUS_ADRERR (2) +#define TARGET_BUS_OBJERR (3) + +/* SIGFPE codes */ +#define TARGET_FPE_INTOVF (1) /* Integer overflow. */ +#define TARGET_FPE_INTDIV (2) /* Integer divide by zero. */ +#define TARGET_FPE_FLTDIV (3) /* Floating point divide by zero. */ +#define TARGET_FPE_FLTOVF (4) /* Floating point overflow. */ +#define TARGET_FPE_FLTUND (5) /* Floating point underflow. */ +#define TARGET_FPE_FLTRES (6) /* Floating point inexact result. */ +#define TARGET_FPE_FLTINV (7) /* Invalid floating point operation. */ +#define TARGET_FPE_FLTSUB (8) /* Subscript out of range. 
*/ + +#endif /* !_TARGET_OS_SIGINFO_H_ */ diff --git a/bsd-user/freebsd/target_os_signal.h b/bsd-user/freebsd/target_os_signal.h new file mode 100644 index 00000000000..1a4c5faf19e --- /dev/null +++ b/bsd-user/freebsd/target_os_signal.h @@ -0,0 +1,81 @@ +#ifndef _TARGET_OS_SIGNAL_H_ +#define _TARGET_OS_SIGNAL_H_ + +/* FreeBSD's sys/ucontext.h defines this */ +#define TARGET_MC_GET_CLEAR_RET 0x0001 + +#include "target_os_siginfo.h" +#include "target_arch_signal.h" + +/* Compare to sys/signal.h */ +#define TARGET_SIGHUP 1 /* hangup */ +#define TARGET_SIGINT 2 /* interrupt */ +#define TARGET_SIGQUIT 3 /* quit */ +#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ +#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ +#define TARGET_SIGABRT 6 /* abort() */ +#define TARGET_SIGIOT SIGABRT /* compatibility */ +#define TARGET_SIGEMT 7 /* EMT instruction */ +#define TARGET_SIGFPE 8 /* floating point exception */ +#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ +#define TARGET_SIGBUS 10 /* bus error */ +#define TARGET_SIGSEGV 11 /* segmentation violation */ +#define TARGET_SIGSYS 12 /* bad argument to system call */ +#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ +#define TARGET_SIGALRM 14 /* alarm clock */ +#define TARGET_SIGTERM 15 /* software termination signal from kill */ +#define TARGET_SIGURG 16 /* urgent condition on IO channel */ +#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ +#define TARGET_SIGTSTP 18 /* stop signal from tty */ +#define TARGET_SIGCONT 19 /* continue a stopped process */ +#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ +#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ +#define TARGET_SIGTTOU 22 /* like TTIN for output if(tp->t_local<OSTOP)*/ +#define TARGET_SIGIO 23 /* input/output possible signal */ +#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ +#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ +#define TARGET_SIGVTALRM 26 /* virtual time alarm */ +#define TARGET_SIGPROF 27 /* profiling time alarm */ +#define TARGET_SIGWINCH 28 /* window size changes */ +#define TARGET_SIGINFO 29 /* information request */ +#define TARGET_SIGUSR1 30 /* user defined signal 1 */ +#define TARGET_SIGUSR2 31 /* user defined signal 2 */ +#define TARGET_SIGTHR 32 /* reserved by thread library */ +#define TARGET_SIGLWP SIGTHR /* compatibility */ +#define TARGET_SIGLIBRT 33 /* reserved by the real-time library */ +#define TARGET_SIGRTMIN 65 +#define TARGET_SIGRTMAX 126 + +/* + * Language spec says we must list exactly one parameter, even though we + * actually supply three. Ugh! 
+ */ +#define TARGET_SIG_DFL ((abi_long)0) /* default signal handling */ +#define TARGET_SIG_IGN ((abi_long)1) /* ignore signal */ +#define TARGET_SIG_ERR ((abi_long)-1) /* error return from signal */ + +#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ +#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ +#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ +#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ +#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ +#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ + +/* + * Flags for sigprocmask: + */ +#define TARGET_SIG_BLOCK 1 /* block specified signal set */ +#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ +#define TARGET_SIG_SETMASK 3 /* set specified signal set */ + +#define TARGET_BADSIG SIG_ERR + +/* + * sigaltstack control + */ +#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ +#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack*/ + +#endif /* !_TARGET_OS_SIGNAL_H_ */ diff --git a/bsd-user/freebsd/target_os_stack.h b/bsd-user/freebsd/target_os_stack.h new file mode 100644 index 00000000000..1bb1a2bf569 --- /dev/null +++ b/bsd-user/freebsd/target_os_stack.h @@ -0,0 +1,181 @@ +/* + * FreeBSD setup_initial_stack() implementation. + * + * Copyright (c) 2013-14 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _TARGET_OS_STACK_H_ +#define _TARGET_OS_STACK_H_ + +#include <sys/param.h> +#include "target_arch_sigtramp.h" +#include "qemu/guest-random.h" + +/* + * The initial FreeBSD stack is as follows: + * (see kern/kern_exec.c exec_copyout_strings() ) + * + * Hi Address -> char **ps_argvstr (struct ps_strings for ps, w, etc.) + * unsigned ps_nargvstr + * char **ps_envstr + * PS_STRINGS -> unsigned ps_nenvstr + * + * machine dependent sigcode (sv_sigcode of size + * sv_szsigcode) + * + * execpath (absolute image path for rtld) + * + * SSP Canary (sizeof(long) * 8) + * + * page sizes array (usually sizeof(u_long) ) + * + * "destp" -> argv, env strings (up to 262144 bytes) + */ +static inline int setup_initial_stack(struct bsd_binprm *bprm, + abi_ulong *ret_addr, abi_ulong *stringp) +{ + int i; + abi_ulong stack_hi_addr; + size_t execpath_len, stringspace; + abi_ulong destp, argvp, envp, p; + struct target_ps_strings ps_strs; + char canary[sizeof(abi_long) * 8]; + + stack_hi_addr = p = target_stkbas + target_stksiz; + + /* Save some space for ps_strings. */ + p -= sizeof(struct target_ps_strings); + + /* Add machine dependent sigcode.
*/ + p -= TARGET_SZSIGCODE; + if (setup_sigtramp(p, (unsigned)offsetof(struct target_sigframe, sf_uc), + TARGET_FREEBSD_NR_sigreturn)) { + errno = EFAULT; + return -1; + } + if (bprm->fullpath) { + execpath_len = strlen(bprm->fullpath) + 1; + p -= roundup(execpath_len, sizeof(abi_ulong)); + if (memcpy_to_target(p, bprm->fullpath, execpath_len)) { + errno = EFAULT; + return -1; + } + } + /* Add canary for SSP. */ + qemu_guest_getrandom_nofail(canary, sizeof(canary)); + p -= roundup(sizeof(canary), sizeof(abi_ulong)); + if (memcpy_to_target(p, canary, sizeof(canary))) { + errno = EFAULT; + return -1; + } + /* Add page sizes array. */ + p -= sizeof(abi_ulong); + if (put_user_ual(TARGET_PAGE_SIZE, p)) { + errno = EFAULT; + return -1; + } + /* + * Deviate from FreeBSD stack layout: force stack to new page here + * so that signal trampoline is not sharing the page with user stack + * frames. This is actively harmful in qemu as it marks pages with + * code it translated as read-only, which is somewhat problematic + * for user trying to use the stack as intended. + */ + p = rounddown(p, TARGET_PAGE_SIZE); + + /* Calculate the string space needed */ + stringspace = 0; + for (i = 0; i < bprm->argc; ++i) { + stringspace += strlen(bprm->argv[i]) + 1; + } + for (i = 0; i < bprm->envc; ++i) { + stringspace += strlen(bprm->envp[i]) + 1; + } + if (stringspace > TARGET_ARG_MAX) { + errno = ENOMEM; + return -1; + } + /* Make room for the argv and envp strings */ + destp = rounddown(p - stringspace, sizeof(abi_ulong)); + p = argvp = destp - (bprm->argc + bprm->envc + 2) * sizeof(abi_ulong); + /* Remember the strings pointer */ + if (stringp) { + *stringp = destp; + } + /* + * Add argv strings. Note that the argv[] vectors are added by + * loader_build_argptr() + */ + /* XXX need to make room for auxargs */ + ps_strs.ps_argvstr = tswapl(argvp); + ps_strs.ps_nargvstr = tswap32(bprm->argc); + for (i = 0; i < bprm->argc; ++i) { + size_t len = strlen(bprm->argv[i]) + 1; + + if (memcpy_to_target(destp, bprm->argv[i], len)) { + errno = EFAULT; + return -1; + } + if (put_user_ual(destp, argvp)) { + errno = EFAULT; + return -1; + } + argvp += sizeof(abi_ulong); + destp += len; + } + if (put_user_ual(0, argvp)) { + errno = EFAULT; + return -1; + } + /* + * Add env strings. Note that the envp[] vectors are added by + * loader_build_argptr(). + */ + envp = argvp + sizeof(abi_ulong); + ps_strs.ps_envstr = tswapl(envp); + ps_strs.ps_nenvstr = tswap32(bprm->envc); + for (i = 0; i < bprm->envc; ++i) { + size_t len = strlen(bprm->envp[i]) + 1; + + if (memcpy_to_target(destp, bprm->envp[i], len)) { + errno = EFAULT; + return -1; + } + if (put_user_ual(destp, envp)) { + errno = EFAULT; + return -1; + } + envp += sizeof(abi_ulong); + destp += len; + } + if (put_user_ual(0, envp)) { + errno = EFAULT; + return -1; + } + if (memcpy_to_target(stack_hi_addr - sizeof(ps_strs), &ps_strs, + sizeof(ps_strs))) { + errno = EFAULT; + return -1; + } + + if (ret_addr) { + *ret_addr = p; + } + + return 0; + } + +#endif /* !_TARGET_OS_STACK_H_ */ diff --git a/bsd-user/freebsd/target_os_thread.h b/bsd-user/freebsd/target_os_thread.h new file mode 100644 index 00000000000..77433acdff8 --- /dev/null +++ b/bsd-user/freebsd/target_os_thread.h @@ -0,0 +1,25 @@ +/* + * FreeBSD thread dependent code and definitions + * + * Copyright (c) 2013 Stacey D. 
Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_THREAD_H_ +#define _TARGET_OS_THREAD_H_ + +#include "target_arch_thread.h" + +#endif /* !_TARGET_OS_THREAD_H_ */ diff --git a/bsd-user/freebsd/target_os_user.h b/bsd-user/freebsd/target_os_user.h new file mode 100644 index 00000000000..19892c5071b --- /dev/null +++ b/bsd-user/freebsd/target_os_user.h @@ -0,0 +1,329 @@ +/* + * sys/user.h definitions + * + * Copyright (c) 2015 Stacey D. Son (sson at FreeBSD) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_USER_H_ +#define _TARGET_OS_USER_H_ + +/* + * from sys/priority.h + */ +struct target_priority { + uint8_t pri_class; /* Scheduling class. */ + uint8_t pri_level; /* Normal priority level. */ + uint8_t pri_native; /* Priority before propagation. */ + uint8_t pri_user; /* User priority based on p_cpu and p_nice.
*/ +}; + +/* + * sys/caprights.h + */ +#define TARGET_CAP_RIGHTS_VERSION 0 + +typedef struct target_cap_rights { + uint64_t cr_rights[TARGET_CAP_RIGHTS_VERSION + 2]; +} target_cap_rights_t; + +/* + * From sys/_socketaddr_storage.h + * + */ +#define TARGET_SS_MAXSIZE 128U +#define TARGET_SS_ALIGNSIZE (sizeof(__int64_t)) +#define TARGET_SS_PAD1SIZE (TARGET_SS_ALIGNSIZE - sizeof(unsigned char) - \ + sizeof(uint8_t)) +#define TARGET_SS_PAD2SIZE (TARGET_SS_MAXSIZE - sizeof(unsigned char) - \ + sizeof(uint8_t) - TARGET_SS_PAD1SIZE - TARGET_SS_ALIGNSIZE) + +struct target_sockaddr_storage { + unsigned char ss_len; /* address length */ + uint8_t ss_family; /* address family */ + char __ss_pad1[TARGET_SS_PAD1SIZE]; + __int64_t __ss_align; /* force desired struct alignment */ + char __ss_pad2[TARGET_SS_PAD2SIZE]; +}; + +/* + * from sys/user.h + */ +#define TARGET_KI_NSPARE_INT 2 +#define TARGET_KI_NSPARE_LONG 12 +#define TARGET_KI_NSPARE_PTR 6 + +#define TARGET_WMESGLEN 8 +#define TARGET_LOCKNAMELEN 8 +#define TARGET_TDNAMLEN 16 +#define TARGET_COMMLEN 19 +#define TARGET_KI_EMULNAMELEN 16 +#define TARGET_KI_NGROUPS 16 +#define TARGET_LOGNAMELEN 17 +#define TARGET_LOGINCLASSLEN 17 + +#define TARGET_KF_TYPE_NONE 0 +#define TARGET_KF_TYPE_VNODE 1 +#define TARGET_KF_TYPE_SOCKET 2 +#define TARGET_KF_TYPE_PIPE 3 +#define TARGET_KF_TYPE_FIFO 4 +#define TARGET_KF_TYPE_KQUEUE 5 +#define TARGET_KF_TYPE_CRYPTO 6 +#define TARGET_KF_TYPE_MQUEUE 7 +#define TARGET_KF_TYPE_SHM 8 +#define TARGET_KF_TYPE_SEM 9 +#define TARGET_KF_TYPE_PTS 10 +#define TARGET_KF_TYPE_PROCDESC 11 +#define TARGET_KF_TYPE_DEV 12 +#define TARGET_KF_TYPE_UNKNOWN 255 + +struct target_kinfo_proc { + int32_t ki_structsize; /* size of this structure */ + int32_t ki_layout; /* reserved: layout identifier */ + abi_ulong ki_args; /* address of command arguments */ + abi_ulong ki_paddr; /* address of proc */ + abi_ulong ki_addr; /* kernel virtual addr of u-area */ + abi_ulong ki_tracep; /* pointer to trace file */ + abi_ulong ki_textvp; /* pointer to executable file */ + abi_ulong ki_fd; /* pointer to open file info */ + abi_ulong ki_vmspace; /* pointer to kernel vmspace struct */ + abi_ulong ki_wchan; /* sleep address */ + int32_t ki_pid; /* Process identifier */ + int32_t ki_ppid; /* parent process id */ + int32_t ki_pgid; /* process group id */ + int32_t ki_tpgid; /* tty process group id */ + int32_t ki_sid; /* Process session ID */ + int32_t ki_tsid; /* Terminal session ID */ + int16_t ki_jobc; /* job control counter */ + int16_t ki_spare_short1; /* unused (just here for alignment) */ + int32_t ki_tdev__freebsd11; /* controlling tty dev */ + target_sigset_t ki_siglist; /* Signals arrived but not delivered */ + target_sigset_t ki_sigmask; /* Current signal mask */ + target_sigset_t ki_sigignore; /* Signals being ignored */ + target_sigset_t ki_sigcatch; /* Signals being caught by user */ + + int32_t ki_uid; /* effective user id */ + int32_t ki_ruid; /* Real user id */ + int32_t ki_svuid; /* Saved effective user id */ + int32_t ki_rgid; /* Real group id */ + int32_t ki_svgid; /* Saved effective group id */ + int16_t ki_ngroups; /* number of groups */ + int16_t ki_spare_short2; /* unused (just here for alignment) */ + int32_t ki_groups[TARGET_KI_NGROUPS]; /* groups */ + + abi_long ki_size; /* virtual size */ + + abi_long ki_rssize; /* current resident set size in pages */ + abi_long ki_swrss; /* resident set size before last swap */ + abi_long ki_tsize; /* text size (pages) XXX */ + abi_long ki_dsize; /* data size (pages) XXX */ + abi_long ki_ssize; 
/* stack size (pages) */ + + uint16_t ki_xstat; /* Exit status for wait & stop signal */ + uint16_t ki_acflag; /* Accounting flags */ + + uint32_t ki_pctcpu; /* %cpu for process during ki_swtime */ + + uint32_t ki_estcpu; /* Time averaged value of ki_cpticks */ + uint32_t ki_slptime; /* Time since last blocked */ + uint32_t ki_swtime; /* Time swapped in or out */ + uint32_t ki_cow; /* number of copy-on-write faults */ + uint64_t ki_runtime; /* Real time in microsec */ + + struct target_freebsd_timeval ki_start; /* starting time */ + struct target_freebsd_timeval ki_childtime; /* time used by process children */ + + abi_long ki_flag; /* P_* flags */ + abi_long ki_kiflag; /* KI_* flags (below) */ + int32_t ki_traceflag; /* Kernel trace points */ + char ki_stat; /* S* process status */ + int8_t ki_nice; /* Process "nice" value */ + char ki_lock; /* Process lock (prevent swap) count */ + char ki_rqindex; /* Run queue index */ + u_char ki_oncpu_old; /* Which cpu we are on (legacy) */ + u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */ + char ki_tdname[TARGET_TDNAMLEN + 1]; /* thread name */ + char ki_wmesg[TARGET_WMESGLEN + 1]; /* wchan message */ + char ki_login[TARGET_LOGNAMELEN + 1]; /* setlogin name */ + char ki_lockname[TARGET_LOCKNAMELEN + 1]; /* lock name */ + char ki_comm[TARGET_COMMLEN + 1]; /* command name */ + char ki_emul[TARGET_KI_EMULNAMELEN + 1]; /* emulation name */ + char ki_loginclass[TARGET_LOGINCLASSLEN + 1]; /* login class */ + + char ki_sparestrings[50]; /* spare string space */ + int32_t ki_spareints[TARGET_KI_NSPARE_INT]; /* spare room for growth */ + uint64_t ki_tdev; /* controlling tty dev */ + int32_t ki_oncpu; /* Which cpu we are on */ + int32_t ki_lastcpu; /* Last cpu we were on */ + int32_t ki_tracer; /* Pid of tracing process */ + int32_t ki_flag2; /* P2_* flags */ + int32_t ki_fibnum; /* Default FIB number */ + uint32_t ki_cr_flags; /* Credential flags */ + int32_t ki_jid; /* Process jail ID */ + int32_t ki_numthreads; /* XXXKSE number of threads in total */ + + int32_t ki_tid; /* XXXKSE thread id */ + + struct target_priority ki_pri; /* process priority */ + struct target_freebsd_rusage ki_rusage; /* process rusage statistics */ + /* XXX - most fields in ki_rusage_ch are not (yet) filled in */ + struct target_freebsd_rusage ki_rusage_ch; /* rusage of children processes */ + + + abi_ulong ki_pcb; /* kernel virtual addr of pcb */ + abi_ulong ki_kstack; /* kernel virtual addr of stack */ + abi_ulong ki_udata; /* User convenience pointer */ + abi_ulong ki_tdaddr; /* address of thread */ + + abi_ulong ki_spareptrs[TARGET_KI_NSPARE_PTR]; /* spare room for growth */ + abi_long ki_sparelongs[TARGET_KI_NSPARE_LONG];/* spare room for growth */ + abi_long ki_sflag; /* PS_* flags */ + abi_long ki_tdflags; /* XXXKSE kthread flag */ +}; + +struct target_kinfo_file { + int32_t kf_structsize; /* Variable size of record. */ + int32_t kf_type; /* Descriptor type. */ + int32_t kf_fd; /* Array index. */ + int32_t kf_ref_count; /* Reference count. */ + int32_t kf_flags; /* Flags. */ + int32_t kf_pad0; /* Round to 64 bit alignment. */ + int64_t kf_offset; /* Seek location. */ + union { + struct { + uint32_t kf_spareint; + /* Socket domain. */ + int kf_sock_domain0; + /* Socket type. */ + int kf_sock_type0; + /* Socket protocol. */ + int kf_sock_protocol0; + /* Socket address. */ + struct sockaddr_storage kf_sa_local; + /* Peer address. */ + struct sockaddr_storage kf_sa_peer; + /* Address of so_pcb. */ + uint64_t kf_sock_pcb; + /* Address of inp_ppcb. 
*/ + uint64_t kf_sock_inpcb; + /* Address of unp_conn. */ + uint64_t kf_sock_unpconn; + /* Send buffer state. */ + uint16_t kf_sock_snd_sb_state; + /* Receive buffer state. */ + uint16_t kf_sock_rcv_sb_state; + /* Round to 64 bit alignment. */ + uint32_t kf_sock_pad0; + } kf_sock; + struct { + /* Vnode type. */ + int kf_file_type; + /* Space for future use */ + int kf_spareint[3]; + uint64_t kf_spareint64[30]; + /* Vnode filesystem id. */ + uint64_t kf_file_fsid; + /* File device. */ + uint64_t kf_file_rdev; + /* Global file id. */ + uint64_t kf_file_fileid; + /* File size. */ + uint64_t kf_file_size; + /* Vnode filesystem id, FreeBSD 11 compat. */ + uint32_t kf_file_fsid_freebsd11; + /* File device, FreeBSD 11 compat. */ + uint32_t kf_file_rdev_freebsd11; + /* File mode. */ + uint16_t kf_file_mode; + /* Round to 64 bit alignment. */ + uint16_t kf_file_pad0; + uint32_t kf_file_pad1; + } kf_file; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_sem_value; + uint16_t kf_sem_mode; + } kf_sem; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint64_t kf_pipe_addr; + uint64_t kf_pipe_peer; + uint32_t kf_pipe_buffer_cnt; + /* Round to 64 bit alignment. */ + uint32_t kf_pipe_pad0[3]; + } kf_pipe; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_pts_dev_freebsd11; + uint32_t kf_pts_pad0; + uint64_t kf_pts_dev; + /* Round to 64 bit alignment. */ + uint32_t kf_pts_pad1[4]; + } kf_pts; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + int32_t kf_pid; + } kf_proc; + } kf_un; + uint16_t kf_status; /* Status flags. */ + uint16_t kf_pad1; /* Round to 32 bit alignment. */ + int32_t _kf_ispare0; /* Space for more stuff. */ + target_cap_rights_t kf_cap_rights; /* Capability rights. */ + uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */ + /* Truncated before copyout in sysctl */ + char kf_path[PATH_MAX]; /* Path to file, if any. */ +}; + +struct target_kinfo_vmentry { + int32_t kve_structsize; /* Variable size of record. */ + int32_t kve_type; /* Type of map entry. */ + uint64_t kve_start; /* Starting address. */ + uint64_t kve_end; /* Finishing address. */ + uint64_t kve_offset; /* Mapping offset in object */ + uint64_t kve_vn_fileid; /* inode number if vnode */ + uint32_t kve_vn_fsid_freebsd11; /* dev_t of vnode location */ + int32_t kve_flags; /* Flags on map entry. */ + int32_t kve_resident; /* Number of resident pages. */ + int32_t kve_private_resident; /* Number of private pages. */ + int32_t kve_protection; /* Protection bitmask. */ + int32_t kve_ref_count; /* VM obj ref count. */ + int32_t kve_shadow_count; /* VM obj shadow count. */ + int32_t kve_vn_type; /* Vnode type. */ + uint64_t kve_vn_size; /* File size. */ + uint32_t kve_vn_rdev_freebsd11; /* Device id if device. */ + uint16_t kve_vn_mode; /* File mode. */ + uint16_t kve_status; /* Status flags. */ +#if (__FreeBSD_version >= 1300501 && __FreeBSD_version < 1400000) || \ + __FreeBSD_version >= 1400009 + union { + uint64_t _kve_vn_fsid; /* dev_t of vnode location */ + uint64_t _kve_obj; /* handle of anon obj */ + } kve_type_spec; +#define kve_vn_fsid kve_type_spec._kve_vn_fsid +#define kve_obj kve_type_spec._kve_obj +#else + uint64_t kve_vn_fsid; /* dev_t of vnode location */ +#endif + uint64_t kve_vn_rdev; /* Device id if device. */ + int _kve_ispare[8]; /* Space for more stuff. */ + /* Truncated before copyout in sysctl */ + char kve_path[PATH_MAX]; /* Path to VM obj, if any. */ +}; + +#endif /* ! 
_TARGET_OS_USER_H_ */ diff --git a/bsd-user/freebsd/target_os_vmparam.h b/bsd-user/freebsd/target_os_vmparam.h new file mode 100644 index 00000000000..990300c619f --- /dev/null +++ b/bsd-user/freebsd/target_os_vmparam.h @@ -0,0 +1,38 @@ +/* + * FreeBSD VM parameters definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_OS_VMPARAM_H_ +#define _TARGET_OS_VMPARAM_H_ + +#include "target_arch_vmparam.h" + +/* Compare to sys/exec.h */ +struct target_ps_strings { + abi_ulong ps_argvstr; + uint32_t ps_nargvstr; + abi_ulong ps_envstr; + uint32_t ps_nenvstr; +}; + +extern abi_ulong target_stkbas; +extern abi_ulong target_stksiz; + +#define TARGET_PS_STRINGS ((target_stkbas + target_stksiz) - \ + sizeof(struct target_ps_strings)) + +#endif /* !TARGET_OS_VMPARAM_H_ */ diff --git a/bsd-user/i386/target_arch.h b/bsd-user/i386/target_arch.h new file mode 100644 index 00000000000..73e9a028feb --- /dev/null +++ b/bsd-user/i386/target_arch.h @@ -0,0 +1,31 @@ +/* + * Intel x86 specific prototypes for bsd-user + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_H_ +#define _TARGET_ARCH_H_ + +/* target_arch_cpu.c */ +void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit, + int flags); +void bsd_i386_set_idt(int n, unsigned int dpl); +void bsd_i386_set_idt_base(uint64_t base); + +#define target_cpu_set_tls(env, newtls) + +#endif /* ! _TARGET_ARCH_H_ */ diff --git a/bsd-user/i386/target_arch_cpu.c b/bsd-user/i386/target_arch_cpu.c new file mode 100644 index 00000000000..d349e452997 --- /dev/null +++ b/bsd-user/i386/target_arch_cpu.c @@ -0,0 +1,71 @@ +/* + * i386 cpu related code + * + * Copyright (c) 2013 Stacey Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu.h" +#include "qemu/timer.h" + +#include "target_arch.h" + +static uint64_t *idt_table; + +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpu_get_host_ticks(); +} + +void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit, + int flags) +{ + unsigned int e1, e2; + uint32_t *p; + e1 = (addr << 16) | (limit & 0xffff); + e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); + e2 |= flags; + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); +} + + +static void set_gate(void *ptr, unsigned int type, unsigned int dpl, + uint32_t addr, unsigned int sel) +{ + uint32_t *p, e1, e2; + e1 = (addr & 0xffff) | (sel << 16); + e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); +} + +/* only dpl matters as we do only user space emulation */ +void bsd_i386_set_idt(int n, unsigned int dpl) +{ + set_gate(idt_table + n, 0, dpl, 0, 0); +} + +void bsd_i386_set_idt_base(uint64_t base) +{ + idt_table = g2h_untagged(base); +} + diff --git a/bsd-user/i386/target_arch_cpu.h b/bsd-user/i386/target_arch_cpu.h new file mode 100644 index 00000000000..b28602adbbd --- /dev/null +++ b/bsd-user/i386/target_arch_cpu.h @@ -0,0 +1,207 @@ +/* + * i386 cpu init and loop + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef _TARGET_ARCH_CPU_H_ +#define _TARGET_ARCH_CPU_H_ + +#include "target_arch.h" + +#define TARGET_DEFAULT_CPU_MODEL "qemu32" + +static inline void target_cpu_init(CPUX86State *env, + struct target_pt_regs *regs) +{ + uint64_t *gdt_table; + + env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; + if (env->features[FEAT_1_EDX] & CPUID_SSE) { + env->cr[4] |= CR4_OSFXSR_MASK; + env->hflags |= HF_OSFXSR_MASK; + } + + /* flags setup : we activate the IRQs by default as in user mode */ + env->eflags |= IF_MASK; + + /* register setup */ + env->regs[R_EAX] = regs->eax; + env->regs[R_EBX] = regs->ebx; + env->regs[R_ECX] = regs->ecx; + env->regs[R_EDX] = regs->edx; + env->regs[R_ESI] = regs->esi; + env->regs[R_EDI] = regs->edi; + env->regs[R_EBP] = regs->ebp; + env->regs[R_ESP] = regs->esp; + env->eip = regs->eip; + + /* interrupt setup */ + env->idt.limit = 255; + + env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + bsd_i386_set_idt_base(env->idt.base); + bsd_i386_set_idt(0, 0); + bsd_i386_set_idt(1, 0); + bsd_i386_set_idt(2, 0); + bsd_i386_set_idt(3, 3); + bsd_i386_set_idt(4, 3); + bsd_i386_set_idt(5, 0); + bsd_i386_set_idt(6, 0); + bsd_i386_set_idt(7, 0); + bsd_i386_set_idt(8, 0); + bsd_i386_set_idt(9, 0); + bsd_i386_set_idt(10, 0); + bsd_i386_set_idt(11, 0); + bsd_i386_set_idt(12, 0); + bsd_i386_set_idt(13, 0); + bsd_i386_set_idt(14, 0); + bsd_i386_set_idt(15, 0); + bsd_i386_set_idt(16, 0); + bsd_i386_set_idt(17, 0); + bsd_i386_set_idt(18, 0); + bsd_i386_set_idt(19, 0); + bsd_i386_set_idt(0x80, 3); + + /* segment setup */ + env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; + gdt_table = g2h_untagged(env->gdt.base); + + bsd_i386_write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | + (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); + + bsd_i386_write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | + (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); + + cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_SS, __USER_DS); + cpu_x86_load_seg(env, R_DS, __USER_DS); + cpu_x86_load_seg(env, R_ES, __USER_DS); + cpu_x86_load_seg(env, R_FS, __USER_DS); + cpu_x86_load_seg(env, R_GS, __USER_DS); + /* This hack makes Wine work... 
*/ + env->segs[R_FS].selector = 0; +} + +static inline void target_cpu_loop(CPUX86State *env) +{ + CPUState *cs = env_cpu(env); + int trapnr; + abi_ulong pc; + /* target_siginfo_t info; */ + + for (;;) { + cpu_exec_start(cs); + trapnr = cpu_exec(cs); + cpu_exec_end(cs); + process_queued_cpu_work(cs); + + switch (trapnr) { + case 0x80: + /* syscall from int $0x80 */ + if (bsd_type == target_freebsd) { + abi_ulong params = (abi_ulong) env->regs[R_ESP] + + sizeof(int32_t); + int32_t syscall_nr = env->regs[R_EAX]; + int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8; + + if (syscall_nr == TARGET_FREEBSD_NR_syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int32_t); + } else if (syscall_nr == TARGET_FREEBSD_NR___syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int64_t); + } + get_user_s32(arg1, params); + params += sizeof(int32_t); + get_user_s32(arg2, params); + params += sizeof(int32_t); + get_user_s32(arg3, params); + params += sizeof(int32_t); + get_user_s32(arg4, params); + params += sizeof(int32_t); + get_user_s32(arg5, params); + params += sizeof(int32_t); + get_user_s32(arg6, params); + params += sizeof(int32_t); + get_user_s32(arg7, params); + params += sizeof(int32_t); + get_user_s32(arg8, params); + env->regs[R_EAX] = do_freebsd_syscall(env, + syscall_nr, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } else { /* if (bsd_type == target_openbsd) */ + env->regs[R_EAX] = do_openbsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP]); + } + if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { + env->regs[R_EAX] = -env->regs[R_EAX]; + env->eflags |= CC_C; + } else { + env->eflags &= ~CC_C; + } + break; + + case EXCP_INTERRUPT: + /* just indicate that signals should be handled asap */ + break; + + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; + + default: + pc = env->segs[R_CS].base + env->eip; + fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - " + "aborting\n", (long)pc, trapnr); + abort(); + } + process_pending_signals(env); + } +} + +static inline void target_cpu_clone_regs(CPUX86State *env, target_ulong newsp) +{ + if (newsp) { + env->regs[R_ESP] = newsp; + } + env->regs[R_EAX] = 0; +} + +static inline void target_cpu_reset(CPUArchState *cpu) +{ + cpu_reset(env_cpu(cpu)); +} + +#endif /* ! _TARGET_ARCH_CPU_H_ */ diff --git a/bsd-user/i386/target_arch_elf.h b/bsd-user/i386/target_arch_elf.h new file mode 100644 index 00000000000..eb760e07faa --- /dev/null +++ b/bsd-user/i386/target_arch_elf.h @@ -0,0 +1,35 @@ +/* + * i386 ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_ARCH_ELF_H_ +#define _TARGET_ARCH_ELF_H_ + +#define ELF_START_MMAP 0x80000000 +#define ELF_ET_DYN_LOAD_ADDR 0x01001000 +#define elf_check_arch(x) (((x) == EM_386) || ((x) == EM_486)) + +#define ELF_HWCAP 0 /* FreeBSD doesn't do AT_HWCAP{,2} on x86 */ + +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#endif /* _TARGET_ARCH_ELF_H_ */ diff --git a/bsd-user/i386/target_arch_reg.h b/bsd-user/i386/target_arch_reg.h new file mode 100644 index 00000000000..1fce1daf015 --- /dev/null +++ b/bsd-user/i386/target_arch_reg.h @@ -0,0 +1,82 @@ +/* + * FreeBSD i386 register structures + * + * Copyright (c) 2015 Stacey Son + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_REG_H_ +#define _TARGET_ARCH_REG_H_ + +/* See sys/i386/include/reg.h */ +typedef struct target_reg { + uint32_t r_fs; + uint32_t r_es; + uint32_t r_ds; + uint32_t r_edi; + uint32_t r_esi; + uint32_t r_ebp; + uint32_t r_isp; + uint32_t r_ebx; + uint32_t r_edx; + uint32_t r_ecx; + uint32_t r_eax; + uint32_t r_trapno; + uint32_t r_err; + uint32_t r_eip; + uint32_t r_cs; + uint32_t r_eflags; + uint32_t r_esp; + uint32_t r_ss; + uint32_t r_gs; +} target_reg_t; + +typedef struct target_fpreg { + uint32_t fpr_env[7]; + uint8_t fpr_acc[8][10]; + uint32_t fpr_ex_sw; + uint8_t fpr_pad[64]; +} target_fpreg_t; + +static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env) +{ + + regs->r_fs = env->segs[R_FS].selector & 0xffff; + regs->r_es = env->segs[R_ES].selector & 0xffff; + regs->r_ds = env->segs[R_DS].selector & 0xffff; + + regs->r_edi = env->regs[R_EDI]; + regs->r_esi = env->regs[R_ESI]; + regs->r_ebp = env->regs[R_EBP]; + /* regs->r_isp = env->regs[R_ISP]; XXX */ + regs->r_ebx = env->regs[R_EBX]; + regs->r_edx = env->regs[R_EDX]; + regs->r_ecx = env->regs[R_ECX]; + regs->r_eax = env->regs[R_EAX]; + /* regs->r_trapno = env->regs[R_TRAPNO]; XXX */ + regs->r_err = env->error_code; /* XXX ? */ + regs->r_eip = env->eip; + + regs->r_cs = env->segs[R_CS].selector & 0xffff; + + regs->r_eflags = env->eflags; + regs->r_esp = env->regs[R_ESP]; + + regs->r_ss = env->segs[R_SS].selector & 0xffff; + regs->r_gs = env->segs[R_GS].selector & 0xffff; +} + +#endif /* !_TARGET_ARCH_REG_H_ */ diff --git a/bsd-user/i386/target_arch_signal.h b/bsd-user/i386/target_arch_signal.h new file mode 100644 index 00000000000..a90750d602c --- /dev/null +++ b/bsd-user/i386/target_arch_signal.h @@ -0,0 +1,92 @@ +/* + * i386 dependent signal definitions + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef TARGET_ARCH_SIGNAL_H +#define TARGET_ARCH_SIGNAL_H + +#include "cpu.h" + +/* Size of the signal trampoline code placed on the stack. */ +#define TARGET_SZSIGCODE 0 + +/* compare to x86/include/_limits.h */ +#define TARGET_MINSIGSTKSZ (512 * 4) /* min sig stack size */ +#define TARGET_SIGSTKSZ (MINSIGSTKSZ + 32768) /* recommended size */ + +struct target_sigcontext { + /* to be added */ +}; + +typedef struct target_mcontext { +} target_mcontext_t; + +typedef struct target_ucontext { + target_sigset_t uc_sigmask; + target_mcontext_t uc_mcontext; + abi_ulong uc_link; + target_stack_t uc_stack; + int32_t uc_flags; + int32_t __spare__[4]; +} target_ucontext_t; + +struct target_sigframe { + abi_ulong sf_signum; + abi_ulong sf_siginfo; /* code or pointer to sf_si */ + abi_ulong sf_ucontext; /* points to sf_uc */ + abi_ulong sf_addr; /* undocumented 4th arg */ + target_ucontext_t sf_uc; /* = *sf_ucontext */ + target_siginfo_t sf_si; /* = *sf_siginfo (SA_SIGINFO case)*/ + uint32_t __spare__[2]; +}; + +/* + * Compare to i386/i386/machdep.c sendsig() + * Assumes that target stack frame memory is locked. + */ +static inline abi_long set_sigtramp_args(CPUX86State *regs, + int sig, struct target_sigframe *frame, abi_ulong frame_addr, + struct target_sigaction *ka) +{ + /* XXX return -TARGET_EOPNOTSUPP; */ + return 0; +} + +/* Compare to i386/i386/machdep.c get_mcontext() */ +static inline abi_long get_mcontext(CPUX86State *regs, + target_mcontext_t *mcp, int flags) +{ + /* XXX */ + return -TARGET_EOPNOTSUPP; +} + +/* Compare to i386/i386/machdep.c set_mcontext() */ +static inline abi_long set_mcontext(CPUX86State *regs, + target_mcontext_t *mcp, int srflag) +{ + /* XXX */ + return -TARGET_EOPNOTSUPP; +} + +static inline abi_long get_ucontext_sigreturn(CPUX86State *regs, + abi_ulong target_sf, abi_ulong *target_uc) +{ + /* XXX */ + *target_uc = 0; + return -TARGET_EOPNOTSUPP; +} + +#endif /* TARGET_ARCH_SIGNAL_H */ diff --git a/bsd-user/i386/target_arch_sigtramp.h b/bsd-user/i386/target_arch_sigtramp.h new file mode 100644 index 00000000000..cb4e89b0b0d --- /dev/null +++ b/bsd-user/i386/target_arch_sigtramp.h @@ -0,0 +1,29 @@ +/* + * Intel i386 sigcode for bsd-user + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see .
+ */ + +#ifndef _TARGET_ARCH_SIGTRAMP_H_ +#define _TARGET_ARCH_SIGTRAMP_H_ + +static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, + unsigned sys_sigreturn) +{ + + return 0; +} +#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ diff --git a/bsd-user/i386/target_arch_thread.h b/bsd-user/i386/target_arch_thread.h new file mode 100644 index 00000000000..e65e476f757 --- /dev/null +++ b/bsd-user/i386/target_arch_thread.h @@ -0,0 +1,47 @@ +/* + * i386 thread support + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_ARCH_THREAD_H_ +#define _TARGET_ARCH_THREAD_H_ + +/* Compare to vm_machdep.c cpu_set_upcall_kse() */ +static inline void target_thread_set_upcall(CPUX86State *regs, abi_ulong entry, + abi_ulong arg, abi_ulong stack_base, abi_ulong stack_size) +{ + /* XXX */ +} + +static inline void target_thread_init(struct target_pt_regs *regs, + struct image_info *infop) +{ + regs->esp = infop->start_stack; + regs->eip = infop->entry; + + /* + * SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + * contains a pointer to a function which might be registered using + * `atexit'. This provides a means for the dynamic linker to call DT_FINI + * functions for shared libraries that have been loaded before the code + * runs. + * + * A value of 0 tells us we have no such handler. + */ + regs->edx = 0; +} + +#endif /* !_TARGET_ARCH_THREAD_H_ */ diff --git a/bsd-user/i386/target_arch_vmparam.h b/bsd-user/i386/target_arch_vmparam.h new file mode 100644 index 00000000000..bb7718265b2 --- /dev/null +++ b/bsd-user/i386/target_arch_vmparam.h @@ -0,0 +1,46 @@ +/* + * i386 VM parameters definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see .
+ */ +#ifndef _TARGET_ARCH_VMPARAM_H_ +#define _TARGET_ARCH_VMPARAM_H_ + +#include "cpu.h" + +/* compare to i386/include/vmparam.h */ +#define TARGET_MAXTSIZ (128 * MiB) /* max text size */ +#define TARGET_DFLDSIZ (128 * MiB) /* initial data size limit */ +#define TARGET_MAXDSIZ (512 * MiB) /* max data size */ +#define TARGET_DFLSSIZ (8 * MiB) /* initial stack size limit */ +#define TARGET_MAXSSIZ (64 * MiB) /* max stack size */ +#define TARGET_SGROWSIZ (128 * KiB) /* amount to grow stack */ + +#define TARGET_RESERVED_VA 0xf7000000 + +#define TARGET_USRSTACK (0xbfc00000) + +static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) +{ + return state->regs[R_ESP]; +} + +static inline void set_second_rval(CPUX86State *state, abi_ulong retval2) +{ + state->regs[R_EDX] = retval2; +} + +#endif /* !_TARGET_ARCH_VMPARAM_H_ */ diff --git a/bsd-user/i386/target_signal.h b/bsd-user/i386/target_signal.h deleted file mode 100644 index 2ef36d1f980..00000000000 --- a/bsd-user/i386/target_signal.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) -{ - return state->regs[R_ESP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/bsd-user/main.c b/bsd-user/main.c index 798aba512c1..cb5ea402361 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -1,7 +1,8 @@ /* - * qemu user main + * qemu bsd user main * * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2013-14 Stacey Son * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +18,11 @@ * along with this program; if not, see . */ +#include +#include +#include +#include + #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/units.h" @@ -32,632 +38,115 @@ #include "qemu/path.h" #include "qemu/help_option.h" #include "qemu/module.h" -#include "cpu.h" #include "exec/exec-all.h" #include "tcg/tcg.h" #include "qemu/timer.h" #include "qemu/envlist.h" +#include "qemu/cutils.h" #include "exec/log.h" #include "trace/control.h" +#include "crypto/init.h" +#include "qemu/guest-random.h" + +#include "host-os.h" +#include "target_arch_cpu.h" int singlestep; -unsigned long mmap_min_addr; uintptr_t guest_base; bool have_guest_base; +/* + * When running 32-on-64 we should make sure we can fit all of the possible + * guest address space into a contiguous chunk of virtual host memory. + * + * This way we will never overlap with our own libraries or binaries or stack + * or anything else that QEMU maps. + * + * Many cpus reserve the high bit (or more than one for some 64-bit cpus) + * of the address for the kernel. Some cpus rely on this and user space + * uses the high bit(s) for pointer tagging and the like. For them, we + * must preserve the expected address space. + */ +#ifndef MAX_RESERVED_VA +# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS +# if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \ + (TARGET_LONG_BITS == 32 || defined(TARGET_ABI32)) +/* + * There are a number of places where we assign reserved_va to a variable + * of type abi_ulong and expect it to fit. Avoid the last page. 
+ */ +# define MAX_RESERVED_VA (0xfffffffful & TARGET_PAGE_MASK) +# else +# define MAX_RESERVED_VA (1ul << TARGET_VIRT_ADDR_SPACE_BITS) +# endif +# else +# define MAX_RESERVED_VA 0 +# endif +#endif + +/* + * That said, reserving *too* much vm space via mmap can run into problems + * with rlimits, oom due to page table creation, etc. We will still try it, + * if directed by the command-line option, but not by default. + */ +#if HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32 +unsigned long reserved_va = MAX_RESERVED_VA; +#else unsigned long reserved_va; +#endif static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; -extern char **environ; enum BSDType bsd_type; +char qemu_proc_pathname[PATH_MAX]; /* full path to exeutable */ -/* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so - we allocate a bigger stack. Need a better solution, for example - by remapping the process stack directly at the right place */ -unsigned long x86_stack_size = 512 * 1024; +unsigned long target_maxtsiz = TARGET_MAXTSIZ; /* max text size */ +unsigned long target_dfldsiz = TARGET_DFLDSIZ; /* initial data size limit */ +unsigned long target_maxdsiz = TARGET_MAXDSIZ; /* max data size */ +unsigned long target_dflssiz = TARGET_DFLSSIZ; /* initial data size limit */ +unsigned long target_maxssiz = TARGET_MAXSSIZ; /* max stack size */ +unsigned long target_sgrowsiz = TARGET_SGROWSIZ; /* amount to grow stack */ -void gemu_log(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); -} - -#if defined(TARGET_I386) -int cpu_get_pic_interrupt(CPUX86State *env) -{ - return -1; -} -#endif +/* Helper routines for implementing atomic operations. */ void fork_start(void) { + start_exclusive(); + cpu_list_lock(); + mmap_fork_start(); } void fork_end(int child) { if (child) { - gdbserver_fork(thread_cpu); - } -} - -#ifdef TARGET_I386 -/***********************************************************/ -/* CPUX86 core interface */ - -uint64_t cpu_get_tsc(CPUX86State *env) -{ - return cpu_get_host_ticks(); -} - -static void write_dt(void *ptr, unsigned long addr, unsigned long limit, - int flags) -{ - unsigned int e1, e2; - uint32_t *p; - e1 = (addr << 16) | (limit & 0xffff); - e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); - e2 |= flags; - p = ptr; - p[0] = tswap32(e1); - p[1] = tswap32(e2); -} - -static uint64_t *idt_table; -#ifdef TARGET_X86_64 -static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, - uint64_t addr, unsigned int sel) -{ - uint32_t *p, e1, e2; - e1 = (addr & 0xffff) | (sel << 16); - e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); - p = ptr; - p[0] = tswap32(e1); - p[1] = tswap32(e2); - p[2] = tswap32(addr >> 32); - p[3] = 0; -} -/* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) -{ - set_gate64(idt_table + n * 2, 0, dpl, 0, 0); -} -#else -static void set_gate(void *ptr, unsigned int type, unsigned int dpl, - uint32_t addr, unsigned int sel) -{ - uint32_t *p, e1, e2; - e1 = (addr & 0xffff) | (sel << 16); - e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); - p = ptr; - p[0] = tswap32(e1); - p[1] = tswap32(e2); -} - -/* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) -{ - set_gate(idt_table + n, 0, dpl, 0, 0); -} -#endif - -void cpu_loop(CPUX86State *env) -{ - CPUState *cs = env_cpu(env); - int trapnr; - abi_ulong pc; - //target_siginfo_t info; 
- - for(;;) { - cpu_exec_start(cs); - trapnr = cpu_exec(cs); - cpu_exec_end(cs); - process_queued_cpu_work(cs); - - switch(trapnr) { - case 0x80: - /* syscall from int $0x80 */ - if (bsd_type == target_freebsd) { - abi_ulong params = (abi_ulong) env->regs[R_ESP] + - sizeof(int32_t); - int32_t syscall_nr = env->regs[R_EAX]; - int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8; - - if (syscall_nr == TARGET_FREEBSD_NR_syscall) { - get_user_s32(syscall_nr, params); - params += sizeof(int32_t); - } else if (syscall_nr == TARGET_FREEBSD_NR___syscall) { - get_user_s32(syscall_nr, params); - params += sizeof(int64_t); - } - get_user_s32(arg1, params); - params += sizeof(int32_t); - get_user_s32(arg2, params); - params += sizeof(int32_t); - get_user_s32(arg3, params); - params += sizeof(int32_t); - get_user_s32(arg4, params); - params += sizeof(int32_t); - get_user_s32(arg5, params); - params += sizeof(int32_t); - get_user_s32(arg6, params); - params += sizeof(int32_t); - get_user_s32(arg7, params); - params += sizeof(int32_t); - get_user_s32(arg8, params); - env->regs[R_EAX] = do_freebsd_syscall(env, - syscall_nr, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } else { //if (bsd_type == target_openbsd) - env->regs[R_EAX] = do_openbsd_syscall(env, - env->regs[R_EAX], - env->regs[R_EBX], - env->regs[R_ECX], - env->regs[R_EDX], - env->regs[R_ESI], - env->regs[R_EDI], - env->regs[R_EBP]); - } - if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { - env->regs[R_EAX] = -env->regs[R_EAX]; - env->eflags |= CC_C; - } else { - env->eflags &= ~CC_C; - } - break; -#ifndef TARGET_ABI32 - case EXCP_SYSCALL: - /* syscall from syscall instruction */ - if (bsd_type == target_freebsd) - env->regs[R_EAX] = do_freebsd_syscall(env, - env->regs[R_EAX], - env->regs[R_EDI], - env->regs[R_ESI], - env->regs[R_EDX], - env->regs[R_ECX], - env->regs[8], - env->regs[9], 0, 0); - else { //if (bsd_type == target_openbsd) - env->regs[R_EAX] = do_openbsd_syscall(env, - env->regs[R_EAX], - env->regs[R_EDI], - env->regs[R_ESI], - env->regs[R_EDX], - env->regs[10], - env->regs[8], - env->regs[9]); - } - env->eip = env->exception_next_eip; - if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { - env->regs[R_EAX] = -env->regs[R_EAX]; - env->eflags |= CC_C; - } else { - env->eflags &= ~CC_C; - } - break; -#endif -#if 0 - case EXCP0B_NOSEG: - case EXCP0C_STACK: - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); - break; - case EXCP0D_GPF: - /* XXX: potential problem if ABI32 */ -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_fault(env); - } else -#endif - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); - } - break; - case EXCP0E_PAGE: - info.si_signo = SIGSEGV; - info.si_errno = 0; - if (!(env->error_code & 1)) - info.si_code = TARGET_SEGV_MAPERR; - else - info.si_code = TARGET_SEGV_ACCERR; - info._sifields._sigfault._addr = env->cr[2]; - queue_signal(env, info.si_signo, &info); - break; - case EXCP00_DIVZ: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); - } else -#endif - { - /* division by zero */ - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = TARGET_FPE_INTDIV; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, &info); + CPUState *cpu, *next_cpu; + /* + * Child processes 
created by fork() only have a single thread. Discard + * information about the parent threads. + */ + CPU_FOREACH_SAFE(cpu, next_cpu) { + if (cpu != thread_cpu) { + QTAILQ_REMOVE_RCU(&cpus, cpu, node); } - break; - case EXCP01_DB: - case EXCP03_INT3: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); - } else -#endif - { - info.si_signo = SIGTRAP; - info.si_errno = 0; - if (trapnr == EXCP01_DB) { - info.si_code = TARGET_TRAP_BRKPT; - info._sifields._sigfault._addr = env->eip; - } else { - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - } - queue_signal(env, info.si_signo, &info); - } - break; - case EXCP04_INTO: - case EXCP05_BOUND: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); - } else -#endif - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SI_KERNEL; - info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); - } - break; - case EXCP06_ILLOP: - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, &info); - break; -#endif - case EXCP_INTERRUPT: - /* just indicate that signals should be handled asap */ - break; -#if 0 - case EXCP_DEBUG: - { - int sig; - - sig = gdb_handlesig (env, TARGET_SIGTRAP); - if (sig) - { - info.si_signo = sig; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); - } - } - break; -#endif - default: - pc = env->segs[R_CS].base + env->eip; - fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", - (long)pc, trapnr); - abort(); } - process_pending_signals(env); - } -} -#endif - -#ifdef TARGET_SPARC -#define SPARC64_STACK_BIAS 2047 - -//#define DEBUG_WIN -/* WARNING: dealing with register windows _is_ complicated. More info - can be found at http://www.sics.se/~psm/sparcstack.html */ -static inline int get_reg_index(CPUSPARCState *env, int cwp, int index) -{ - index = (index + cwp * 16) % (16 * env->nwindows); - /* wrap handling : if cwp is on the last window, then we use the - registers 'after' the end */ - if (index < 8 && env->cwp == env->nwindows - 1) - index += 16 * env->nwindows; - return index; -} - -/* save the register window 'cwp1' */ -static inline void save_window_offset(CPUSPARCState *env, int cwp1) -{ - unsigned int i; - abi_ulong sp_ptr; - - sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)]; -#ifdef TARGET_SPARC64 - if (sp_ptr & 3) - sp_ptr += SPARC64_STACK_BIAS; -#endif -#if defined(DEBUG_WIN) - printf("win_overflow: sp_ptr=0x" TARGET_ABI_FMT_lx " save_cwp=%d\n", - sp_ptr, cwp1); -#endif - for(i = 0; i < 16; i++) { - /* FIXME - what to do if put_user() fails? 
*/ - put_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr); - sp_ptr += sizeof(abi_ulong); - } -} - -static void save_window(CPUSPARCState *env) -{ -#ifndef TARGET_SPARC64 - unsigned int new_wim; - new_wim = ((env->wim >> 1) | (env->wim << (env->nwindows - 1))) & - ((1LL << env->nwindows) - 1); - save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2)); - env->wim = new_wim; -#else - /* - * cansave is zero if the spill trap handler is triggered by `save` and - * nonzero if triggered by a `flushw` - */ - save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - 2)); - env->cansave++; - env->canrestore--; -#endif -} - -static void restore_window(CPUSPARCState *env) -{ -#ifndef TARGET_SPARC64 - unsigned int new_wim; -#endif - unsigned int i, cwp1; - abi_ulong sp_ptr; - -#ifndef TARGET_SPARC64 - new_wim = ((env->wim << 1) | (env->wim >> (env->nwindows - 1))) & - ((1LL << env->nwindows) - 1); -#endif - - /* restore the invalid window */ - cwp1 = cpu_cwp_inc(env, env->cwp + 1); - sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)]; -#ifdef TARGET_SPARC64 - if (sp_ptr & 3) - sp_ptr += SPARC64_STACK_BIAS; -#endif -#if defined(DEBUG_WIN) - printf("win_underflow: sp_ptr=0x" TARGET_ABI_FMT_lx " load_cwp=%d\n", - sp_ptr, cwp1); -#endif - for(i = 0; i < 16; i++) { - /* FIXME - what to do if get_user() fails? */ - get_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr); - sp_ptr += sizeof(abi_ulong); + mmap_fork_end(child); + /* + * qemu_init_cpu_list() takes care of reinitializing the exclusive + * state, so we don't need to end_exclusive() here. + */ + qemu_init_cpu_list(); + gdbserver_fork(thread_cpu); + } else { + mmap_fork_end(child); + cpu_list_unlock(); + end_exclusive(); } -#ifdef TARGET_SPARC64 - env->canrestore++; - if (env->cleanwin < env->nwindows - 1) - env->cleanwin++; - env->cansave--; -#else - env->wim = new_wim; -#endif } -static void flush_windows(CPUSPARCState *env) +void cpu_loop(CPUArchState *env) { - int offset, cwp1; - - offset = 1; - for(;;) { - /* if restore would invoke restore_window(), then we can stop */ - cwp1 = cpu_cwp_inc(env, env->cwp + offset); -#ifndef TARGET_SPARC64 - if (env->wim & (1 << cwp1)) - break; -#else - if (env->canrestore == 0) - break; - env->cansave++; - env->canrestore--; -#endif - save_window_offset(env, cwp1); - offset++; - } - cwp1 = cpu_cwp_inc(env, env->cwp + 1); -#ifndef TARGET_SPARC64 - /* set wim so that restore will reload the registers */ - env->wim = 1 << cwp1; -#endif -#if defined(DEBUG_WIN) - printf("flush_windows: nb=%d\n", offset - 1); -#endif + target_cpu_loop(env); } -void cpu_loop(CPUSPARCState *env) -{ - CPUState *cs = env_cpu(env); - int trapnr, ret, syscall_nr; - //target_siginfo_t info; - - while (1) { - cpu_exec_start(cs); - trapnr = cpu_exec(cs); - cpu_exec_end(cs); - process_queued_cpu_work(cs); - - switch (trapnr) { -#ifndef TARGET_SPARC64 - case 0x80: -#else - /* FreeBSD uses 0x141 for syscalls too */ - case 0x141: - if (bsd_type != target_freebsd) - goto badtrap; - /* fallthrough */ - case 0x100: -#endif - syscall_nr = env->gregs[1]; - if (bsd_type == target_freebsd) - ret = do_freebsd_syscall(env, syscall_nr, - env->regwptr[0], env->regwptr[1], - env->regwptr[2], env->regwptr[3], - env->regwptr[4], env->regwptr[5], 0, 0); - else if (bsd_type == target_netbsd) - ret = do_netbsd_syscall(env, syscall_nr, - env->regwptr[0], env->regwptr[1], - env->regwptr[2], env->regwptr[3], - env->regwptr[4], env->regwptr[5]); - else { //if (bsd_type == target_openbsd) -#if defined(TARGET_SPARC64) - syscall_nr 
&= ~(TARGET_OPENBSD_SYSCALL_G7RFLAG | - TARGET_OPENBSD_SYSCALL_G2RFLAG); -#endif - ret = do_openbsd_syscall(env, syscall_nr, - env->regwptr[0], env->regwptr[1], - env->regwptr[2], env->regwptr[3], - env->regwptr[4], env->regwptr[5]); - } - if ((unsigned int)ret >= (unsigned int)(-515)) { - ret = -ret; -#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) - env->xcc |= PSR_CARRY; -#else - env->psr |= PSR_CARRY; -#endif - } else { -#if defined(TARGET_SPARC64) && !defined(TARGET_ABI32) - env->xcc &= ~PSR_CARRY; -#else - env->psr &= ~PSR_CARRY; -#endif - } - env->regwptr[0] = ret; - /* next instruction */ -#if defined(TARGET_SPARC64) - if (bsd_type == target_openbsd && - env->gregs[1] & TARGET_OPENBSD_SYSCALL_G2RFLAG) { - env->pc = env->gregs[2]; - env->npc = env->pc + 4; - } else if (bsd_type == target_openbsd && - env->gregs[1] & TARGET_OPENBSD_SYSCALL_G7RFLAG) { - env->pc = env->gregs[7]; - env->npc = env->pc + 4; - } else { - env->pc = env->npc; - env->npc = env->npc + 4; - } -#else - env->pc = env->npc; - env->npc = env->npc + 4; -#endif - break; - case 0x83: /* flush windows */ -#ifdef TARGET_ABI32 - case 0x103: -#endif - flush_windows(env); - /* next instruction */ - env->pc = env->npc; - env->npc = env->npc + 4; - break; -#ifndef TARGET_SPARC64 - case TT_WIN_OVF: /* window overflow */ - save_window(env); - break; - case TT_WIN_UNF: /* window underflow */ - restore_window(env); - break; - case TT_TFAULT: - case TT_DFAULT: -#if 0 - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->mmuregs[4]; - queue_signal(env, info.si_signo, &info); - } -#endif - break; -#else - case TT_SPILL: /* window overflow */ - save_window(env); - break; - case TT_FILL: /* window underflow */ - restore_window(env); - break; - case TT_TFAULT: - case TT_DFAULT: -#if 0 - { - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - if (trapnr == TT_DFAULT) - info._sifields._sigfault._addr = env->dmmuregs[4]; - else - info._sifields._sigfault._addr = env->tsptr->tpc; - //queue_signal(env, info.si_signo, &info); - } -#endif - break; -#endif - case EXCP_INTERRUPT: - /* just indicate that signals should be handled asap */ - break; - case EXCP_DEBUG: - { -#if 0 - int sig = -#endif - gdb_handlesig(cs, TARGET_SIGTRAP); -#if 0 - if (sig) - { - info.si_signo = sig; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - //queue_signal(env, info.si_signo, &info); - } -#endif - } - break; - default: -#ifdef TARGET_SPARC64 - badtrap: -#endif - printf ("Unhandled trap: 0x%x\n", trapnr); - cpu_dump_state(cs, stderr, 0); - exit (1); - } - process_pending_signals (env); - } -} - -#endif - static void usage(void) { printf("qemu-" TARGET_NAME " version " QEMU_FULL_VERSION @@ -681,7 +170,6 @@ static void usage(void) "-d item1[,...] enable logging of specified items\n" " (use '-d help' for a list of log items)\n" "-D logfile write logs to 'logfile' (default stderr)\n" - "-p pagesize set the host page size to 'pagesize'\n" "-singlestep always run in singlestep mode\n" "-strace log system calls\n" "-trace [[enable=]][,events=][,file=]\n" @@ -701,11 +189,20 @@ static void usage(void) , TARGET_NAME, interp_prefix, - x86_stack_size); + target_dflssiz); exit(1); } -THREAD CPUState *thread_cpu; +__thread CPUState *thread_cpu; + +void stop_all_tasks(void) +{ + /* + * We trust when using NPTL (pthreads) start_exclusive() handles thread + * stopping correctly. 
+ */ + start_exclusive(); +} bool qemu_cpu_is_self(CPUState *cpu) { @@ -722,7 +219,6 @@ void init_task_state(TaskState *ts) { int i; - ts->used = 1; ts->first_free = ts->sigqueue_table; for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) { ts->sigqueue_table[i].next = &ts->sigqueue_table[i + 1]; @@ -730,6 +226,48 @@ void init_task_state(TaskState *ts) ts->sigqueue_table[i].next = NULL; } +void gemu_log(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static void +adjust_ssize(void) +{ + struct rlimit rl; + + if (getrlimit(RLIMIT_STACK, &rl) != 0) { + return; + } + + target_maxssiz = MIN(target_maxssiz, rl.rlim_max); + target_dflssiz = MIN(MAX(target_dflssiz, rl.rlim_cur), target_maxssiz); + + rl.rlim_max = target_maxssiz; + rl.rlim_cur = target_dflssiz; + setrlimit(RLIMIT_STACK, &rl); +} + +static void save_proc_pathname(char *argv0) +{ + int mib[4]; + size_t len; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PATHNAME; + mib[3] = -1; + + len = sizeof(qemu_proc_pathname); + if (sysctl(mib, 4, qemu_proc_pathname, &len, NULL, 0)) { + perror("sysctl"); + } +} + int main(int argc, char **argv) { const char *filename; @@ -737,20 +275,28 @@ int main(int argc, char **argv) const char *cpu_type; const char *log_file = NULL; const char *log_mask = NULL; + const char *seed_optarg = NULL; struct target_pt_regs regs1, *regs = ®s1; struct image_info info1, *info = &info1; - TaskState ts1, *ts = &ts1; + struct bsd_binprm bprm; + TaskState *ts; CPUArchState *env; CPUState *cpu; - int optind; + int optind, rv; const char *r; const char *gdbstub = NULL; char **target_environ, **wrk; envlist_t *envlist = NULL; - bsd_type = target_openbsd; + bsd_type = HOST_DEFAULT_BSD_TYPE; + char *argv0 = NULL; + + adjust_ssize(); - if (argc <= 1) + if (argc <= 1) { usage(); + } + + save_proc_pathname(argv[0]); error_init(argv[0]); module_call_init(MODULE_INIT_TRACE); @@ -770,11 +316,13 @@ int main(int argc, char **argv) optind = 1; for (;;) { - if (optind >= argc) + if (optind >= argc) { break; + } r = argv[optind]; - if (r[0] != '-') + if (r[0] != '-') { break; + } optind++; r++; if (!strcmp(r, "-")) { @@ -791,24 +339,31 @@ int main(int argc, char **argv) log_file = argv[optind++]; } else if (!strcmp(r, "E")) { r = argv[optind++]; - if (envlist_setenv(envlist, r) != 0) + if (envlist_setenv(envlist, r) != 0) { usage(); + } } else if (!strcmp(r, "ignore-environment")) { envlist_free(envlist); envlist = envlist_create(); } else if (!strcmp(r, "U")) { r = argv[optind++]; - if (envlist_unsetenv(envlist, r) != 0) + if (envlist_unsetenv(envlist, r) != 0) { usage(); + } } else if (!strcmp(r, "s")) { r = argv[optind++]; - x86_stack_size = strtol(r, (char **)&r, 0); - if (x86_stack_size <= 0) + rv = qemu_strtoul(r, &r, 0, &target_dflssiz); + if (rv < 0 || target_dflssiz <= 0) { usage(); - if (*r == 'M') - x86_stack_size *= MiB; - else if (*r == 'k' || *r == 'K') - x86_stack_size *= KiB; + } + if (*r == 'M') { + target_dflssiz *= 1024 * 1024; + } else if (*r == 'k' || *r == 'K') { + target_dflssiz *= 1024; + } + if (target_dflssiz > target_maxssiz) { + usage(); + } } else if (!strcmp(r, "L")) { interp_prefix = argv[optind++]; } else if (!strcmp(r, "p")) { @@ -825,15 +380,18 @@ int main(int argc, char **argv) } else if (!strcmp(r, "cpu")) { cpu_model = argv[optind++]; if (is_help_option(cpu_model)) { -/* XXX: implement xxx_cpu_list for targets that still miss it */ + /* XXX: implement xxx_cpu_list for targets that still miss it */ #if defined(cpu_list) - 
cpu_list(); + cpu_list(); #endif exit(1); } } else if (!strcmp(r, "B")) { - guest_base = strtol(argv[optind++], NULL, 0); - have_guest_base = true; + rv = qemu_strtoul(argv[optind++], NULL, 0, &guest_base); + if (rv < 0) { + usage(); + } + have_guest_base = true; } else if (!strcmp(r, "drop-ld-preload")) { (void) envlist_unsetenv(envlist, "LD_PRELOAD"); } else if (!strcmp(r, "bsd")) { @@ -847,12 +405,16 @@ int main(int argc, char **argv) usage(); } optind++; + } else if (!strcmp(r, "seed")) { + seed_optarg = optarg; } else if (!strcmp(r, "singlestep")) { singlestep = 1; } else if (!strcmp(r, "strace")) { do_strace = 1; } else if (!strcmp(r, "trace")) { trace_opt_parse(optarg); + } else if (!strcmp(r, "0")) { + argv0 = argv[optind++]; } else { usage(); } @@ -876,6 +438,9 @@ int main(int argc, char **argv) usage(); } filename = argv[optind]; + if (argv0) { + argv[optind] = argv0; + } if (!trace_init_backends()) { exit(1); @@ -885,6 +450,9 @@ int main(int argc, char **argv) /* Zero out regs */ memset(regs, 0, sizeof(struct target_pt_regs)); + /* Zero bsd params */ + memset(&bprm, 0, sizeof(bprm)); + /* Zero out image_info */ memset(info, 0, sizeof(struct image_info)); @@ -892,36 +460,21 @@ int main(int argc, char **argv) init_paths(interp_prefix); if (cpu_model == NULL) { -#if defined(TARGET_I386) -#ifdef TARGET_X86_64 - cpu_model = "qemu64"; -#else - cpu_model = "qemu32"; -#endif -#elif defined(TARGET_SPARC) -#ifdef TARGET_SPARC64 - cpu_model = "TI UltraSparc II"; -#else - cpu_model = "Fujitsu MB86904"; -#endif -#else - cpu_model = "any"; -#endif + cpu_model = TARGET_DEFAULT_CPU_MODEL; } cpu_type = parse_cpu_option(cpu_model); + /* init tcg before creating CPUs and to get qemu_host_page_size */ { AccelClass *ac = ACCEL_GET_CLASS(current_accel()); - ac->init_machine(NULL); accel_init_interfaces(ac); + ac->init_machine(NULL); } cpu = cpu_create(cpu_type); env = cpu->env_ptr; -#if defined(TARGET_SPARC) || defined(TARGET_PPC) cpu_reset(cpu); -#endif thread_cpu = cpu; if (getenv("QEMU_STRACE")) { @@ -931,34 +484,31 @@ int main(int argc, char **argv) target_environ = envlist_to_environ(envlist, NULL); envlist_free(envlist); + if (reserved_va) { + mmap_next_start = reserved_va; + } + + { + Error *err = NULL; + if (seed_optarg != NULL) { + qemu_guest_random_seed_main(seed_optarg, &err); + } else { + qcrypto_init(&err); + } + if (err) { + error_reportf_err(err, "cannot initialize crypto: "); + exit(1); + } + } + /* - * Now that page sizes are configured in tcg_exec_init() we can do + * Now that page sizes are configured we can do * proper page alignment for guest_base. */ guest_base = HOST_PAGE_ALIGN(guest_base); - /* - * Read in mmap_min_addr kernel parameter. This value is used - * When loading the ELF image to determine whether guest_base - * is needed. - * - * When user has explicitly set the quest base, we skip this - * test. 
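Editor's note: once page sizes are known, the hunk above rounds guest_base up to a host page boundary with HOST_PAGE_ALIGN(). The rounding is the usual power-of-two trick; a minimal sketch (page size queried with sysconf purely for illustration):

```c
#include <stdio.h>
#include <unistd.h>

/* Round addr up to the next multiple of align (align must be a power of two). */
static unsigned long align_up(unsigned long addr, unsigned long align)
{
    return (addr + align - 1) & ~(align - 1);
}

int main(void)
{
    unsigned long page = (unsigned long)sysconf(_SC_PAGESIZE);
    unsigned long base = 0x12345;

    printf("0x%lx aligned to 0x%lx-byte pages is 0x%lx\n",
           base, page, align_up(base, page));
    return 0;
}
```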
- */ - if (!have_guest_base) { - FILE *fp; - - if ((fp = fopen("/proc/sys/vm/mmap_min_addr", "r")) != NULL) { - unsigned long tmp; - if (fscanf(fp, "%lu", &tmp) == 1) { - mmap_min_addr = tmp; - qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx\n", mmap_min_addr); - } - fclose(fp); - } - } - - if (loader_exec(filename, argv+optind, target_environ, regs, info) != 0) { + if (loader_exec(filename, argv + optind, target_environ, regs, info, + &bprm) != 0) { printf("Error loading %s\n", filename); _exit(1); } @@ -986,151 +536,25 @@ int main(int argc, char **argv) qemu_log("entry 0x" TARGET_ABI_FMT_lx "\n", info->entry); } - target_set_brk(info->brk); - syscall_init(); - signal_init(); - - /* Now that we've loaded the binary, GUEST_BASE is fixed. Delay - generating the prologue until now so that the prologue can take - the real value of GUEST_BASE into account. */ - tcg_prologue_init(tcg_ctx); - tcg_region_init(); - /* build Task State */ - memset(ts, 0, sizeof(TaskState)); + ts = g_new0(TaskState, 1); init_task_state(ts); ts->info = info; + ts->bprm = &bprm; cpu->opaque = ts; -#if defined(TARGET_I386) - env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; - env->hflags |= HF_PE_MASK | HF_CPL_MASK; - if (env->features[FEAT_1_EDX] & CPUID_SSE) { - env->cr[4] |= CR4_OSFXSR_MASK; - env->hflags |= HF_OSFXSR_MASK; - } -#ifndef TARGET_ABI32 - /* enable 64 bit mode if possible */ - if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { - fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); - exit(1); - } - env->cr[4] |= CR4_PAE_MASK; - env->efer |= MSR_EFER_LMA | MSR_EFER_LME; - env->hflags |= HF_LMA_MASK; -#endif - - /* flags setup : we activate the IRQs by default as in user mode */ - env->eflags |= IF_MASK; - - /* linux register setup */ -#ifndef TARGET_ABI32 - env->regs[R_EAX] = regs->rax; - env->regs[R_EBX] = regs->rbx; - env->regs[R_ECX] = regs->rcx; - env->regs[R_EDX] = regs->rdx; - env->regs[R_ESI] = regs->rsi; - env->regs[R_EDI] = regs->rdi; - env->regs[R_EBP] = regs->rbp; - env->regs[R_ESP] = regs->rsp; - env->eip = regs->rip; -#else - env->regs[R_EAX] = regs->eax; - env->regs[R_EBX] = regs->ebx; - env->regs[R_ECX] = regs->ecx; - env->regs[R_EDX] = regs->edx; - env->regs[R_ESI] = regs->esi; - env->regs[R_EDI] = regs->edi; - env->regs[R_EBP] = regs->ebp; - env->regs[R_ESP] = regs->esp; - env->eip = regs->eip; -#endif + target_set_brk(info->brk); + syscall_init(); + signal_init(); - /* linux interrupt setup */ -#ifndef TARGET_ABI32 - env->idt.limit = 511; -#else - env->idt.limit = 255; -#endif - env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - idt_table = g2h_untagged(env->idt.base); - set_idt(0, 0); - set_idt(1, 0); - set_idt(2, 0); - set_idt(3, 3); - set_idt(4, 3); - set_idt(5, 0); - set_idt(6, 0); - set_idt(7, 0); - set_idt(8, 0); - set_idt(9, 0); - set_idt(10, 0); - set_idt(11, 0); - set_idt(12, 0); - set_idt(13, 0); - set_idt(14, 0); - set_idt(15, 0); - set_idt(16, 0); - set_idt(17, 0); - set_idt(18, 0); - set_idt(19, 0); - set_idt(0x80, 3); - - /* linux segment setup */ - { - uint64_t *gdt_table; - env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; - gdt_table = g2h_untagged(env->gdt.base); -#ifdef TARGET_ABI32 - write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | - (3 << 
DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); -#else - /* 64 bit code segment */ - write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | - DESC_L_MASK | - (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); -#endif - write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | - (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); - } + /* + * Now that we've loaded the binary, GUEST_BASE is fixed. Delay + * generating the prologue until now so that the prologue can take + * the real value of GUEST_BASE into account. + */ + tcg_prologue_init(tcg_ctx); - cpu_x86_load_seg(env, R_CS, __USER_CS); - cpu_x86_load_seg(env, R_SS, __USER_DS); -#ifdef TARGET_ABI32 - cpu_x86_load_seg(env, R_DS, __USER_DS); - cpu_x86_load_seg(env, R_ES, __USER_DS); - cpu_x86_load_seg(env, R_FS, __USER_DS); - cpu_x86_load_seg(env, R_GS, __USER_DS); - /* This hack makes Wine work... */ - env->segs[R_FS].selector = 0; -#else - cpu_x86_load_seg(env, R_DS, 0); - cpu_x86_load_seg(env, R_ES, 0); - cpu_x86_load_seg(env, R_FS, 0); - cpu_x86_load_seg(env, R_GS, 0); -#endif -#elif defined(TARGET_SPARC) - { - int i; - env->pc = regs->pc; - env->npc = regs->npc; - env->y = regs->y; - for(i = 0; i < 8; i++) - env->gregs[i] = regs->u_regs[i]; - for(i = 0; i < 8; i++) - env->regwptr[i] = regs->u_regs[i + 8]; - } -#else -#error unsupported target CPU -#endif + target_cpu_init(env, regs); if (gdbstub) { gdbserver_start(gdbstub); diff --git a/bsd-user/meson.build b/bsd-user/meson.build index 03695493408..87885d91edc 100644 --- a/bsd-user/meson.build +++ b/bsd-user/meson.build @@ -1,3 +1,7 @@ +if not have_bsd_user + subdir_done() +endif + bsd_user_ss.add(files( 'bsdload.c', 'elfload.c', @@ -8,3 +12,6 @@ bsd_user_ss.add(files( 'syscall.c', 'uaccess.c', )) + +# Pull in the OS-specific build glue, if any +subdir(targetos) diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index 01ec8080038..13cb32dba13 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -20,10 +20,6 @@ #include "qemu.h" #include "qemu-common.h" -#include "bsd-mman.h" -#include "exec/exec-all.h" - -//#define DEBUG_MMAP static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER; static __thread int mmap_lock_count; @@ -69,14 +65,11 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) abi_ulong end, host_start, host_end, addr; int prot1, ret; -#ifdef DEBUG_MMAP - printf("mprotect: start=0x" TARGET_FMT_lx - " len=0x" TARGET_FMT_lx " prot=%c%c%c\n", start, len, - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 'w' : '-', - prot & PROT_EXEC ? 'x' : '-'); -#endif - + qemu_log_mask(CPU_LOG_PAGE, "mprotect: start=0x" TARGET_ABI_FMT_lx + " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len, + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 
'x' : '-'); if ((start & ~TARGET_PAGE_MASK) != 0) return -EINVAL; len = TARGET_PAGE_ALIGN(len); @@ -93,11 +86,11 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) if (start > host_start) { /* handle host page containing start */ prot1 = prot; - for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) { + for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) { prot1 |= page_get_flags(addr); } if (host_end == host_start + qemu_host_page_size) { - for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { prot1 |= page_get_flags(addr); } end = host_end; @@ -110,7 +103,7 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) } if (end < host_end) { prot1 = prot; - for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) { prot1 |= page_get_flags(addr); } ret = mprotect(g2h_untagged(host_end - qemu_host_page_size), @@ -134,7 +127,27 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot) return ret; } -/* map an incomplete host page */ +/* + * map an incomplete host page + * + * mmap_frag can be called with a valid fd, if flags doesn't contain one of + * MAP_ANON, MAP_STACK, MAP_GUARD. If we need to map a page in those cases, we + * pass fd == -1. However, if flags contains MAP_GUARD then MAP_ANON cannot be + * added. + * + * * If fd is valid (not -1) we want to map the pages with MAP_ANON. + * * If flags contains MAP_GUARD we don't want to add MAP_ANON because it + * will be rejected. See kern_mmap's enforcing of constraints for MAP_GUARD + * in sys/vm/vm_mmap.c. + * * If flags contains MAP_ANON it doesn't matter if we add it or not. + * * If flags contains MAP_STACK, mmap adds MAP_ANON when called so doesn't + * matter if we add it or not either. See enforcing of constraints for + * MAP_STACK in kern_mmap. + * + * Don't add MAP_ANON for the flags that use fd == -1 without specifying the + * flags directly, with the assumption that future flags that require fd == -1 + * will also not require MAP_ANON. + */ static int mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong end, int prot, int flags, int fd, abi_ulong offset) @@ -148,15 +161,15 @@ static int mmap_frag(abi_ulong real_start, /* get the protection of the target pages outside the mapping */ prot1 = 0; - for(addr = real_start; addr < real_end; addr++) { + for (addr = real_start; addr < real_end; addr++) { if (addr < start || addr >= end) prot1 |= page_get_flags(addr); } if (prot1 == 0) { - /* no page was there, so we allocate one */ + /* no page was there, so we allocate one. See also above. */ void *p = mmap(host_start, qemu_host_page_size, prot, - flags | MAP_ANON, -1, 0); + flags | ((fd != -1) ? 
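Editor's note: when the target page size is smaller than the host's, target_mprotect() cannot change protection for only part of a host page; it ORs together the flags of every target page sharing that host page and applies the union. A sketch of that merging step, with page_get_flags() replaced by a toy per-page flag array (an assumption made for illustration):

```c
#include <stdio.h>

#define TARGET_PAGE_SIZE 4096UL
#define HOST_PAGE_SIZE   16384UL   /* pretend the host uses 16K pages */
#define NPAGES           64

static int page_flags[NPAGES];     /* toy stand-in for page_get_flags() */

static int flags_of(unsigned long addr)
{
    return page_flags[addr / TARGET_PAGE_SIZE];
}

/*
 * Protection to apply to the host page containing [start, end): the
 * requested prot merged with whatever the neighbouring target pages
 * inside the same host page already have.
 */
static int host_page_prot(unsigned long start, unsigned long end, int prot)
{
    unsigned long host_start = start & ~(HOST_PAGE_SIZE - 1);
    unsigned long host_end = host_start + HOST_PAGE_SIZE;
    unsigned long a;
    int merged = prot;

    for (a = host_start; a < start; a += TARGET_PAGE_SIZE) {
        merged |= flags_of(a);
    }
    for (a = end; a < host_end; a += TARGET_PAGE_SIZE) {
        merged |= flags_of(a);
    }
    return merged;
}

int main(void)
{
    page_flags[0] = 0x1;           /* the neighbouring target page is readable */
    printf("merged prot: 0x%x\n",
           host_page_prot(TARGET_PAGE_SIZE, 2 * TARGET_PAGE_SIZE, 0x2));
    return 0;
}
```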
MAP_ANON : 0), -1, 0); if (p == MAP_FAILED) return -1; prot1 = prot; @@ -164,7 +177,7 @@ static int mmap_frag(abi_ulong real_start, prot1 &= PAGE_BITS; prot_new = prot | prot1; - if (!(flags & MAP_ANON)) { + if (fd != -1) { /* msync() won't work here, so we return an error if write is possible while it is a shared mapping */ if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED && @@ -176,144 +189,379 @@ static int mmap_frag(abi_ulong real_start, mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE); /* read the corresponding file data */ - pread(fd, g2h_untagged(start), end - start, offset); + if (pread(fd, g2h_untagged(start), end - start, offset) == -1) { + return -1; + } /* put final protection */ if (prot_new != (prot1 | PROT_WRITE)) mprotect(host_start, qemu_host_page_size, prot_new); } else { - /* just update the protection */ if (prot_new != prot1) { mprotect(host_start, qemu_host_page_size, prot_new); } + if (prot_new & PROT_WRITE) { + memset(g2h_untagged(start), 0, end - start); + } } return 0; } -static abi_ulong mmap_next_start = 0x40000000; +#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64 +# define TASK_UNMAPPED_BASE (1ul << 38) +#else +# define TASK_UNMAPPED_BASE 0x40000000 +#endif +abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; unsigned long last_brk; -/* find a free memory area of size 'size'. The search starts at - 'start'. If 'start' == 0, then a default start address is used. - Return -1 if error. -*/ -/* page_init() marks pages used by the host as reserved to be sure not - to use them. */ -static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size) +/* + * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest + * address space. + */ +static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size, + abi_ulong alignment) { - abi_ulong addr, addr1, addr_start; + abi_ulong addr; + abi_ulong end_addr; int prot; - unsigned long new_brk; + int looped = 0; + + if (size > reserved_va) { + return (abi_ulong)-1; + } + + size = HOST_PAGE_ALIGN(size) + alignment; + end_addr = start + size; + if (end_addr > reserved_va) { + end_addr = reserved_va; + } + addr = end_addr - qemu_host_page_size; - new_brk = (unsigned long)sbrk(0); - if (last_brk && last_brk < new_brk && last_brk == (target_ulong)last_brk) { - /* This is a hack to catch the host allocating memory with brk(). - If it uses mmap then we loose. - FIXME: We really want to avoid the host allocating memory in - the first place, and maybe leave some slack to avoid switching - to mmap. */ - page_set_flags(last_brk & TARGET_PAGE_MASK, - TARGET_PAGE_ALIGN(new_brk), - PAGE_RESERVED); + while (1) { + if (addr > end_addr) { + if (looped) { + return (abi_ulong)-1; + } + end_addr = reserved_va; + addr = end_addr - qemu_host_page_size; + looped = 1; + continue; + } + prot = page_get_flags(addr); + if (prot) { + end_addr = addr; + } + if (end_addr - addr >= size) { + break; + } + addr -= qemu_host_page_size; + } + + if (start == mmap_next_start) { + mmap_next_start = addr; + } + /* addr is sufficiently low to align it up */ + if (alignment != 0) { + addr = (addr + alignment) & ~(alignment - 1); + } + return addr; +} + +/* + * Find and reserve a free memory area of size 'size'. The search + * starts at 'start'. + * It must be called with mmap_lock() held. + * Return -1 if error. 
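Editor's note: mmap_frag() above fills an incomplete host page in three steps: create an anonymous mapping if no page exists yet, copy the file contents in with pread() for file-backed requests, then drop to the final protection. A condensed, self-contained sketch of that sequence (the file being mapped — the program's own binary — is an arbitrary choice for illustration):

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    (void)argc;
    long page = sysconf(_SC_PAGESIZE);
    int fd = open(argv[0], O_RDONLY);      /* any readable file will do */
    if (fd < 0) {
        return 1;
    }

    /* 1. Allocate the host page anonymously, writable for the copy-in. */
    char *p = mmap(NULL, page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON, -1, 0);
    if (p == MAP_FAILED) {
        return 1;
    }

    /* 2. Read the file data that should appear in (part of) that page. */
    ssize_t n = pread(fd, p, page / 2, 0);
    if (n < 0) {
        return 1;
    }

    /* 3. Drop to the protection the caller actually asked for. */
    if (mprotect(p, page, PROT_READ) != 0) {
        return 1;
    }

    printf("copied %zd bytes into a read-only page at %p\n", n, (void *)p);
    munmap(p, page);
    close(fd);
    return 0;
}
```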
+ */ +static abi_ulong mmap_find_vma_aligned(abi_ulong start, abi_ulong size, + abi_ulong alignment) +{ + void *ptr, *prev; + abi_ulong addr; + int flags; + int wrapped, repeat; + + /* If 'start' == 0, then a default start address is used. */ + if (start == 0) { + start = mmap_next_start; + } else { + start &= qemu_host_page_mask; } - last_brk = new_brk; size = HOST_PAGE_ALIGN(size); - start = start & qemu_host_page_mask; + + if (reserved_va) { + return mmap_find_vma_reserved(start, size, + (alignment != 0 ? 1 << alignment : 0)); + } + addr = start; - if (addr == 0) - addr = mmap_next_start; - addr_start = addr; - for(;;) { - prot = 0; - for(addr1 = addr; addr1 < (addr + size); addr1 += TARGET_PAGE_SIZE) { - prot |= page_get_flags(addr1); + wrapped = repeat = 0; + prev = 0; + flags = MAP_ANON | MAP_PRIVATE; + if (alignment != 0) { + flags |= MAP_ALIGNED(alignment); + } + + for (;; prev = ptr) { + /* + * Reserve needed memory area to avoid a race. + * It should be discarded using: + * - mmap() with MAP_FIXED flag + * - mremap() with MREMAP_FIXED flag + * - shmat() with SHM_REMAP flag + */ + ptr = mmap(g2h_untagged(addr), size, PROT_NONE, + flags, -1, 0); + + /* ENOMEM, if host address space has no memory */ + if (ptr == MAP_FAILED) { + return (abi_ulong)-1; } - if (prot == 0) - break; - addr += qemu_host_page_size; - /* we found nothing */ - if (addr == addr_start) + + /* + * Count the number of sequential returns of the same address. + * This is used to modify the search algorithm below. + */ + repeat = (ptr == prev ? repeat + 1 : 0); + + if (h2g_valid(ptr + size - 1)) { + addr = h2g(ptr); + + if ((addr & ~TARGET_PAGE_MASK) == 0) { + /* Success. */ + if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) { + mmap_next_start = addr + size; + } + return addr; + } + + /* The address is not properly aligned for the target. */ + switch (repeat) { + case 0: + /* + * Assume the result that the kernel gave us is the + * first with enough free space, so start again at the + * next higher target page. + */ + addr = TARGET_PAGE_ALIGN(addr); + break; + case 1: + /* + * Sometimes the kernel decides to perform the allocation + * at the top end of memory instead. + */ + addr &= TARGET_PAGE_MASK; + break; + case 2: + /* Start over at low memory. */ + addr = 0; + break; + default: + /* Fail. This unaligned block must the last. */ + addr = -1; + break; + } + } else { + /* + * Since the result the kernel gave didn't fit, start + * again at low memory. If any repetition, fail. + */ + addr = (repeat ? -1 : 0); + } + + /* Unmap and try again. */ + munmap(ptr, size); + + /* ENOMEM if we checked the whole of the target address space. */ + if (addr == (abi_ulong)-1) { return (abi_ulong)-1; + } else if (addr == 0) { + if (wrapped) { + return (abi_ulong)-1; + } + wrapped = 1; + /* + * Don't actually use 0 when wrapping, instead indicate + * that we'd truly like an allocation in low memory. 
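Editor's note: mmap_find_vma_reserved() above walks downwards through the pre-reserved guest address space, resetting the candidate window whenever it meets an already-used page and wrapping around at most once. A toy version of the same scan over a boolean "page used" array (array, page size and area size are invented for illustration):

```c
#include <stdbool.h>
#include <stdio.h>

#define PAGE   4096UL
#define NPAGES 256UL                    /* pretend reserved area: 1 MiB */

static bool used[NPAGES];               /* stand-in for page_get_flags() */

/* Find 'size' bytes of free space at or below 'start'; ~0UL on failure. */
static unsigned long find_free_desc(unsigned long start, unsigned long size)
{
    unsigned long reserved_top = NPAGES * PAGE;
    unsigned long end = start + size;
    unsigned long addr;
    int looped = 0;

    if (end > reserved_top) {
        end = reserved_top;
    }
    addr = end - PAGE;

    for (;;) {
        if (addr > end) {               /* ran off the bottom: wrap once */
            if (looped) {
                return ~0UL;
            }
            end = reserved_top;
            addr = end - PAGE;
            looped = 1;
            continue;
        }
        if (used[addr / PAGE]) {
            end = addr;                 /* restart the window below this page */
        }
        if (end - addr >= size) {
            return addr;
        }
        addr -= PAGE;
    }
}

int main(void)
{
    used[100] = true;                   /* one page already taken */
    printf("found 0x%lx\n", find_free_desc(128 * PAGE, 8 * PAGE));
    return 0;
}
```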
+ */ + addr = TARGET_PAGE_SIZE; + } else if (wrapped && addr >= start) { + return (abi_ulong)-1; + } } - if (start == 0) - mmap_next_start = addr + size; - return addr; +} + +abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size) +{ + return mmap_find_vma_aligned(start, size, 0); } /* NOTE: all the constants are the HOST ones */ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, - int flags, int fd, abi_ulong offset) + int flags, int fd, off_t offset) { abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len; - unsigned long host_start; mmap_lock(); -#ifdef DEBUG_MMAP - { - printf("mmap: start=0x" TARGET_FMT_lx - " len=0x" TARGET_FMT_lx " prot=%c%c%c flags=", - start, len, - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 'w' : '-', - prot & PROT_EXEC ? 'x' : '-'); - if (flags & MAP_FIXED) - printf("MAP_FIXED "); - if (flags & MAP_ANON) - printf("MAP_ANON "); - switch(flags & TARGET_BSD_MAP_FLAGMASK) { - case MAP_PRIVATE: - printf("MAP_PRIVATE "); - break; - case MAP_SHARED: - printf("MAP_SHARED "); - break; - default: - printf("[MAP_FLAGMASK=0x%x] ", flags & TARGET_BSD_MAP_FLAGMASK); - break; + if (qemu_loglevel_mask(CPU_LOG_PAGE)) { + qemu_log("mmap: start=0x" TARGET_ABI_FMT_lx + " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=", + start, len, + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 'x' : '-'); + if (flags & MAP_ALIGNMENT_MASK) { + qemu_log("MAP_ALIGNED(%u) ", + (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT); + } + if (flags & MAP_GUARD) { + qemu_log("MAP_GUARD "); + } + if (flags & MAP_FIXED) { + qemu_log("MAP_FIXED "); + } + if (flags & MAP_ANON) { + qemu_log("MAP_ANON "); } - printf("fd=%d offset=" TARGET_FMT_lx "\n", fd, offset); + if (flags & MAP_EXCL) { + qemu_log("MAP_EXCL "); + } + if (flags & MAP_PRIVATE) { + qemu_log("MAP_PRIVATE "); + } + if (flags & MAP_SHARED) { + qemu_log("MAP_SHARED "); + } + if (flags & MAP_NOCORE) { + qemu_log("MAP_NOCORE "); + } + if (flags & MAP_STACK) { + qemu_log("MAP_STACK "); + } + qemu_log("fd=%d offset=0x%lx\n", fd, offset); + } + + if ((flags & MAP_ANON) && fd != -1) { + errno = EINVAL; + goto fail; + } + if (flags & MAP_STACK) { + if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) != + (PROT_READ | PROT_WRITE))) { + errno = EINVAL; + goto fail; + } + } + if ((flags & MAP_GUARD) && (prot != PROT_NONE || fd != -1 || + offset != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | + /* MAP_PREFAULT | */ /* MAP_PREFAULT not in mman.h */ + MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0)) { + errno = EINVAL; + goto fail; } -#endif if (offset & ~TARGET_PAGE_MASK) { errno = EINVAL; goto fail; } + if (len == 0) { + errno = EINVAL; + goto fail; + } + + /* Check for overflows */ len = TARGET_PAGE_ALIGN(len); - if (len == 0) - goto the_end; + if (len == 0) { + errno = ENOMEM; + goto fail; + } + real_start = start & qemu_host_page_mask; + host_offset = offset & qemu_host_page_mask; + /* + * If the user is asking for the kernel to find a location, do that + * before we truncate the length for mapping files below. 
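Editor's note: the new argument checks in target_mmap() mirror the constraints kern_mmap enforces, as described in the mmap_frag() comment earlier: MAP_ANON requests take no file descriptor, MAP_STACK mappings must be anonymous and read/write, and MAP_GUARD mappings allow no protection, fd, offset or sharing flags. A compact sketch of that validation as a predicate (FreeBSD-only flags are given zero fallbacks so the sketch compiles on any host; MAP_PREFAULT_READ is omitted):

```c
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_STACK
#define MAP_STACK 0
#endif
#ifndef MAP_GUARD
#define MAP_GUARD 0
#endif

/* Return 0 if the (prot, flags, fd, offset) combination is acceptable,
 * otherwise EINVAL, following the constraints described above. */
static int check_mmap_args(int prot, int flags, int fd, long offset)
{
    if ((flags & MAP_ANON) && fd != -1) {
        return EINVAL;                      /* anonymous maps take no fd */
    }
    if (flags & MAP_STACK) {
        if (fd != -1 ||
            (prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)) {
            return EINVAL;                  /* stacks are anonymous and rw */
        }
    }
    if (flags & MAP_GUARD) {
        if (prot != PROT_NONE || fd != -1 || offset != 0 ||
            (flags & (MAP_SHARED | MAP_PRIVATE | MAP_ANON | MAP_STACK))) {
            return EINVAL;                  /* guards only reserve address space */
        }
    }
    return 0;
}

int main(void)
{
    printf("%d %d\n",
           check_mmap_args(PROT_READ, MAP_ANON | MAP_PRIVATE, 3, 0),   /* EINVAL */
           check_mmap_args(PROT_READ, MAP_ANON | MAP_PRIVATE, -1, 0)); /* 0 */
    return 0;
}
```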
+ */ if (!(flags & MAP_FIXED)) { - abi_ulong mmap_start; - void *p; - host_offset = offset & qemu_host_page_mask; host_len = len + offset - host_offset; host_len = HOST_PAGE_ALIGN(host_len); - mmap_start = mmap_find_vma(real_start, host_len); - if (mmap_start == (abi_ulong)-1) { + if ((flags & MAP_ALIGNMENT_MASK) != 0) + start = mmap_find_vma_aligned(real_start, host_len, + (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT); + else + start = mmap_find_vma(real_start, host_len); + if (start == (abi_ulong)-1) { errno = ENOMEM; goto fail; } - /* Note: we prefer to control the mapping address. It is - especially important if qemu_host_page_size > - qemu_real_host_page_size */ - p = mmap(g2h_untagged(mmap_start), - host_len, prot, flags | MAP_FIXED, fd, host_offset); + } + + /* + * When mapping files into a memory area larger than the file, accesses + * to pages beyond the file size will cause a SIGBUS. + * + * For example, if mmaping a file of 100 bytes on a host with 4K pages + * emulating a target with 8K pages, the target expects to be able to + * access the first 8K. But the host will trap us on any access beyond + * 4K. + * + * When emulating a target with a larger page-size than the hosts, we + * may need to truncate file maps at EOF and add extra anonymous pages + * up to the targets page boundary. + */ + + if ((qemu_real_host_page_size < qemu_host_page_size) && fd != -1) { + struct stat sb; + + if (fstat(fd, &sb) == -1) { + goto fail; + } + + /* Are we trying to create a map beyond EOF?. */ + if (offset + len > sb.st_size) { + /* + * If so, truncate the file map at eof aligned with + * the hosts real pagesize. Additional anonymous maps + * will be created beyond EOF. + */ + len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset); + } + } + + if (!(flags & MAP_FIXED)) { + unsigned long host_start; + void *p; + + host_len = len + offset - host_offset; + host_len = HOST_PAGE_ALIGN(host_len); + + /* + * Note: we prefer to control the mapping address. It is + * especially important if qemu_host_page_size > + * qemu_real_host_page_size + */ + p = mmap(g2h_untagged(start), host_len, prot, + flags | MAP_FIXED | ((fd != -1) ? MAP_ANON : 0), -1, 0); if (p == MAP_FAILED) goto fail; /* update start so that it points to the file position at 'offset' */ host_start = (unsigned long)p; - if (!(flags & MAP_ANON)) + if (fd != -1) { + p = mmap(g2h_untagged(start), len, prot, + flags | MAP_FIXED, fd, host_offset); + if (p == MAP_FAILED) { + munmap(g2h_untagged(start), host_len); + goto fail; + } host_start += offset - host_offset; + } start = h2g(host_start); } else { - int flg; - target_ulong addr; - if (start & ~TARGET_PAGE_MASK) { errno = EINVAL; goto fail; @@ -321,20 +569,26 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, end = start + len; real_end = HOST_PAGE_ALIGN(end); - for(addr = real_start; addr < real_end; addr += TARGET_PAGE_SIZE) { - flg = page_get_flags(addr); - if (flg & PAGE_RESERVED) { - errno = ENXIO; - goto fail; - } + /* + * Test if requested memory area fits target address space + * It can fail only on 64-bit host with 32-bit target. + * On any other target/host host mmap() handles this error correctly. 
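Editor's note: the comment above explains why file-backed maps must be cut off at end-of-file when the guest page size exceeds the host's: pages past EOF would raise SIGBUS on the host even though the guest expects them to be accessible. The adjustment itself is just fstat() plus rounding to the host page size; a sketch with illustrative names:

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

/* Round up to the host's real page size (power of two). */
static unsigned long real_host_page_align(unsigned long v, unsigned long host_page)
{
    return (v + host_page - 1) & ~(host_page - 1);
}

/*
 * Given a request to map 'len' bytes of 'fd' starting at 'offset', return
 * the length that can actually be file-backed; the remainder, up to the
 * guest page boundary, would have to come from anonymous pages.
 */
static unsigned long file_backed_len(int fd, unsigned long offset,
                                     unsigned long len, unsigned long host_page)
{
    struct stat sb;

    if (fstat(fd, &sb) == -1) {
        return len;                         /* caller will fail the mmap anyway */
    }
    if (offset + len > (unsigned long)sb.st_size) {
        len = real_host_page_align((unsigned long)sb.st_size - offset, host_page);
    }
    return len;
}

int main(void)
{
    int fd = open("/dev/null", O_RDONLY);   /* size 0: nothing is file-backed */
    printf("file-backed length: %lu\n", file_backed_len(fd, 0, 8192, 4096));
    close(fd);
    return 0;
}
```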
+ */ + if (!guest_range_valid_untagged(start, len)) { + errno = EINVAL; + goto fail; } - /* worst case: we cannot map the file because the offset is not - aligned, so we read it */ - if (!(flags & MAP_ANON) && + /* + * worst case: we cannot map the file because the offset is not + * aligned, so we read it + */ + if (fd != -1 && (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) { - /* msync() won't work here, so we return an error if write is - possible while it is a shared mapping */ + /* + * msync() won't work here, so we return an error if write is + * possible while it is a shared mapping + */ if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED && (prot & PROT_WRITE)) { errno = EINVAL; @@ -345,17 +599,22 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, -1, 0); if (retaddr == -1) goto fail; - pread(fd, g2h_untagged(start), len, offset); + if (pread(fd, g2h_untagged(start), len, offset) == -1) { + goto fail; + } if (!(prot & PROT_WRITE)) { ret = target_mprotect(start, len, prot); - if (ret != 0) { - start = ret; - goto the_end; - } + assert(ret == 0); } goto the_end; } + /* Reject the mapping if any page within the range is mapped */ + if ((flags & MAP_EXCL) && page_check_range(start, len, 0) < 0) { + errno = EINVAL; + goto fail; + } + /* handle the start of the mapping */ if (start > real_start) { if (real_end == real_start + qemu_host_page_size) { @@ -375,7 +634,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, /* handle the end of the mapping */ if (end < real_end) { ret = mmap_frag(real_end - qemu_host_page_size, - real_end - qemu_host_page_size, real_end, + real_end - qemu_host_page_size, end, prot, flags, fd, offset + real_end - qemu_host_page_size - start); if (ret == -1) @@ -401,10 +660,11 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, page_set_flags(start, start + len, prot | PAGE_VALID); the_end: #ifdef DEBUG_MMAP - printf("ret=0x" TARGET_FMT_lx "\n", start); + printf("ret=0x" TARGET_ABI_FMT_lx "\n", start); page_dump(stdout); printf("\n"); #endif + tb_invalidate_phys_range(start, start + len); mmap_unlock(); return start; fail: @@ -412,13 +672,57 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, return -1; } +static void mmap_reserve(abi_ulong start, abi_ulong size) +{ + abi_ulong real_start; + abi_ulong real_end; + abi_ulong addr; + abi_ulong end; + int prot; + + real_start = start & qemu_host_page_mask; + real_end = HOST_PAGE_ALIGN(start + size); + end = start + size; + if (start > real_start) { + /* handle host page containing start */ + prot = 0; + for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) { + prot |= page_get_flags(addr); + } + if (real_end == real_start + qemu_host_page_size) { + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + prot |= page_get_flags(addr); + } + end = real_end; + } + if (prot != 0) { + real_start += qemu_host_page_size; + } + } + if (end < real_end) { + prot = 0; + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + prot |= page_get_flags(addr); + } + if (prot != 0) { + real_end -= qemu_host_page_size; + } + } + if (real_start != real_end) { + mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + } +} + int target_munmap(abi_ulong start, abi_ulong len) { abi_ulong end, real_start, real_end, addr; int prot, ret; #ifdef DEBUG_MMAP - printf("munmap: start=0x%lx len=0x%lx\n", start, len); + printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x" + TARGET_ABI_FMT_lx 
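Editor's note: with a reserved guest address space (reserved_va), the new mmap_reserve() above means an "unmap" does not return pages to the host; the range is re-mapped as inaccessible anonymous memory so the reservation survives and no host allocation can land there. A minimal demonstration of that "unmap by re-mapping PROT_NONE" pattern:

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    long page = sysconf(_SC_PAGESIZE);

    /* A small "reserved" area standing in for the guest address space. */
    char *base = mmap(NULL, 4 * page, PROT_NONE,
                      MAP_PRIVATE | MAP_ANON, -1, 0);
    if (base == MAP_FAILED) {
        return 1;
    }

    /* Guest "maps" one page inside the reservation ... */
    mmap(base + page, page, PROT_READ | PROT_WRITE,
         MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
    memset(base + page, 0xab, page);

    /* ... and "unmaps" it again: re-map PROT_NONE instead of munmap(), so
     * the address range stays reserved for the guest. */
    mmap(base + page, page, PROT_NONE,
         MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);

    printf("range %p..%p is reserved but inaccessible again\n",
           (void *)(base + page), (void *)(base + 2 * page));
    munmap(base, 4 * page);
    return 0;
}
```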
"\n", + start, len); #endif if (start & ~TARGET_PAGE_MASK) return -EINVAL; @@ -433,11 +737,11 @@ int target_munmap(abi_ulong start, abi_ulong len) if (start > real_start) { /* handle host page containing start */ prot = 0; - for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) { + for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) { prot |= page_get_flags(addr); } if (real_end == real_start + qemu_host_page_size) { - for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { prot |= page_get_flags(addr); } end = real_end; @@ -447,7 +751,7 @@ int target_munmap(abi_ulong start, abi_ulong len) } if (end < real_end) { prot = 0; - for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { + for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) { prot |= page_get_flags(addr); } if (prot != 0) @@ -457,11 +761,17 @@ int target_munmap(abi_ulong start, abi_ulong len) ret = 0; /* unmap what we can */ if (real_start < real_end) { - ret = munmap(g2h_untagged(real_start), real_end - real_start); + if (reserved_va) { + mmap_reserve(real_start, real_end - real_start); + } else { + ret = munmap(g2h_untagged(real_start), real_end - real_start); + } } - if (ret == 0) + if (ret == 0) { page_set_flags(start, start + len, 0); + tb_invalidate_phys_range(start, start + len); + } mmap_unlock(); return ret; } diff --git a/bsd-user/netbsd/host-os.h b/bsd-user/netbsd/host-os.h new file mode 100644 index 00000000000..c0be51a7ef4 --- /dev/null +++ b/bsd-user/netbsd/host-os.h @@ -0,0 +1,25 @@ +/* + * NetBSD host dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _HOST_OS_H_ +#define _HOST_OS_H_ + +#define HOST_DEFAULT_BSD_TYPE target_netbsd + +#endif /*!_HOST_OS_H_ */ diff --git a/bsd-user/netbsd/target_os_elf.h b/bsd-user/netbsd/target_os_elf.h new file mode 100644 index 00000000000..21b475f458c --- /dev/null +++ b/bsd-user/netbsd/target_os_elf.h @@ -0,0 +1,146 @@ +/* + * netbsd ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_OS_ELF_H_ +#define _TARGET_OS_ELF_H_ + +#include "target_arch_elf.h" +#include "elf.h" + +/* this flag is uneffective under linux too, should be deleted */ +#ifndef MAP_DENYWRITE +#define MAP_DENYWRITE 0 +#endif + +/* should probably go in elf.h */ +#ifndef ELIBBAD +#define ELIBBAD 80 +#endif + +#ifndef ELF_PLATFORM +#define ELF_PLATFORM (NULL) +#endif + +#ifndef ELF_HWCAP +#define ELF_HWCAP 0 +#endif + +#ifdef TARGET_ABI32 +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#undef bswaptls +#define bswaptls(ptr) bswap32s(ptr) +#endif + +/* max code+data+bss space allocated to elf interpreter */ +#define INTERP_MAP_SIZE (32 * 1024 * 1024) + +/* max code+data+bss+brk space allocated to ET_DYN executables */ +#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) + +/* Necessary parameters */ +#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_PAGESTART(_v) ((_v) & \ + ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE - 1)) +#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1)) + +#define DLINFO_ITEMS 12 + +static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, + abi_ulong stringp, + struct elfhdr *exec, + abi_ulong load_addr, + abi_ulong load_bias, + abi_ulong interp_load_addr, + struct image_info *info) +{ + abi_ulong sp; + int size; + abi_ulong u_platform; + const char *k_platform; + const int n = sizeof(elf_addr_t); + + sp = p; + u_platform = 0; + k_platform = ELF_PLATFORM; + if (k_platform) { + size_t len = strlen(k_platform) + 1; + sp -= (len + n - 1) & ~(n - 1); + u_platform = sp; + /* FIXME - check return value of memcpy_to_target() for failure */ + memcpy_to_target(sp, k_platform, len); + } + /* + * Force 16 byte _final_ alignment here for generality. + */ + sp = sp & ~(abi_ulong)15; + size = (DLINFO_ITEMS + 1) * 2; + if (k_platform) { + size += 2; + } +#ifdef DLINFO_ARCH_ITEMS + size += DLINFO_ARCH_ITEMS * 2; +#endif + size += envc + argc + 2; + size += 1; /* argc itself */ + size *= n; + if (size & 15) { + sp -= 16 - (size & 15); + } + + /* + * NetBSD defines elf_addr_t as Elf32_Off / Elf64_Off + */ +#define NEW_AUX_ENT(id, val) do { \ + sp -= n; put_user_ual(val, sp); \ + sp -= n; put_user_ual(id, sp); \ + } while (0) + + NEW_AUX_ENT(AT_NULL, 0); + + /* There must be exactly DLINFO_ITEMS entries here. */ + NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); + NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof(struct elf_phdr))); + NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); + NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); + NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); + NEW_AUX_ENT(AT_UID, (abi_ulong)getuid()); + NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid()); + NEW_AUX_ENT(AT_GID, (abi_ulong)getgid()); + NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid()); + NEW_AUX_ENT(AT_HWCAP, (abi_ulong)ELF_HWCAP); + NEW_AUX_ENT(AT_CLKTCK, (abi_ulong)sysconf(_SC_CLK_TCK)); + if (k_platform) { + NEW_AUX_ENT(AT_PLATFORM, u_platform); + } +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come last so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). 
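Editor's note: target_create_elf_tables() builds the ELF auxiliary vector by pushing (id, value) pairs onto the downward-growing guest stack, with AT_NULL pushed first so it ends up last in memory. A host-side sketch of the same push pattern into a plain array used as a stack (put_user_ual() replaced by direct stores; assumes <elf.h> provides the AT_* constants):

```c
#include <elf.h>
#include <stdio.h>
#include <unistd.h>

#define STACK_WORDS 64

static unsigned long stack[STACK_WORDS];
static unsigned long sp = STACK_WORDS;        /* grows downwards */

static void new_aux_ent(unsigned long id, unsigned long val)
{
    stack[--sp] = val;                         /* value first ...           */
    stack[--sp] = id;                          /* ... then the tag below it */
}

int main(void)
{
    new_aux_ent(AT_NULL, 0);                   /* pushed first, read last */
    new_aux_ent(AT_PAGESZ, (unsigned long)sysconf(_SC_PAGESIZE));
    new_aux_ent(AT_UID, (unsigned long)getuid());

    /* Walk the vector back the way a guest's libc would. */
    for (unsigned long i = sp; stack[i] != AT_NULL; i += 2) {
        printf("aux tag %lu = %lu\n", stack[i], stack[i + 1]);
    }
    return 0;
}
```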
+ */ + ARCH_DLINFO; +#endif +#undef NEW_AUX_ENT + + sp = loader_build_argptr(envc, argc, sp, stringp); + return sp; +} + +#endif /* _TARGET_OS_ELF_H_ */ diff --git a/bsd-user/netbsd/target_os_siginfo.h b/bsd-user/netbsd/target_os_siginfo.h new file mode 100644 index 00000000000..667c19cc7ce --- /dev/null +++ b/bsd-user/netbsd/target_os_siginfo.h @@ -0,0 +1,82 @@ +#ifndef _TARGET_OS_SIGINFO_H_ +#define _TARGET_OS_SIGINFO_H_ + +#define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ +#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +/* this struct defines a stack used during syscall handling */ +typedef struct target_sigaltstack { + abi_long ss_sp; + abi_ulong ss_size; + abi_long ss_flags; +} target_stack_t; + +typedef struct { + uint32_t __bits[TARGET_NSIG_WORDS]; +} target_sigset_t + +struct target_sigaction { + abi_ulong _sa_handler; + int32_t sa_flags; + target_sigset_t sa_mask; +}; + +/* Compare to sys/siginfo.h */ +typedef union target_sigval { + int sival_int; + abi_ulong sival_ptr; +} target_sigval_t; + +struct target_ksiginfo { + int32_t _signo; + int32_t _code; + int32_t _errno; +#if TARGET_ABI_BITS == 64 + int32_t _pad; +#endif + union { + struct { + int32_t _pid; + int32_t _uid; + target_sigval_t _value; + } _rt; + + struct { + int32_t _pid; + int32_t _uid; + int32_t _struct; + /* clock_t _utime; */ + /* clock_t _stime; */ + } _child; + + struct { + abi_ulong _addr; + int32_t _trap; + } _fault; + + struct { + long _band; + int _fd; + } _poll; + } _reason; +}; + +typedef union target_siginfo { + int8_t si_pad[128]; + struct target_ksiginfo _info; +} target_siginfo_t; + +#define target_si_signo _info._signo +#define target_si_code _info._code +#define target_si_errno _info._errno +#define target_si_addr _info._reason._fault._addr + +#define TARGET_SEGV_MAPERR 1 +#define TARGET_SEGV_ACCERR 2 + +#define TARGET_TRAP_BRKPT 1 +#define TARGET_TRAP_TRACE 2 + + +#endif /* ! 
_TARGET_OS_SIGINFO_H_ */ diff --git a/bsd-user/netbsd/target_os_signal.h b/bsd-user/netbsd/target_os_signal.h new file mode 100644 index 00000000000..a373922f7e8 --- /dev/null +++ b/bsd-user/netbsd/target_os_signal.h @@ -0,0 +1,69 @@ +#ifndef _TARGET_OS_SIGNAL_H_ +#define _TARGET_OS_SIGNAL_H_ + +#include "target_os_siginfo.h" +#include "target_arch_signal.h" + +#define TARGET_SIGHUP 1 /* hangup */ +#define TARGET_SIGINT 2 /* interrupt */ +#define TARGET_SIGQUIT 3 /* quit */ +#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ +#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ +#define TARGET_SIGABRT 6 /* abort() */ +#define TARGET_SIGIOT SIGABRT /* compatibility */ +#define TARGET_SIGEMT 7 /* EMT instruction */ +#define TARGET_SIGFPE 8 /* floating point exception */ +#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ +#define TARGET_SIGBUS 10 /* bus error */ +#define TARGET_SIGSEGV 11 /* segmentation violation */ +#define TARGET_SIGSYS 12 /* bad argument to system call */ +#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ +#define TARGET_SIGALRM 14 /* alarm clock */ +#define TARGET_SIGTERM 15 /* software termination signal from kill */ +#define TARGET_SIGURG 16 /* urgent condition on IO channel */ +#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ +#define TARGET_SIGTSTP 18 /* stop signal from tty */ +#define TARGET_SIGCONT 19 /* continue a stopped process */ +#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ +#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ +#define TARGET_SIGTTOU 22 /* like TTIN for out if (tp->t_local<OSTOP) */ +#define TARGET_SIGIO 23 /* input/output possible signal */ +#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ +#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ +#define TARGET_SIGVTALRM 26 /* virtual time alarm */ +#define TARGET_SIGPROF 27 /* profiling time alarm */ +#define TARGET_SIGWINCH 28 /* window size changes */ +#define TARGET_SIGINFO 29 /* information request */ +#define TARGET_SIGUSR1 30 /* user defined signal 1 */ +#define TARGET_SIGUSR2 31 /* user defined signal 2 */ + +/* + * Language spec says we must list exactly one parameter, even though we + * actually supply three. Ugh! 
+ */ +#define TARGET_SIG_DFL ((void (*)(int))0) +#define TARGET_SIG_IGN ((void (*)(int))1) +#define TARGET_SIG_ERR ((void (*)(int))-1) + +#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ +#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ +#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ +#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ +#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ +#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ + +/* + * Flags for sigprocmask: + */ +#define TARGET_SIG_BLOCK 1 /* block specified signal set */ +#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ +#define TARGET_SIG_SETMASK 3 /* set specified signal set */ + +#define TARGET_BADSIG SIG_ERR + +#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ +#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ + +#endif /* !_TARGET_OS_SIGNAL_H_ */ diff --git a/bsd-user/netbsd/target_os_stack.h b/bsd-user/netbsd/target_os_stack.h new file mode 100644 index 00000000000..503279c1a90 --- /dev/null +++ b/bsd-user/netbsd/target_os_stack.h @@ -0,0 +1,56 @@ +/* + * NetBSD setup_initial_stack() implementation. + * + * Copyright (c) 2013-14 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_STACK_H_ +#define _TARGET_OS_STACK_H_ + +#include "target_arch_sigtramp.h" + +static inline int setup_initial_stack(struct bsd_binprm *bprm, abi_ulong *p, + abi_ulong *stringp) +{ + int i; + abi_ulong stack_base; + + stack_base = (target_stkbas + target_stksiz) - + MAX_ARG_PAGES * TARGET_PAGE_SIZE; + if (p) { + *p = stack_base; + } + if (stringp) { + *stringp = stack_base; + } + + for (i = 0; i < MAX_ARG_PAGES; i++) { + if (bprm->page[i]) { + info->rss++; + if (!memcpy_to_target(stack_base, bprm->page[i], + TARGET_PAGE_SIZE)) { + errno = EFAULT; + return -1; + } + g_free(bprm->page[i]); + } + stack_base += TARGET_PAGE_SIZE; + } + + return 0; +} + +#endif /* !_TARGET_OS_STACK_H_ */ diff --git a/bsd-user/netbsd/target_os_thread.h b/bsd-user/netbsd/target_os_thread.h new file mode 100644 index 00000000000..904dd1bf782 --- /dev/null +++ b/bsd-user/netbsd/target_os_thread.h @@ -0,0 +1,25 @@ +/* + * NetBSD thread dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
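Editor's note: setup_initial_stack() above copies the MAX_ARG_PAGES argument/environment pages staged by the loader in bprm->page[] onto the top of the guest stack, releasing each staging page as it goes. A simplified host-only sketch of that copy loop (the staging array, page size and string contents are invented for illustration):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096
#define MAX_ARG_PAGES 4

int main(void)
{
    char *staged[MAX_ARG_PAGES] = { 0 };       /* pages filled by the "loader" */
    char stack[MAX_ARG_PAGES * PAGE_SIZE];     /* top of the "guest" stack     */
    char *dst = stack;

    staged[0] = calloc(1, PAGE_SIZE);          /* only the first page is used */
    memcpy(staged[0], "arg0\0HOME=/tmp", sizeof("arg0\0HOME=/tmp"));

    for (int i = 0; i < MAX_ARG_PAGES; i++) {
        if (staged[i]) {
            memcpy(dst, staged[i], PAGE_SIZE); /* copy into the stack area */
            free(staged[i]);                   /* staging page no longer needed */
            staged[i] = NULL;
        }
        dst += PAGE_SIZE;                      /* advance even over empty slots */
    }

    printf("first string on the stack: %s\n", stack);
    return 0;
}
```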
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_THREAD_H_ +#define _TARGET_OS_THREAD_H_ + +#include "target_arch_thread.h" + +#endif /* !_TARGET_OS_THREAD_H_ */ diff --git a/bsd-user/openbsd/host-os.h b/bsd-user/openbsd/host-os.h new file mode 100644 index 00000000000..eb8fdf15679 --- /dev/null +++ b/bsd-user/openbsd/host-os.h @@ -0,0 +1,25 @@ +/* + * OpenBSD host dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _HOST_OS_H_ +#define _HOST_OS_H_ + +#define HOST_DEFAULT_BSD_TYPE target_openbsd + +#endif /*!_HOST_OS_H_ */ diff --git a/bsd-user/openbsd/target_os_elf.h b/bsd-user/openbsd/target_os_elf.h new file mode 100644 index 00000000000..a5cfcd3aff8 --- /dev/null +++ b/bsd-user/openbsd/target_os_elf.h @@ -0,0 +1,146 @@ +/* + * openbsd ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_OS_ELF_H_ +#define _TARGET_OS_ELF_H_ + +#include "target_arch_elf.h" +#include "elf.h" + +/* this flag is uneffective under linux too, should be deleted */ +#ifndef MAP_DENYWRITE +#define MAP_DENYWRITE 0 +#endif + +/* should probably go in elf.h */ +#ifndef ELIBBAD +#define ELIBBAD 80 +#endif + +#ifndef ELF_PLATFORM +#define ELF_PLATFORM (NULL) +#endif + +#ifndef ELF_HWCAP +#define ELF_HWCAP 0 +#endif + +#ifdef TARGET_ABI32 +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#undef bswaptls +#define bswaptls(ptr) bswap32s(ptr) +#endif + +/* max code+data+bss space allocated to elf interpreter */ +#define INTERP_MAP_SIZE (32 * 1024 * 1024) + +/* max code+data+bss+brk space allocated to ET_DYN executables */ +#define ET_DYN_MAP_SIZE (128 * 1024 * 1024) + +/* Necessary parameters */ +#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE +#define TARGET_ELF_PAGESTART(_v) ((_v) & \ + ~(unsigned long)(TARGET_ELF_EXEC_PAGESIZE - 1)) +#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1)) + +#define DLINFO_ITEMS 12 + +static abi_ulong target_create_elf_tables(abi_ulong p, int argc, int envc, + abi_ulong stringp, + struct elfhdr *exec, + abi_ulong load_addr, + abi_ulong load_bias, + abi_ulong interp_load_addr, + struct image_info *info) +{ + abi_ulong sp; + int size; + abi_ulong u_platform; + const char *k_platform; + const int n = sizeof(elf_addr_t); + + sp = p; + u_platform = 0; + k_platform = ELF_PLATFORM; + if (k_platform) { + size_t len = strlen(k_platform) + 1; + sp -= (len + n - 1) & ~(n - 1); + u_platform = sp; + /* FIXME - check return value of memcpy_to_target() for failure */ + memcpy_to_target(sp, k_platform, len); + } + /* + * Force 16 byte _final_ alignment here for generality. + */ + sp = sp & ~(abi_ulong)15; + size = (DLINFO_ITEMS + 1) * 2; + if (k_platform) { + size += 2; + } +#ifdef DLINFO_ARCH_ITEMS + size += DLINFO_ARCH_ITEMS * 2; +#endif + size += envc + argc + 2; + size += 1; /* argc itself */ + size *= n; + if (size & 15) { + sp -= 16 - (size & 15); + } + + /* + * OpenBSD defines elf_addr_t as Elf32_Off / Elf64_Off + */ +#define NEW_AUX_ENT(id, val) do { \ + sp -= n; put_user_ual(val, sp); \ + sp -= n; put_user_ual(id, sp); \ + } while (0) + + NEW_AUX_ENT(AT_NULL, 0); + + /* There must be exactly DLINFO_ITEMS entries here. */ + NEW_AUX_ENT(AT_PHDR, (abi_ulong)(load_addr + exec->e_phoff)); + NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof(struct elf_phdr))); + NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_load_addr)); + NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); + NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry); + NEW_AUX_ENT(AT_UID, (abi_ulong)getuid()); + NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid()); + NEW_AUX_ENT(AT_GID, (abi_ulong)getgid()); + NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid()); + NEW_AUX_ENT(AT_HWCAP, (abi_ulong)ELF_HWCAP); + NEW_AUX_ENT(AT_CLKTCK, (abi_ulong)sysconf(_SC_CLK_TCK)); + if (k_platform) { + NEW_AUX_ENT(AT_PLATFORM, u_platform); + } +#ifdef ARCH_DLINFO + /* + * ARCH_DLINFO must come last so platform specific code can enforce + * special alignment requirements on the AUXV if necessary (eg. PPC). 
+ */ + ARCH_DLINFO; +#endif +#undef NEW_AUX_ENT + + sp = loader_build_argptr(envc, argc, sp, stringp); + return sp; +} + +#endif /* _TARGET_OS_ELF_H_ */ diff --git a/bsd-user/openbsd/target_os_siginfo.h b/bsd-user/openbsd/target_os_siginfo.h new file mode 100644 index 00000000000..baf646a5ab3 --- /dev/null +++ b/bsd-user/openbsd/target_os_siginfo.h @@ -0,0 +1,82 @@ +#ifndef _TARGET_OS_SIGINFO_H_ +#define _TARGET_OS_SIGINFO_H_ + +#define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ +#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8) +#define TARGET_NSIG_WORDS (TARGET_NSIG / TARGET_NSIG_BPW) + +/* this struct defines a stack used during syscall handling */ +typedef struct target_sigaltstack { + abi_long ss_sp; + abi_ulong ss_size; + abi_long ss_flags; +} target_stack_t; + +typedef struct { + uint32_t __bits[TARGET_NSIG_WORDS]; +} target_sigset_t + +struct target_sigaction { + abi_ulong _sa_handler; + int32_t sa_flags; + target_sigset_t sa_mask; +}; + +/* Compare to sys/siginfo.h */ +typedef union target_sigval { + int sival_int; + abi_ulong sival_ptr; +} target_sigval_t; + +struct target_ksiginfo { + int32_t _signo; + int32_t _code; + int32_t _errno; +#if TARGET_ABI_BITS == 64 + int32_t _pad; +#endif + union { + struct { + int32_t _pid; + int32_t _uid; + target_sigval_t _value; + } _rt; + + struct { + int32_t _pid; + int32_t _uid; + int32_t _struct; + /* clock_t _utime; */ + /* clock_t _stime; */ + } _child; + + struct { + abi_ulong _addr; + int32_t _trap; + } _fault; + + struct { + long _band; + int _fd; + } _poll; + } _reason; +}; + +typedef union target_siginfo { + int8_t si_pad[128]; + struct target_ksiginfo _info; +} target_siginfo_t; + +#define target_si_signo _info._signo +#define target_si_code _info._code +#define target_si_errno _info._errno +#define target_si_addr _info._reason._fault._addr + +#define TARGET_SEGV_MAPERR 1 +#define TARGET_SEGV_ACCERR 2 + +#define TARGET_TRAP_BRKPT 1 +#define TARGET_TRAP_TRACE 2 + + +#endif /* ! 
_TARGET_OS_SIGINFO_H_ */ diff --git a/bsd-user/openbsd/target_os_signal.h b/bsd-user/openbsd/target_os_signal.h new file mode 100644 index 00000000000..a373922f7e8 --- /dev/null +++ b/bsd-user/openbsd/target_os_signal.h @@ -0,0 +1,69 @@ +#ifndef _TARGET_OS_SIGNAL_H_ +#define _TARGET_OS_SIGNAL_H_ + +#include "target_os_siginfo.h" +#include "target_arch_signal.h" + +#define TARGET_SIGHUP 1 /* hangup */ +#define TARGET_SIGINT 2 /* interrupt */ +#define TARGET_SIGQUIT 3 /* quit */ +#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ +#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ +#define TARGET_SIGABRT 6 /* abort() */ +#define TARGET_SIGIOT SIGABRT /* compatibility */ +#define TARGET_SIGEMT 7 /* EMT instruction */ +#define TARGET_SIGFPE 8 /* floating point exception */ +#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ +#define TARGET_SIGBUS 10 /* bus error */ +#define TARGET_SIGSEGV 11 /* segmentation violation */ +#define TARGET_SIGSYS 12 /* bad argument to system call */ +#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ +#define TARGET_SIGALRM 14 /* alarm clock */ +#define TARGET_SIGTERM 15 /* software termination signal from kill */ +#define TARGET_SIGURG 16 /* urgent condition on IO channel */ +#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ +#define TARGET_SIGTSTP 18 /* stop signal from tty */ +#define TARGET_SIGCONT 19 /* continue a stopped process */ +#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ +#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ +#define TARGET_SIGTTOU 22 /* like TTIN for out if (tp->t_local<OSTOP) */ +#define TARGET_SIGIO 23 /* input/output possible signal */ +#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ +#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ +#define TARGET_SIGVTALRM 26 /* virtual time alarm */ +#define TARGET_SIGPROF 27 /* profiling time alarm */ +#define TARGET_SIGWINCH 28 /* window size changes */ +#define TARGET_SIGINFO 29 /* information request */ +#define TARGET_SIGUSR1 30 /* user defined signal 1 */ +#define TARGET_SIGUSR2 31 /* user defined signal 2 */ + +/* + * Language spec says we must list exactly one parameter, even though we + * actually supply three. Ugh! 
+ */ +#define TARGET_SIG_DFL ((void (*)(int))0) +#define TARGET_SIG_IGN ((void (*)(int))1) +#define TARGET_SIG_ERR ((void (*)(int))-1) + +#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ +#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ +#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ +#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ +#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ +#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ + +/* + * Flags for sigprocmask: + */ +#define TARGET_SIG_BLOCK 1 /* block specified signal set */ +#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ +#define TARGET_SIG_SETMASK 3 /* set specified signal set */ + +#define TARGET_BADSIG SIG_ERR + +#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ +#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ + +#endif /* !_TARGET_OS_SIGNAL_H_ */ diff --git a/bsd-user/openbsd/target_os_stack.h b/bsd-user/openbsd/target_os_stack.h new file mode 100644 index 00000000000..4b37955d3b1 --- /dev/null +++ b/bsd-user/openbsd/target_os_stack.h @@ -0,0 +1,56 @@ +/* + * OpenBSD setup_initial_stack() implementation. + * + * Copyright (c) 2013-14 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_STACK_H_ +#define _TARGET_OS_STACK_H_ + +#include "target_arch_sigtramp.h" + +static inline int setup_initial_stack(struct bsd_binprm *bprm, abi_ulong *p, + abi_ulong *stringp) +{ + int i; + abi_ulong stack_base; + + stack_base = (target_stkbas + target_stksiz) - + MAX_ARG_PAGES * TARGET_PAGE_SIZE; + if (p) { + *p = stack_base; + } + if (stringp) { + *stringp = stack_base; + } + + for (i = 0; i < MAX_ARG_PAGES; i++) { + if (bprm->page[i]) { + info->rss++; + if (!memcpy_to_target(stack_base, bprm->page[i], + TARGET_PAGE_SIZE)) { + errno = EFAULT; + return -1; + } + g_free(bprm->page[i]); + } + stack_base += TARGET_PAGE_SIZE; + } + + return 0; +} + +#endif /* !_TARGET_OS_STACK_H_ */ diff --git a/bsd-user/openbsd/target_os_thread.h b/bsd-user/openbsd/target_os_thread.h new file mode 100644 index 00000000000..01ed0d9fc86 --- /dev/null +++ b/bsd-user/openbsd/target_os_thread.h @@ -0,0 +1,25 @@ +/* + * OpenBSD thread dependent code and definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_OS_THREAD_H_ +#define _TARGET_OS_THREAD_H_ + +#include "target_arch_thread.h" + +#endif /* !_TARGET_OS_THREAD_H_ */ diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index d2bcaab7413..1b3b974afe9 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -17,16 +17,18 @@ #ifndef QEMU_H #define QEMU_H - +#include "qemu/osdep.h" #include "cpu.h" +#include "qemu/units.h" #include "exec/cpu_ldst.h" +#include "exec/exec-all.h" #undef DEBUG_REMAP -#ifdef DEBUG_REMAP -#endif /* DEBUG_REMAP */ #include "exec/user/abitypes.h" +extern char **environ; + enum BSDType { target_freebsd, target_netbsd, @@ -34,22 +36,21 @@ enum BSDType { }; extern enum BSDType bsd_type; +#include "exec/user/thunk.h" +#include "target_arch.h" #include "syscall_defs.h" #include "target_syscall.h" -#include "target_signal.h" +#include "target_os_vmparam.h" +#include "target_os_signal.h" #include "exec/gdbstub.h" -#if defined(CONFIG_USE_NPTL) -#define THREAD __thread -#else -#define THREAD -#endif - -/* This struct is used to hold certain information about the image. - * Basically, it replicates in user space what would be certain - * task_struct fields in the kernel +/* + * This struct is used to hold certain information about the image. Basically, + * it replicates in user space what would be certain task_struct fields in the + * kernel */ struct image_info { + abi_ulong load_bias; abi_ulong load_addr; abi_ulong start_code; abi_ulong end_code; @@ -64,78 +65,90 @@ struct image_info { abi_ulong entry; abi_ulong code_offset; abi_ulong data_offset; - int personality; + abi_ulong arg_start; + abi_ulong arg_end; + uint32_t elf_flags; }; #define MAX_SIGQUEUE_SIZE 1024 -struct sigqueue { - struct sigqueue *next; - //target_siginfo_t info; +struct qemu_sigqueue { + struct qemu_sigqueue *next; + target_siginfo_t info; }; struct emulated_sigtable { int pending; /* true if signal is pending */ - struct sigqueue *first; - struct sigqueue info; /* in order to always have memory for the - first signal, we put it here */ + struct qemu_sigqueue *first; + struct qemu_sigqueue info; /* Put first signal info here */ }; -/* NOTE: we force a big alignment so that the stack stored after is - aligned too */ +/* + * NOTE: we force a big alignment so that the stack stored after is aligned too + */ typedef struct TaskState { pid_t ts_tid; /* tid (or pid) of this task */ struct TaskState *next; - int used; /* non zero if used */ + struct bsd_binprm *bprm; struct image_info *info; struct emulated_sigtable sigtab[TARGET_NSIG]; - struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ - struct sigqueue *first_free; /* first free siginfo queue entry */ + struct qemu_sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ + struct qemu_sigqueue *first_free; /* first free siginfo queue entry */ int signal_pending; /* non zero if a signal may be pending */ uint8_t stack[]; } __attribute__((aligned(16))) TaskState; void init_task_state(TaskState *ts); +void stop_all_tasks(void); extern const char *qemu_uname_release; -extern unsigned long mmap_min_addr; -/* ??? See if we can avoid exposing so much of the loader internals. 
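Editor's note: the reworked TaskState above keeps a fixed pool of qemu_sigqueue entries chained through first_free, so queuing a signal never allocates memory in a context where that might not be safe. A toy version of that intrusive free list (pool size and struct fields are illustrative):

```c
#include <stddef.h>
#include <stdio.h>

#define MAX_SIGQUEUE_SIZE 8

struct sigq {
    struct sigq *next;
    int signo;                       /* stand-in for target_siginfo_t */
};

static struct sigq pool[MAX_SIGQUEUE_SIZE];
static struct sigq *first_free;

static void pool_init(void)
{
    for (int i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) {
        pool[i].next = &pool[i + 1];
    }
    pool[MAX_SIGQUEUE_SIZE - 1].next = NULL;
    first_free = &pool[0];
}

static struct sigq *alloc_sigq(void)
{
    struct sigq *q = first_free;
    if (q) {
        first_free = q->next;        /* pop from the free list */
    }
    return q;                        /* NULL means the queue is full */
}

static void free_sigq(struct sigq *q)
{
    q->next = first_free;            /* push back onto the free list */
    first_free = q;
}

int main(void)
{
    pool_init();
    struct sigq *q = alloc_sigq();
    q->signo = 10;
    printf("queued signal %d\n", q->signo);
    free_sigq(q);
    return 0;
}
```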
 */
 /*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
+ * TARGET_ARG_MAX defines the number of bytes allocated for arguments
+ * and envelope for the new program. 256k should suffice for a reasonable
+ * maximum env+arg in 32-bit environments, bump it up to 512k for !ILP32
+ * platforms.
  */
-#define MAX_ARG_PAGES 32
+#if TARGET_ABI_BITS > 32
+#define TARGET_ARG_MAX (512 * KiB)
+#else
+#define TARGET_ARG_MAX (256 * KiB)
+#endif
+#define MAX_ARG_PAGES (TARGET_ARG_MAX / TARGET_PAGE_SIZE)
 /*
  * This structure is used to hold the arguments that are
  * used when loading binaries.
  */
-struct linux_binprm {
+struct bsd_binprm {
     char buf[128];
     void *page[MAX_ARG_PAGES];
     abi_ulong p;
+    abi_ulong stringp;
     int fd;
     int e_uid, e_gid;
     int argc, envc;
     char **argv;
     char **envp;
-    char * filename;    /* Name of binary */
+    char *filename;     /* (Given) Name of binary */
+    char *fullpath;     /* Full path of binary */
+    int (*core_dump)(int, CPUArchState *);
 };
 void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
 abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
-                              abi_ulong stringp, int push_ptr);
-int loader_exec(const char * filename, char ** argv, char ** envp,
-                struct target_pt_regs * regs, struct image_info *infop);
+                              abi_ulong stringp);
+int loader_exec(const char *filename, char **argv, char **envp,
+                struct target_pt_regs *regs, struct image_info *infop,
+                struct bsd_binprm *bprm);
-int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info);
-int load_flt_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
-                    struct image_info * info);
+int load_elf_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs,
+                    struct image_info *info);
+int load_flt_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs,
+                    struct image_info *info);
+int is_target_elf_binary(int fd);
 abi_long memcpy_to_target(abi_ulong dest, const void *src, unsigned long len);
@@ -153,7 +166,7 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1,
                            abi_long arg2, abi_long arg3, abi_long arg4,
                            abi_long arg5, abi_long arg6);
 void gemu_log(const char *fmt, ...)
GCC_FMT_ATTR(1, 2); -extern THREAD CPUState *thread_cpu; +extern __thread CPUState *thread_cpu; void cpu_loop(CPUArchState *env); char *target_strerror(int err); int get_osversion(void); @@ -193,28 +206,41 @@ extern int do_strace; /* signal.c */ void process_pending_signals(CPUArchState *cpu_env); void signal_init(void); -//int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info); -//void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info); -//void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo); long do_sigreturn(CPUArchState *env); long do_rt_sigreturn(CPUArchState *env); +void queue_signal(CPUArchState *env, int sig, target_siginfo_t *info); abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp); /* mmap.c */ int target_mprotect(abi_ulong start, abi_ulong len, int prot); abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, - int flags, int fd, abi_ulong offset); + int flags, int fd, off_t offset); int target_munmap(abi_ulong start, abi_ulong len); abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_size, unsigned long flags, abi_ulong new_addr); int target_msync(abi_ulong start, abi_ulong len, int flags); extern unsigned long last_brk; +extern abi_ulong mmap_next_start; +abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size); void mmap_fork_start(void); void mmap_fork_end(int child); /* main.c */ -extern unsigned long x86_stack_size; +extern char qemu_proc_pathname[]; +extern unsigned long target_maxtsiz; +extern unsigned long target_dfldsiz; +extern unsigned long target_maxdsiz; +extern unsigned long target_dflssiz; +extern unsigned long target_maxssiz; +extern unsigned long target_sgrowsiz; + +/* syscall.c */ +abi_long get_errno(abi_long ret); +bool is_error(abi_long ret); + +/* os-sys.c */ +abi_long do_freebsd_sysarch(void *cpu_env, abi_long arg1, abi_long arg2); /* user access */ @@ -226,14 +252,16 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) return page_check_range((target_ulong)addr, size, type) == 0; } -/* NOTE __get_user and __put_user use host pointers and don't check access. */ -/* These are usually used to access struct data members once the - * struct has been locked - usually with lock_user_struct(). +/* + * NOTE __get_user and __put_user use host pointers and don't check access. + * + * These are usually used to access struct data members once the struct has been + * locked - usually with lock_user_struct(). */ #define __put_user(x, hptr)\ ({\ int size = sizeof(*hptr);\ - switch(size) {\ + switch (size) {\ case 1:\ *(uint8_t *)(hptr) = (uint8_t)(typeof(*hptr))(x);\ break;\ @@ -248,14 +276,14 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) break;\ default:\ abort();\ - }\ + } \ 0;\ }) #define __get_user(x, hptr) \ ({\ int size = sizeof(*hptr);\ - switch(size) {\ + switch (size) {\ case 1:\ x = (typeof(*hptr))*(uint8_t *)(hptr);\ break;\ @@ -269,24 +297,26 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) x = (typeof(*hptr))tswap64(*(uint64_t *)(hptr));\ break;\ default:\ - /* avoid warning */\ x = 0;\ abort();\ - }\ + } \ 0;\ }) -/* put_user()/get_user() take a guest address and check access */ -/* These are usually used to access an atomic data type, such as an int, - * that has been passed by address. These internally perform locking - * and unlocking on the data type. 
+/* + * put_user()/get_user() take a guest address and check access + * + * These are usually used to access an atomic data type, such as an int, that + * has been passed by address. These internally perform locking and unlocking + * on the data type. */ #define put_user(x, gaddr, target_type) \ ({ \ abi_ulong __gaddr = (gaddr); \ target_type *__hptr; \ abi_long __ret; \ - if ((__hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0))) { \ + __hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0); \ + if (__hptr) { \ __ret = __put_user((x), __hptr); \ unlock_user(__hptr, __gaddr, sizeof(target_type)); \ } else \ @@ -299,11 +329,11 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) abi_ulong __gaddr = (gaddr); \ target_type *__hptr; \ abi_long __ret; \ - if ((__hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1))) { \ + __hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1); \ + if (__hptr) { \ __ret = __get_user((x), __hptr); \ unlock_user(__hptr, __gaddr, 0); \ } else { \ - /* avoid warning */ \ (x) = 0; \ __ret = -TARGET_EFAULT; \ } \ @@ -332,33 +362,41 @@ static inline bool access_ok(int type, abi_ulong addr, abi_ulong size) #define get_user_u8(x, gaddr) get_user((x), (gaddr), uint8_t) #define get_user_s8(x, gaddr) get_user((x), (gaddr), int8_t) -/* copy_from_user() and copy_to_user() are usually used to copy data +/* + * copy_from_user() and copy_to_user() are usually used to copy data * buffers between the target and host. These internally perform * locking/unlocking of the memory. */ abi_long copy_from_user(void *hptr, abi_ulong gaddr, size_t len); abi_long copy_to_user(abi_ulong gaddr, void *hptr, size_t len); -/* Functions for accessing guest memory. The tget and tput functions - read/write single values, byteswapping as necessary. The lock_user function - gets a pointer to a contiguous area of guest memory, but does not perform - any byteswapping. lock_user may return either a pointer to the guest - memory, or a temporary buffer. */ +/* + * Functions for accessing guest memory. The tget and tput functions + * read/write single values, byteswapping as necessary. The lock_user function + * gets a pointer to a contiguous area of guest memory, but does not perform + * any byteswapping. lock_user may return either a pointer to the guest + * memory, or a temporary buffer. + */ -/* Lock an area of guest memory into the host. If copy is true then the - host area will have the same contents as the guest. */ -static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy) +/* + * Lock an area of guest memory into the host. If copy is true then the + * host area will have the same contents as the guest. + */ +static inline void *lock_user(int type, abi_ulong guest_addr, long len, + int copy) { - if (!access_ok(type, guest_addr, len)) + if (!access_ok(type, guest_addr, len)) { return NULL; + } #ifdef DEBUG_REMAP { void *addr; addr = g_malloc(len); - if (copy) + if (copy) { memcpy(addr, g2h_untagged(guest_addr), len); - else + } else { memset(addr, 0, len); + } return addr; } #else @@ -366,26 +404,32 @@ static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy #endif } -/* Unlock an area of guest memory. The first LEN bytes must be - flushed back to guest memory. host_ptr = NULL is explicitly - allowed and does nothing. */ +/* + * Unlock an area of guest memory. The first LEN bytes must be flushed back to + * guest memory. host_ptr = NULL is explicitly allowed and does nothing. 
+ */ static inline void unlock_user(void *host_ptr, abi_ulong guest_addr, long len) { #ifdef DEBUG_REMAP - if (!host_ptr) + if (!host_ptr) { return; - if (host_ptr == g2h_untagged(guest_addr)) + } + if (host_ptr == g2h_untagged(guest_addr)) { return; - if (len > 0) + } + if (len > 0) { memcpy(g2h_untagged(guest_addr), host_ptr, len); + } g_free(host_ptr); #endif } -/* Return the length of a string in target memory or -TARGET_EFAULT if - access error. */ +/* + * Return the length of a string in target memory or -TARGET_EFAULT if access + * error. + */ abi_long target_strlen(abi_ulong gaddr); /* Like lock_user but for null terminated strings. */ @@ -393,8 +437,9 @@ static inline void *lock_user_string(abi_ulong guest_addr) { abi_long len; len = target_strlen(guest_addr); - if (len < 0) + if (len < 0) { return NULL; + } return lock_user(VERIFY_READ, guest_addr, (long)(len + 1), 1); } @@ -404,8 +449,6 @@ static inline void *lock_user_string(abi_ulong guest_addr) #define unlock_user_struct(host_ptr, guest_addr, copy) \ unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0) -#if defined(CONFIG_USE_NPTL) #include -#endif #endif /* QEMU_H */ diff --git a/bsd-user/signal.c b/bsd-user/signal.c index f6f7aa2427d..05b277c6422 100644 --- a/bsd-user/signal.c +++ b/bsd-user/signal.c @@ -16,10 +16,23 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ -#include "qemu/osdep.h" +#include "qemu/osdep.h" #include "qemu.h" -#include "target_signal.h" + +/* + * Stubbed out routines until we merge signal support from bsd-user + * fork. + */ + +/* + * Queue a signal so that it will be send to the virtual CPU as soon as + * possible. + */ +void queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) +{ + qemu_log_mask(LOG_UNIMP, "No signal queueing, dropping signal %d\n", sig); +} void signal_init(void) { @@ -28,3 +41,19 @@ void signal_init(void) void process_pending_signals(CPUArchState *cpu_env) { } + +void cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, bool maperr, uintptr_t ra) +{ + qemu_log_mask(LOG_UNIMP, "No signal support for SIGSEGV\n"); + /* unreachable */ + abort(); +} + +void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, uintptr_t ra) +{ + qemu_log_mask(LOG_UNIMP, "No signal support for SIGBUS\n"); + /* unreachable */ + abort(); +} diff --git a/bsd-user/sparc/target_arch_sysarch.h b/bsd-user/sparc/target_arch_sysarch.h deleted file mode 100644 index d0b85ef6bbb..00000000000 --- a/bsd-user/sparc/target_arch_sysarch.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * SPARC sysarch() system call emulation - * - * Copyright (c) 2013 Stacey D. Son - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . 
- */ - -#ifndef BSD_USER_ARCH_SYSARCH_H_ -#define BSD_USER_ARCH_SYSARCH_H_ - -#include "target_syscall.h" - -static inline abi_long do_freebsd_arch_sysarch(void *env, int op, - abi_ulong parms) -{ - int ret = 0; - - switch (op) { - case TARGET_SPARC_SIGTRAMP_INSTALL: - /* XXX not currently handled */ - case TARGET_SPARC_UTRAP_INSTALL: - /* XXX not currently handled */ - default: - ret = -TARGET_EINVAL; - break; - } - - return ret; -} - -static inline void do_freebsd_arch_print_sysarch( - const struct syscallname *name, abi_long arg1, abi_long arg2, - abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6) -{ - - gemu_log("%s(%d, " TARGET_ABI_FMT_lx ", " TARGET_ABI_FMT_lx ", " - TARGET_ABI_FMT_lx ")", name->name, (int)arg1, arg2, arg3, arg4); -} - -#endif /*!BSD_USER_ARCH_SYSARCH_H_ */ diff --git a/bsd-user/sparc/target_signal.h b/bsd-user/sparc/target_signal.h deleted file mode 100644 index 5b2abba40f2..00000000000 --- a/bsd-user/sparc/target_signal.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -#ifndef UREG_I6 -#define UREG_I6 6 -#endif -#ifndef UREG_FP -#define UREG_FP UREG_I6 -#endif - -static inline abi_ulong get_sp_from_cpustate(CPUSPARCState *state) -{ - return state->regwptr[UREG_FP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/bsd-user/sparc/target_syscall.h b/bsd-user/sparc/target_syscall.h deleted file mode 100644 index 151284754be..00000000000 --- a/bsd-user/sparc/target_syscall.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * sparc dependent system call definitions - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ -#ifndef TARGET_SYSCALL_H -#define TARGET_SYSCALL_H - -struct target_pt_regs { - abi_ulong psr; - abi_ulong pc; - abi_ulong npc; - abi_ulong y; - abi_ulong u_regs[16]; -}; - -#define UNAME_MACHINE "sun4" -#define TARGET_HW_MACHINE "sparc" -#define TARGET_HW_MACHINE_ARCH "sparc" - -#define TARGET_SPARC_UTRAP_INSTALL 1 -#define TARGET_SPARC_SIGTRAMP_INSTALL 2 - -#endif /* TARGET_SYSCALL_H */ diff --git a/bsd-user/sparc64/target_arch_sysarch.h b/bsd-user/sparc64/target_arch_sysarch.h deleted file mode 100644 index e6f17c15045..00000000000 --- a/bsd-user/sparc64/target_arch_sysarch.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * SPARC64 sysarch() system call emulation - * - * Copyright (c) 2013 Stacey D. Son - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef BSD_USER_ARCH_SYSARCH_H_ -#define BSD_USER_ARCH_SYSARCH_H_ - -#include "target_syscall.h" - -static inline abi_long do_freebsd_arch_sysarch(void *env, int op, - abi_ulong parms) -{ - int ret = 0; - - switch (op) { - case TARGET_SPARC_SIGTRAMP_INSTALL: - /* XXX not currently handled */ - case TARGET_SPARC_UTRAP_INSTALL: - /* XXX not currently handled */ - default: - ret = -TARGET_EINVAL; - break; - } - - return ret; -} - -static inline void do_freebsd_arch_print_sysarch( - const struct syscallname *name, abi_long arg1, abi_long arg2, - abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6) -{ - - gemu_log("%s(%d, " TARGET_ABI_FMT_lx ", " TARGET_ABI_FMT_lx ", " - TARGET_ABI_FMT_lx ")", name->name, (int)arg1, arg2, arg3, arg4); -} - -#endif /*!BSD_USER_ARCH_SYSARCH_H_ */ diff --git a/bsd-user/sparc64/target_signal.h b/bsd-user/sparc64/target_signal.h deleted file mode 100644 index 5b2abba40f2..00000000000 --- a/bsd-user/sparc64/target_signal.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -#ifndef UREG_I6 -#define UREG_I6 6 -#endif -#ifndef UREG_FP -#define UREG_FP UREG_I6 -#endif - -static inline abi_ulong get_sp_from_cpustate(CPUSPARCState *state) -{ - return state->regwptr[UREG_FP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/bsd-user/sparc64/target_syscall.h b/bsd-user/sparc64/target_syscall.h deleted file mode 100644 index b7d986a76d4..00000000000 --- a/bsd-user/sparc64/target_syscall.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * sparc64 dependent system call definitions - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . 
- */ -#ifndef TARGET_SYSCALL_H -#define TARGET_SYSCALL_H - -struct target_pt_regs { - abi_ulong u_regs[16]; - abi_ulong tstate; - abi_ulong pc; - abi_ulong npc; - abi_ulong y; - abi_ulong fprs; -}; - -#define UNAME_MACHINE "sun4u" -#define TARGET_HW_MACHINE "sparc" -#define TARGET_HW_MACHINE_ARCH "sparc64" - -#define TARGET_SPARC_UTRAP_INSTALL 1 -#define TARGET_SPARC_SIGTRAMP_INSTALL 2 - -#endif /* TARGET_SYSCALL_H */ diff --git a/bsd-user/strace.c b/bsd-user/strace.c index 2c3b59caf06..be40b8a20cf 100644 --- a/bsd-user/strace.c +++ b/bsd-user/strace.c @@ -128,14 +128,6 @@ static void print_syscall_ret_addr(const struct syscallname *name, abi_long ret) } } -#if 0 /* currently unused */ -static void -print_syscall_ret_raw(struct syscallname *name, abi_long ret) -{ - gemu_log(" = 0x" TARGET_ABI_FMT_lx "\n", ret); -} -#endif - /* * An array of all of the syscalls we know about */ diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c index adc3d21b542..d3322760f43 100644 --- a/bsd-user/syscall.c +++ b/bsd-user/syscall.c @@ -33,18 +33,18 @@ static abi_ulong target_brk; static abi_ulong target_original_brk; -static inline abi_long get_errno(abi_long ret) +abi_long get_errno(abi_long ret) { - if (ret == -1) + if (ret == -1) { /* XXX need to translate host -> target errnos here */ return -(errno); - else - return ret; + } + return ret; } #define target_to_host_bitmask(x, tbl) (x) -static inline int is_error(abi_long ret) +bool is_error(abi_long ret) { return (abi_ulong)ret >= (abi_ulong)(-4096); } @@ -88,67 +88,6 @@ static abi_long do_obreak(abi_ulong new_brk) return 0; } -#if defined(TARGET_I386) -static abi_long do_freebsd_sysarch(CPUX86State *env, int op, abi_ulong parms) -{ - abi_long ret = 0; - abi_ulong val; - int idx; - - switch(op) { -#ifdef TARGET_ABI32 - case TARGET_FREEBSD_I386_SET_GSBASE: - case TARGET_FREEBSD_I386_SET_FSBASE: - if (op == TARGET_FREEBSD_I386_SET_GSBASE) -#else - case TARGET_FREEBSD_AMD64_SET_GSBASE: - case TARGET_FREEBSD_AMD64_SET_FSBASE: - if (op == TARGET_FREEBSD_AMD64_SET_GSBASE) -#endif - idx = R_GS; - else - idx = R_FS; - if (get_user(val, parms, abi_ulong)) - return -TARGET_EFAULT; - cpu_x86_load_seg(env, idx, 0); - env->segs[idx].base = val; - break; -#ifdef TARGET_ABI32 - case TARGET_FREEBSD_I386_GET_GSBASE: - case TARGET_FREEBSD_I386_GET_FSBASE: - if (op == TARGET_FREEBSD_I386_GET_GSBASE) -#else - case TARGET_FREEBSD_AMD64_GET_GSBASE: - case TARGET_FREEBSD_AMD64_GET_FSBASE: - if (op == TARGET_FREEBSD_AMD64_GET_GSBASE) -#endif - idx = R_GS; - else - idx = R_FS; - val = env->segs[idx].base; - if (put_user(val, parms, abi_ulong)) - return -TARGET_EFAULT; - break; - /* XXX handle the others... 
*/ - default: - ret = -TARGET_EINVAL; - break; - } - return ret; -} -#endif - -#ifdef TARGET_SPARC -static abi_long do_freebsd_sysarch(void *env, int op, abi_ulong parms) -{ - /* XXX handle - * TARGET_FREEBSD_SPARC_UTRAP_INSTALL, - * TARGET_FREEBSD_SPARC_SIGTRAMP_INSTALL - */ - return -TARGET_EINVAL; -} -#endif - #ifdef __FreeBSD__ /* * XXX this uses the undocumented oidfmt interface to find the kind of @@ -199,6 +138,7 @@ static int sysctl_oldcvt(void *holdp, size_t holdlen, uint32_t kind) #else case CTLTYPE_LONG: *(uint64_t *)holdp = tswap64(*(long *)holdp); + break; case CTLTYPE_ULONG: *(uint64_t *)holdp = tswap64(*(unsigned long *)holdp); break; @@ -271,7 +211,7 @@ static abi_long lock_iovec(int type, struct iovec *vec, abi_ulong target_addr, target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1); if (!target_vec) return -TARGET_EFAULT; - for(i = 0;i < count; i++) { + for (i = 0;i < count; i++) { base = tswapl(target_vec[i].iov_base); vec[i].iov_len = tswapl(target_vec[i].iov_len); if (vec[i].iov_len != 0) { @@ -297,7 +237,7 @@ static abi_long unlock_iovec(struct iovec *vec, abi_ulong target_addr, target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1); if (!target_vec) return -TARGET_EFAULT; - for(i = 0;i < count; i++) { + for (i = 0;i < count; i++) { if (target_vec[i].iov_base) { base = tswapl(target_vec[i].iov_base); unlock_user(vec[i].iov_base, base, copy ? vec[i].iov_len : 0); @@ -325,16 +265,16 @@ abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1, #endif record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0); - if(do_strace) + if (do_strace) print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); - switch(num) { + switch (num) { case TARGET_FREEBSD_NR_exit: #ifdef CONFIG_GPROF _mcleanup(); #endif gdb_exit(arg1); - qemu_plugin_atexit_cb(); + qemu_plugin_user_exit(); /* XXX: should free thread stack and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ @@ -427,16 +367,16 @@ abi_long do_netbsd_syscall(void *cpu_env, int num, abi_long arg1, record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0); - if(do_strace) + if (do_strace) print_netbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); - switch(num) { + switch (num) { case TARGET_NETBSD_NR_exit: #ifdef CONFIG_GPROF _mcleanup(); #endif gdb_exit(arg1); - qemu_plugin_atexit_cb(); + qemu_plugin_user_exit(); /* XXX: should free thread stack and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ @@ -506,16 +446,16 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, abi_long arg1, record_syscall_start(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0); - if(do_strace) + if (do_strace) print_openbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6); - switch(num) { + switch (num) { case TARGET_OPENBSD_NR_exit: #ifdef CONFIG_GPROF _mcleanup(); #endif gdb_exit(arg1); - qemu_plugin_atexit_cb(); + qemu_plugin_user_exit(); /* XXX: should free thread stack and CPU env */ _exit(arg1); ret = 0; /* avoid warning */ diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h index 207ddeecbfa..04a1a886d7b 100644 --- a/bsd-user/syscall_defs.h +++ b/bsd-user/syscall_defs.h @@ -1,114 +1,181 @@ -/* $OpenBSD: signal.h,v 1.19 2006/01/08 14:20:16 millert Exp $ */ -/* $NetBSD: signal.h,v 1.21 1996/02/09 18:25:32 christos Exp $ */ +/* + * System call related declarations + * + * Copyright (c) 2013-15 Stacey D. 
Son (sson at FreeBSD) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _SYSCALL_DEFS_H_ +#define _SYSCALL_DEFS_H_ + +#include + +#include "errno_defs.h" + +#include "freebsd/syscall_nr.h" +#include "netbsd/syscall_nr.h" +#include "openbsd/syscall_nr.h" + +/* + * machine/_types.h + * or x86/_types.h + */ /* - * Copyright (c) 1982, 1986, 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. + * time_t seems to be very inconsistly defined for the different *BSD's... * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * FreeBSD uses a 64bits time_t except on i386 + * so we have to add a special case here. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * On NetBSD time_t is always defined as an int64_t. On OpenBSD time_t + * is always defined as an int. 
* - * @(#)signal.h 8.2 (Berkeley) 1/21/94 */ +#if (!defined(TARGET_I386)) +typedef int64_t target_freebsd_time_t; +#else +typedef int32_t target_freebsd_time_t; +#endif -#define TARGET_NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ - -#define TARGET_SIGHUP 1 /* hangup */ -#define TARGET_SIGINT 2 /* interrupt */ -#define TARGET_SIGQUIT 3 /* quit */ -#define TARGET_SIGILL 4 /* illegal instruction (not reset when caught) */ -#define TARGET_SIGTRAP 5 /* trace trap (not reset when caught) */ -#define TARGET_SIGABRT 6 /* abort() */ -#define TARGET_SIGIOT SIGABRT /* compatibility */ -#define TARGET_SIGEMT 7 /* EMT instruction */ -#define TARGET_SIGFPE 8 /* floating point exception */ -#define TARGET_SIGKILL 9 /* kill (cannot be caught or ignored) */ -#define TARGET_SIGBUS 10 /* bus error */ -#define TARGET_SIGSEGV 11 /* segmentation violation */ -#define TARGET_SIGSYS 12 /* bad argument to system call */ -#define TARGET_SIGPIPE 13 /* write on a pipe with no one to read it */ -#define TARGET_SIGALRM 14 /* alarm clock */ -#define TARGET_SIGTERM 15 /* software termination signal from kill */ -#define TARGET_SIGURG 16 /* urgent condition on IO channel */ -#define TARGET_SIGSTOP 17 /* sendable stop signal not from tty */ -#define TARGET_SIGTSTP 18 /* stop signal from tty */ -#define TARGET_SIGCONT 19 /* continue a stopped process */ -#define TARGET_SIGCHLD 20 /* to parent on child stop or exit */ -#define TARGET_SIGTTIN 21 /* to readers pgrp upon background tty read */ -#define TARGET_SIGTTOU 22 /* like TTIN for output if (tp->t_local<OSTOP) */ -#define TARGET_SIGIO 23 /* input/output possible signal */ -#define TARGET_SIGXCPU 24 /* exceeded CPU time limit */ -#define TARGET_SIGXFSZ 25 /* exceeded file size limit */ -#define TARGET_SIGVTALRM 26 /* virtual time alarm */ -#define TARGET_SIGPROF 27 /* profiling time alarm */ -#define TARGET_SIGWINCH 28 /* window size changes */ -#define TARGET_SIGINFO 29 /* information request */ -#define TARGET_SIGUSR1 30 /* user defined signal 1 */ -#define TARGET_SIGUSR2 31 /* user defined signal 2 */ +struct target_iovec { + abi_long iov_base; /* Starting address */ + abi_long iov_len; /* Number of bytes */ +}; /* - * Language spec says we must list exactly one parameter, even though we - * actually supply three. Ugh! 
+ * sys/mman.h */ -#define TARGET_SIG_DFL (void (*)(int))0 -#define TARGET_SIG_IGN (void (*)(int))1 -#define TARGET_SIG_ERR (void (*)(int))-1 - -#define TARGET_SA_ONSTACK 0x0001 /* take signal on signal stack */ -#define TARGET_SA_RESTART 0x0002 /* restart system on signal return */ -#define TARGET_SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ -#define TARGET_SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ -#define TARGET_SA_NOCLDWAIT 0x0020 /* don't create zombies (assign to pid 1) */ -#define TARGET_SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ -#define TARGET_SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ -#define TARGET_SA_SIGINFO 0x0040 /* generate siginfo_t */ +#define TARGET_FREEBSD_MAP_RESERVED0080 0x0080 /* previously misimplemented */ + /* MAP_INHERIT */ +#define TARGET_FREEBSD_MAP_RESERVED0100 0x0100 /* previously unimplemented */ + /* MAP_NOEXTEND */ +#define TARGET_FREEBSD_MAP_STACK 0x0400 /* region grows down, like a */ + /* stack */ +#define TARGET_FREEBSD_MAP_NOSYNC 0x0800 /* page to but do not sync */ + /* underlying file */ + +#define TARGET_FREEBSD_MAP_FLAGMASK 0x1ff7 + +#define TARGET_NETBSD_MAP_INHERIT 0x0080 /* region is retained after */ + /* exec */ +#define TARGET_NETBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, even */ + /* within break */ +#define TARGET_NETBSD_MAP_WIRED 0x0800 /* mlock() mapping when it is */ + /* established */ + +#define TARGET_NETBSD_MAP_STACK 0x2000 /* allocated from memory, */ + /* swap space (stack) */ + +#define TARGET_NETBSD_MAP_FLAGMASK 0x3ff7 + +#define TARGET_OPENBSD_MAP_INHERIT 0x0080 /* region is retained after */ + /* exec */ +#define TARGET_OPENBSD_MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change */ + /* file size */ +#define TARGET_OPENBSD_MAP_TRYFIXED 0x0400 /* attempt hint address, */ + /* even within heap */ + +#define TARGET_OPENBSD_MAP_FLAGMASK 0x17f7 + +/* XXX */ +#define TARGET_BSD_MAP_FLAGMASK 0x3ff7 /* - * Flags for sigprocmask: + * sys/time.h + * sys/timex.h */ -#define TARGET_SIG_BLOCK 1 /* block specified signal set */ -#define TARGET_SIG_UNBLOCK 2 /* unblock specified signal set */ -#define TARGET_SIG_SETMASK 3 /* set specified signal set */ -#define TARGET_BADSIG SIG_ERR +typedef abi_long target_freebsd_suseconds_t; -#define TARGET_SS_ONSTACK 0x0001 /* take signals on alternate stack */ -#define TARGET_SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ +/* compare to sys/timespec.h */ +struct target_freebsd_timespec { + target_freebsd_time_t tv_sec; /* seconds */ + abi_long tv_nsec; /* and nanoseconds */ +#if !defined(TARGET_I386) && TARGET_ABI_BITS == 32 + abi_long _pad; +#endif +}; -#include "errno_defs.h" +#define TARGET_CPUCLOCK_WHICH_PID 0 +#define TARGET_CPUCLOCK_WHICH_TID 1 -#include "freebsd/syscall_nr.h" -#include "netbsd/syscall_nr.h" -#include "openbsd/syscall_nr.h" +/* sys/umtx.h */ +struct target_freebsd__umtx_time { + struct target_freebsd_timespec _timeout; + uint32_t _flags; + uint32_t _clockid; +}; -struct target_iovec { - abi_long iov_base; /* Starting address */ - abi_long iov_len; /* Number of bytes */ +struct target_freebsd_timeval { + target_freebsd_time_t tv_sec; /* seconds */ + target_freebsd_suseconds_t tv_usec;/* and microseconds */ +#if !defined(TARGET_I386) && TARGET_ABI_BITS == 32 + abi_long _pad; +#endif +}; + +/* + * sys/resource.h + */ +#if defined(__FreeBSD__) +#define TARGET_RLIM_INFINITY RLIM_INFINITY +#else +#define TARGET_RLIM_INFINITY ((abi_ulong)-1) +#endif + +#define TARGET_RLIMIT_CPU 0 +#define 
TARGET_RLIMIT_FSIZE 1 +#define TARGET_RLIMIT_DATA 2 +#define TARGET_RLIMIT_STACK 3 +#define TARGET_RLIMIT_CORE 4 +#define TARGET_RLIMIT_RSS 5 +#define TARGET_RLIMIT_MEMLOCK 6 +#define TARGET_RLIMIT_NPROC 7 +#define TARGET_RLIMIT_NOFILE 8 +#define TARGET_RLIMIT_SBSIZE 9 +#define TARGET_RLIMIT_AS 10 +#define TARGET_RLIMIT_NPTS 11 +#define TARGET_RLIMIT_SWAP 12 + +struct target_rlimit { + uint64_t rlim_cur; + uint64_t rlim_max; +}; + +struct target_freebsd_rusage { + struct target_freebsd_timeval ru_utime; /* user time used */ + struct target_freebsd_timeval ru_stime; /* system time used */ + abi_long ru_maxrss; /* maximum resident set size */ + abi_long ru_ixrss; /* integral shared memory size */ + abi_long ru_idrss; /* integral unshared data size */ + abi_long ru_isrss; /* integral unshared stack size */ + abi_long ru_minflt; /* page reclaims */ + abi_long ru_majflt; /* page faults */ + abi_long ru_nswap; /* swaps */ + abi_long ru_inblock; /* block input operations */ + abi_long ru_oublock; /* block output operations */ + abi_long ru_msgsnd; /* messages sent */ + abi_long ru_msgrcv; /* messages received */ + abi_long ru_nsignals; /* signals received */ + abi_long ru_nvcsw; /* voluntary context switches */ + abi_long ru_nivcsw; /* involuntary context switches */ +}; + +struct target_freebsd__wrusage { + struct target_freebsd_rusage wru_self; + struct target_freebsd_rusage wru_children; }; +#endif /* ! _SYSCALL_DEFS_H_ */ diff --git a/bsd-user/uaccess.c b/bsd-user/uaccess.c index 91e2067933d..89163257f4a 100644 --- a/bsd-user/uaccess.c +++ b/bsd-user/uaccess.c @@ -46,7 +46,7 @@ abi_long target_strlen(abi_ulong guest_addr1) int max_len, len; guest_addr = guest_addr1; - for(;;) { + for (;;) { max_len = TARGET_PAGE_SIZE - (guest_addr & ~TARGET_PAGE_MASK); ptr = lock_user(VERIFY_READ, guest_addr, max_len, 1); if (!ptr) diff --git a/bsd-user/x86_64/target_arch.h b/bsd-user/x86_64/target_arch.h new file mode 100644 index 00000000000..e558e1b956e --- /dev/null +++ b/bsd-user/x86_64/target_arch.h @@ -0,0 +1,31 @@ +/* + * Intel x86_64 specific prototypes for bsd-user + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef _TARGET_ARCH_H_ +#define _TARGET_ARCH_H_ + +/* target_arch_cpu.c */ +void bsd_x86_64_write_dt(void *ptr, unsigned long addr, unsigned long limit, + int flags); +void bsd_x86_64_set_idt(int n, unsigned int dpl); +void bsd_x86_64_set_idt_base(uint64_t base); + +#define target_cpu_set_tls(env, newtls) + +#endif /* !_TARGET_ARCH_H_ */ diff --git a/bsd-user/x86_64/target_arch_cpu.c b/bsd-user/x86_64/target_arch_cpu.c new file mode 100644 index 00000000000..be7bd107200 --- /dev/null +++ b/bsd-user/x86_64/target_arch_cpu.c @@ -0,0 +1,71 @@ +/* + * x86_64 cpu related code + * + * Copyright (c) 2013 Stacey Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu.h" +#include "qemu/timer.h" + +#include "target_arch.h" + +static uint64_t *idt_table; + +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpu_get_host_ticks(); +} + +void bsd_x86_64_write_dt(void *ptr, unsigned long addr, + unsigned long limit, int flags) +{ + unsigned int e1, e2; + uint32_t *p; + e1 = (addr << 16) | (limit & 0xffff); + e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); + e2 |= flags; + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); +} + +static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, + uint64_t addr, unsigned int sel) +{ + uint32_t *p, e1, e2; + e1 = (addr & 0xffff) | (sel << 16); + e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); + p = ptr; + p[0] = tswap32(e1); + p[1] = tswap32(e2); + p[2] = tswap32(addr >> 32); + p[3] = 0; +} + +/* only dpl matters as we do only user space emulation */ +void bsd_x86_64_set_idt(int n, unsigned int dpl) +{ + set_gate64(idt_table + n * 2, 0, dpl, 0, 0); +} + +void bsd_x86_64_set_idt_base(uint64_t base) +{ + idt_table = g2h_untagged(base); +} diff --git a/bsd-user/x86_64/target_arch_cpu.h b/bsd-user/x86_64/target_arch_cpu.h new file mode 100644 index 00000000000..5172b230f09 --- /dev/null +++ b/bsd-user/x86_64/target_arch_cpu.h @@ -0,0 +1,245 @@ +/* + * x86_64 cpu init and loop + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef _TARGET_ARCH_CPU_H_ +#define _TARGET_ARCH_CPU_H_ + +#include "target_arch.h" + +#define TARGET_DEFAULT_CPU_MODEL "qemu64" + +static inline void target_cpu_init(CPUX86State *env, + struct target_pt_regs *regs) +{ + uint64_t *gdt_table; + + env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; + env->hflags |= HF_PE_MASK | HF_CPL_MASK; + if (env->features[FEAT_1_EDX] & CPUID_SSE) { + env->cr[4] |= CR4_OSFXSR_MASK; + env->hflags |= HF_OSFXSR_MASK; + } + + /* enable 64 bit mode if possible */ + if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { + fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); + exit(1); + } + env->cr[4] |= CR4_PAE_MASK; + env->efer |= MSR_EFER_LMA | MSR_EFER_LME; + env->hflags |= HF_LMA_MASK; + + /* flags setup : we activate the IRQs by default as in user mode */ + env->eflags |= IF_MASK; + + /* register setup */ + env->regs[R_EAX] = regs->rax; + env->regs[R_EBX] = regs->rbx; + env->regs[R_ECX] = regs->rcx; + env->regs[R_EDX] = regs->rdx; + env->regs[R_ESI] = regs->rsi; + env->regs[R_EDI] = regs->rdi; + env->regs[R_EBP] = regs->rbp; + env->regs[R_ESP] = regs->rsp; + env->eip = regs->rip; + + /* interrupt setup */ + env->idt.limit = 511; + + env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + bsd_x86_64_set_idt_base(env->idt.base); + bsd_x86_64_set_idt(0, 0); + bsd_x86_64_set_idt(1, 0); + bsd_x86_64_set_idt(2, 0); + bsd_x86_64_set_idt(3, 3); + bsd_x86_64_set_idt(4, 3); + bsd_x86_64_set_idt(5, 0); + bsd_x86_64_set_idt(6, 0); + bsd_x86_64_set_idt(7, 0); + bsd_x86_64_set_idt(8, 0); + bsd_x86_64_set_idt(9, 0); + bsd_x86_64_set_idt(10, 0); + bsd_x86_64_set_idt(11, 0); + bsd_x86_64_set_idt(12, 0); + bsd_x86_64_set_idt(13, 0); + bsd_x86_64_set_idt(14, 0); + bsd_x86_64_set_idt(15, 0); + bsd_x86_64_set_idt(16, 0); + bsd_x86_64_set_idt(17, 0); + bsd_x86_64_set_idt(18, 0); + bsd_x86_64_set_idt(19, 0); + bsd_x86_64_set_idt(0x80, 3); + + /* segment setup */ + env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; + gdt_table = g2h_untagged(env->gdt.base); + + /* 64 bit code segment */ + bsd_x86_64_write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | DESC_L_MASK + | (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); + + bsd_x86_64_write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | + (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); + + cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_SS, __USER_DS); + cpu_x86_load_seg(env, R_DS, 0); + cpu_x86_load_seg(env, R_ES, 0); + cpu_x86_load_seg(env, R_FS, 0); + cpu_x86_load_seg(env, R_GS, 0); +} + +static inline void target_cpu_loop(CPUX86State *env) +{ + CPUState *cs = env_cpu(env); + int trapnr; + abi_ulong pc; + /* target_siginfo_t info; */ + + for (;;) { + cpu_exec_start(cs); + trapnr = cpu_exec(cs); + cpu_exec_end(cs); + process_queued_cpu_work(cs); + + switch (trapnr) { + case 0x80: + /* syscall from int $0x80 */ + if (bsd_type == target_freebsd) { + abi_ulong params = (abi_ulong) env->regs[R_ESP] + + sizeof(int32_t); + int32_t syscall_nr = env->regs[R_EAX]; + int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8; + + if (syscall_nr == TARGET_FREEBSD_NR_syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int32_t); + } else if 
(syscall_nr == TARGET_FREEBSD_NR___syscall) { + get_user_s32(syscall_nr, params); + params += sizeof(int64_t); + } + get_user_s32(arg1, params); + params += sizeof(int32_t); + get_user_s32(arg2, params); + params += sizeof(int32_t); + get_user_s32(arg3, params); + params += sizeof(int32_t); + get_user_s32(arg4, params); + params += sizeof(int32_t); + get_user_s32(arg5, params); + params += sizeof(int32_t); + get_user_s32(arg6, params); + params += sizeof(int32_t); + get_user_s32(arg7, params); + params += sizeof(int32_t); + get_user_s32(arg8, params); + env->regs[R_EAX] = do_freebsd_syscall(env, + syscall_nr, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } else { /* if (bsd_type == target_openbsd) */ + env->regs[R_EAX] = do_openbsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP]); + } + if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { + env->regs[R_EAX] = -env->regs[R_EAX]; + env->eflags |= CC_C; + } else { + env->eflags &= ~CC_C; + } + break; + + case EXCP_SYSCALL: + /* syscall from syscall instruction */ + if (bsd_type == target_freebsd) { + env->regs[R_EAX] = do_freebsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EDI], + env->regs[R_ESI], + env->regs[R_EDX], + env->regs[R_ECX], + env->regs[8], + env->regs[9], 0, 0); + } else { /* if (bsd_type == target_openbsd) */ + env->regs[R_EAX] = do_openbsd_syscall(env, + env->regs[R_EAX], + env->regs[R_EDI], + env->regs[R_ESI], + env->regs[R_EDX], + env->regs[10], + env->regs[8], + env->regs[9]); + } + env->eip = env->exception_next_eip; + if (((abi_ulong)env->regs[R_EAX]) >= (abi_ulong)(-515)) { + env->regs[R_EAX] = -env->regs[R_EAX]; + env->eflags |= CC_C; + } else { + env->eflags &= ~CC_C; + } + break; + + case EXCP_INTERRUPT: + /* just indicate that signals should be handled asap */ + break; + + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; + + default: + pc = env->segs[R_CS].base + env->eip; + fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - " + "aborting\n", (long)pc, trapnr); + abort(); + } + process_pending_signals(env); + } +} + +static inline void target_cpu_clone_regs(CPUX86State *env, target_ulong newsp) +{ + if (newsp) { + env->regs[R_ESP] = newsp; + } + env->regs[R_EAX] = 0; +} + +static inline void target_cpu_reset(CPUArchState *cpu) +{ + cpu_reset(env_cpu(cpu)); +} + +#endif /* ! _TARGET_ARCH_CPU_H_ */ diff --git a/bsd-user/x86_64/target_arch_elf.h b/bsd-user/x86_64/target_arch_elf.h new file mode 100644 index 00000000000..c2f85539626 --- /dev/null +++ b/bsd-user/x86_64/target_arch_elf.h @@ -0,0 +1,35 @@ +/* + * x86_64 ELF definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_ARCH_ELF_H_ +#define _TARGET_ARCH_ELF_H_ + +#define ELF_START_MMAP 0x2aaaaab000ULL +#define ELF_ET_DYN_LOAD_ADDR 0x01021000 +#define elf_check_arch(x) (((x) == ELF_ARCH)) + +#define ELF_HWCAP 0 /* FreeBSD doesn't do AT_HWCAP{,2} on x86 */ + +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#endif /* _TARGET_ARCH_ELF_H_ */ diff --git a/bsd-user/x86_64/target_arch_reg.h b/bsd-user/x86_64/target_arch_reg.h new file mode 100644 index 00000000000..00e96245178 --- /dev/null +++ b/bsd-user/x86_64/target_arch_reg.h @@ -0,0 +1,92 @@ +/* + * FreeBSD amd64 register structures + * + * Copyright (c) 2015 Stacey Son + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_REG_H_ +#define _TARGET_ARCH_REG_H_ + +/* See sys/amd64/include/reg.h */ +typedef struct target_reg { + uint64_t r_r15; + uint64_t r_r14; + uint64_t r_r13; + uint64_t r_r12; + uint64_t r_r11; + uint64_t r_r10; + uint64_t r_r9; + uint64_t r_r8; + uint64_t r_rdi; + uint64_t r_rsi; + uint64_t r_rbp; + uint64_t r_rbx; + uint64_t r_rdx; + uint64_t r_rcx; + uint64_t r_rax; + uint32_t r_trapno; + uint16_t r_fs; + uint16_t r_gs; + uint32_t r_err; + uint16_t r_es; + uint16_t r_ds; + uint64_t r_rip; + uint64_t r_cs; + uint64_t r_rflags; + uint64_t r_rsp; + uint64_t r_ss; +} target_reg_t; + +typedef struct target_fpreg { + uint64_t fpr_env[4]; + uint8_t fpr_acc[8][16]; + uint8_t fpr_xacc[16][16]; + uint64_t fpr_spare[12]; +} target_fpreg_t; + +static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env) +{ + + regs->r_r15 = env->regs[15]; + regs->r_r14 = env->regs[14]; + regs->r_r13 = env->regs[13]; + regs->r_r12 = env->regs[12]; + regs->r_r11 = env->regs[11]; + regs->r_r10 = env->regs[10]; + regs->r_r9 = env->regs[9]; + regs->r_r8 = env->regs[8]; + regs->r_rdi = env->regs[R_EDI]; + regs->r_rsi = env->regs[R_ESI]; + regs->r_rbp = env->regs[R_EBP]; + regs->r_rbx = env->regs[R_EBX]; + regs->r_rdx = env->regs[R_EDX]; + regs->r_rcx = env->regs[R_ECX]; + regs->r_rax = env->regs[R_EAX]; + /* regs->r_trapno = env->regs[R_TRAPNO]; XXX */ + regs->r_fs = env->segs[R_FS].selector & 0xffff; + regs->r_gs = env->segs[R_GS].selector & 0xffff; + regs->r_err = env->error_code; /* XXX ? 
 */
+    regs->r_es = env->segs[R_ES].selector & 0xffff;
+    regs->r_ds = env->segs[R_DS].selector & 0xffff;
+    regs->r_rip = env->eip;
+    regs->r_cs = env->segs[R_CS].selector & 0xffff;
+    regs->r_rflags = env->eflags;
+    regs->r_rsp = env->regs[R_ESP];
+    regs->r_ss = env->segs[R_SS].selector & 0xffff;
+}
+
+#endif /* !_TARGET_ARCH_REG_H_ */
diff --git a/bsd-user/x86_64/target_arch_signal.h b/bsd-user/x86_64/target_arch_signal.h
new file mode 100644
index 00000000000..4bb753b08bb
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_signal.h
@@ -0,0 +1,92 @@
+/*
+ * x86_64 signal definitions
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+#ifndef _TARGET_ARCH_SIGNAL_H_
+#define _TARGET_ARCH_SIGNAL_H_
+
+#include "cpu.h"
+
+/* Size of the signal trampoline code placed on the stack. */
+#define TARGET_SZSIGCODE 0
+
+/* compare to x86/include/_limits.h */
+#define TARGET_MINSIGSTKSZ (512 * 4)           /* min sig stack size */
+#define TARGET_SIGSTKSZ (MINSIGSTKSZ + 32768)  /* recommended size */
+
+struct target_sigcontext {
+    /* to be added */
+};
+
+typedef struct target_mcontext {
+} target_mcontext_t;
+
+typedef struct target_ucontext {
+    target_sigset_t   uc_sigmask;
+    target_mcontext_t uc_mcontext;
+    abi_ulong         uc_link;
+    target_stack_t    uc_stack;
+    int32_t           uc_flags;
+    int32_t           __spare__[4];
+} target_ucontext_t;
+
+struct target_sigframe {
+    abi_ulong          sf_signum;
+    abi_ulong          sf_siginfo;  /* code or pointer to sf_si */
+    abi_ulong          sf_ucontext; /* points to sf_uc */
+    abi_ulong          sf_addr;     /* undocumented 4th arg */
+    target_ucontext_t  sf_uc;       /* = *sf_ucontext */
+    target_siginfo_t   sf_si;       /* = *sf_siginfo (SA_SIGINFO case) */
+    uint32_t           __spare__[2];
+};
+
+/*
+ * Compare to amd64/amd64/machdep.c sendsig()
+ * Assumes that target stack frame memory is locked.
+ */
+static inline abi_long set_sigtramp_args(CPUX86State *regs,
+        int sig, struct target_sigframe *frame, abi_ulong frame_addr,
+        struct target_sigaction *ka)
+{
+    /* XXX return -TARGET_EOPNOTSUPP; */
+    return 0;
+}
+
+/* Compare to amd64/amd64/machdep.c get_mcontext() */
+static inline abi_long get_mcontext(CPUX86State *regs,
+        target_mcontext_t *mcp, int flags)
+{
+    /* XXX */
+    return -TARGET_EOPNOTSUPP;
+}
+
+/* Compare to amd64/amd64/machdep.c set_mcontext() */
+static inline abi_long set_mcontext(CPUX86State *regs,
+        target_mcontext_t *mcp, int srflag)
+{
+    /* XXX */
+    return -TARGET_EOPNOTSUPP;
+}
+
+static inline abi_long get_ucontext_sigreturn(CPUX86State *regs,
+        abi_ulong target_sf, abi_ulong *target_uc)
+{
+    /* XXX */
+    *target_uc = 0;
+    return -TARGET_EOPNOTSUPP;
+}
+
+#endif /* !TARGET_ARCH_SIGNAL_H_ */
diff --git a/bsd-user/x86_64/target_arch_sigtramp.h b/bsd-user/x86_64/target_arch_sigtramp.h
new file mode 100644
index 00000000000..29d4a8b55f3
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_sigtramp.h
@@ -0,0 +1,29 @@
+/*
+ * Intel x86_64 sigcode for bsd-user
+ *
+ * Copyright (c) 2013 Stacey D.
Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef _TARGET_ARCH_SIGTRAMP_H_ +#define _TARGET_ARCH_SIGTRAMP_H_ + +static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc, + unsigned sys_sigreturn) +{ + + return 0; +} +#endif /* _TARGET_ARCH_SIGTRAMP_H_ */ diff --git a/bsd-user/x86_64/target_arch_thread.h b/bsd-user/x86_64/target_arch_thread.h new file mode 100644 index 00000000000..d105e43fd35 --- /dev/null +++ b/bsd-user/x86_64/target_arch_thread.h @@ -0,0 +1,40 @@ +/* + * x86_64 thread support + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#ifndef _TARGET_ARCH_THREAD_H_ +#define _TARGET_ARCH_THREAD_H_ + +/* Compare to vm_machdep.c cpu_set_upcall_kse() */ +static inline void target_thread_set_upcall(CPUX86State *regs, abi_ulong entry, + abi_ulong arg, abi_ulong stack_base, abi_ulong stack_size) +{ + /* XXX */ +} + +static inline void target_thread_init(struct target_pt_regs *regs, + struct image_info *infop) +{ + regs->rax = 0; + regs->rsp = infop->start_stack; + regs->rip = infop->entry; + if (bsd_type == target_freebsd) { + regs->rdi = infop->start_stack; + } +} + +#endif /* !_TARGET_ARCH_THREAD_H_ */ diff --git a/bsd-user/x86_64/target_arch_vmparam.h b/bsd-user/x86_64/target_arch_vmparam.h new file mode 100644 index 00000000000..81a915f2e55 --- /dev/null +++ b/bsd-user/x86_64/target_arch_vmparam.h @@ -0,0 +1,46 @@ +/* + * Intel x86_64 VM parameters definitions + * + * Copyright (c) 2013 Stacey D. Son + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ +#ifndef _TARGET_ARCH_VMPARAM_H_ +#define _TARGET_ARCH_VMPARAM_H_ + +#include "cpu.h" + +/* compare to amd64/include/vmparam.h */ +#define TARGET_MAXTSIZ (128 * MiB) /* max text size */ +#define TARGET_DFLDSIZ (32 * GiB) /* initial data size limit */ +#define TARGET_MAXDSIZ (32 * GiB) /* max data size */ +#define TARGET_DFLSSIZ (8 * MiB) /* initial stack size limit */ +#define TARGET_MAXSSIZ (512 * MiB) /* max stack size */ +#define TARGET_SGROWSIZ (128 * KiB) /* amount to grow stack */ + +#define TARGET_VM_MAXUSER_ADDRESS (0x00007fffff000000UL) + +#define TARGET_USRSTACK (TARGET_VM_MAXUSER_ADDRESS - TARGET_PAGE_SIZE) + +static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) +{ + return state->regs[R_ESP]; +} + +static inline void set_second_rval(CPUX86State *state, abi_ulong retval2) +{ + state->regs[R_EDX] = retval2; +} + +#endif /* !_TARGET_ARCH_VMPARAM_H_ */ diff --git a/bsd-user/x86_64/target_signal.h b/bsd-user/x86_64/target_signal.h deleted file mode 100644 index 659cd401b82..00000000000 --- a/bsd-user/x86_64/target_signal.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef TARGET_SIGNAL_H -#define TARGET_SIGNAL_H - -#include "cpu.h" - -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_long ss_flags; - abi_ulong ss_size; -} target_stack_t; - -static inline abi_ulong get_sp_from_cpustate(CPUX86State *state) -{ - return state->regs[R_ESP]; -} - -#endif /* TARGET_SIGNAL_H */ diff --git a/chardev/baum.c b/chardev/baum.c index 5deca778bc4..79d618e3504 100644 --- a/chardev/baum.c +++ b/chardev/baum.c @@ -680,6 +680,7 @@ static const TypeInfo char_braille_type_info = { .instance_finalize = char_braille_finalize, .class_init = char_braille_class_init, }; +module_obj(TYPE_CHARDEV_BRAILLE); static void register_types(void) { diff --git a/chardev/char-fd.c b/chardev/char-fd.c index 1cd62f2779b..93c56913b49 100644 --- a/chardev/char-fd.c +++ b/chardev/char-fd.c @@ -28,6 +28,7 @@ #include "qemu/sockets.h" #include "qapi/error.h" #include "chardev/char.h" +#include "chardev/char-fe.h" #include "io/channel-file.h" #include "chardev/char-fd.h" @@ -38,6 +39,10 @@ static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len) { FDChardev *s = FD_CHARDEV(chr); + if (!s->ioc_out) { + return -1; + } + return io_channel_send(s->ioc_out, buf, len); } @@ -80,10 +85,85 @@ static int fd_chr_read_poll(void *opaque) return s->max_size; } +typedef struct FDSource { + GSource parent; + + GIOCondition cond; +} FDSource; + +static gboolean +fd_source_prepare(GSource *source, + gint *timeout_) +{ + FDSource *src = (FDSource *)source; + + return src->cond != 0; +} + +static gboolean +fd_source_check(GSource *source) +{ + FDSource *src = (FDSource *)source; + + return src->cond != 0; +} + +static gboolean +fd_source_dispatch(GSource *source, GSourceFunc callback, + gpointer user_data) +{ + FDSource *src = (FDSource *)source; + FEWatchFunc func = (FEWatchFunc)callback; + gboolean ret = G_SOURCE_CONTINUE; + + if (src->cond) { + ret = func(NULL, src->cond, user_data); + src->cond = 0; + } + + return ret; +} + +static GSourceFuncs fd_source_funcs = { + fd_source_prepare, + fd_source_check, + fd_source_dispatch, + NULL, NULL, NULL +}; + +static GSource *fd_source_new(FDChardev *chr) +{ + return g_source_new(&fd_source_funcs, sizeof(FDSource)); +} + +static gboolean child_func(GIOChannel *source, + GIOCondition condition, + gpointer data) +{ + FDSource *parent = data; + + parent->cond |= condition; + + return G_SOURCE_CONTINUE; +} + 
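The chardev hunk that follows replaces char-fd.c's single-channel watch: since ioc_in and ioc_out may now be two different QIOChannels (or one of them may be missing), fd_chr_add_watch() builds a parent GSource, attaches one child source per channel, and child_func() only accumulates the ready condition while the parent's dispatch hands the merged bits to the front-end callback once. The standalone sketch below shows the same GLib child-source pattern outside QEMU; it is only an approximation, and every name in it (AggSource, agg_source_new, child_ready) plus the use of plain POSIX file descriptors instead of QIOChannels is invented for the example.

/* Illustrative sketch only -- not part of the patch above; POSIX-only
 * because it relies on g_unix_fd_source_new(). */
#include <glib.h>
#include <glib-unix.h>

typedef struct {
    GSource parent;
    GIOCondition cond;              /* conditions reported by the children */
} AggSource;

typedef gboolean (*AggFunc)(GIOCondition cond, gpointer user_data);

static gboolean agg_prepare(GSource *source, gint *timeout)
{
    *timeout = -1;
    return ((AggSource *)source)->cond != 0;
}

static gboolean agg_check(GSource *source)
{
    return ((AggSource *)source)->cond != 0;
}

static gboolean agg_dispatch(GSource *source, GSourceFunc cb, gpointer data)
{
    AggSource *src = (AggSource *)source;
    gboolean ret = G_SOURCE_CONTINUE;

    if (src->cond) {
        ret = ((AggFunc)cb)(src->cond, data);   /* one call, merged bits */
        src->cond = 0;                          /* wait for the next report */
    }
    return ret;
}

static GSourceFuncs agg_funcs = { agg_prepare, agg_check, agg_dispatch, NULL };

/* Child callback: remember what became ready, let the parent dispatch it. */
static gboolean child_ready(gint fd, GIOCondition cond, gpointer data)
{
    ((AggSource *)data)->cond |= cond;
    return G_SOURCE_CONTINUE;
}

/* One parent source watching fd_in for input and fd_out for output. */
static GSource *agg_source_new(int fd_in, int fd_out)
{
    GSource *source = g_source_new(&agg_funcs, sizeof(AggSource));
    GSource *in = g_unix_fd_source_new(fd_in, G_IO_IN);
    GSource *out = g_unix_fd_source_new(fd_out, G_IO_OUT);

    g_source_set_callback(in, (GSourceFunc)child_ready, source, NULL);
    g_source_set_callback(out, (GSourceFunc)child_ready, source, NULL);
    g_source_add_child_source(source, in);
    g_source_add_child_source(source, out);
    g_source_unref(in);             /* the parent keeps its own reference */
    g_source_unref(out);
    return source;
}

Attaching the returned source to a main context with g_source_attach() and running a GMainLoop delivers a single callback per iteration with the merged condition, mirroring how fd_source_dispatch() in the patch reports src->cond once and then clears it.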
static GSource *fd_chr_add_watch(Chardev *chr, GIOCondition cond) { FDChardev *s = FD_CHARDEV(chr); - return qio_channel_create_watch(s->ioc_out, cond); + g_autoptr(GSource) source = fd_source_new(s); + + if (s->ioc_out) { + g_autoptr(GSource) child = qio_channel_create_watch(s->ioc_out, cond & ~G_IO_IN); + g_source_set_callback(child, (GSourceFunc)child_func, source, NULL); + g_source_add_child_source(source, child); + } + if (s->ioc_in) { + g_autoptr(GSource) child = qio_channel_create_watch(s->ioc_in, cond & ~G_IO_OUT); + g_source_set_callback(child, (GSourceFunc)child_func, source, NULL); + g_source_add_child_source(source, child); + } + + return g_steal_pointer(&source); } static void fd_chr_update_read_handler(Chardev *chr) @@ -131,17 +211,32 @@ void qemu_chr_open_fd(Chardev *chr, int fd_in, int fd_out) { FDChardev *s = FD_CHARDEV(chr); - char *name; - - s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); - name = g_strdup_printf("chardev-file-in-%s", chr->label); - qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); - g_free(name); - s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); - name = g_strdup_printf("chardev-file-out-%s", chr->label); - qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name); - g_free(name); - qemu_set_nonblock(fd_out); + g_autofree char *name = NULL; + + if (fd_out >= 0) { + qemu_set_nonblock(fd_out); + } + + if (fd_out == fd_in && fd_in >= 0) { + s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + name = g_strdup_printf("chardev-file-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); + s->ioc_out = QIO_CHANNEL(object_ref(s->ioc_in)); + return; + } + + if (fd_in >= 0) { + s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + name = g_strdup_printf("chardev-file-in-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); + } + + if (fd_out >= 0) { + s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); + g_free(name); + name = g_strdup_printf("chardev-file-out-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name); + } } static void char_fd_class_init(ObjectClass *oc, void *data) diff --git a/chardev/char-fe.c b/chardev/char-fe.c index 474715c5a92..7789f7be9c8 100644 --- a/chardev/char-fe.c +++ b/chardev/char-fe.c @@ -354,7 +354,7 @@ void qemu_chr_fe_set_open(CharBackend *be, int fe_open) } guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, - GIOFunc func, void *user_data) + FEWatchFunc func, void *user_data) { Chardev *s = be->chr; GSource *src; diff --git a/chardev/char-mux.c b/chardev/char-mux.c index 72beef29d21..ee2d47b20d9 100644 --- a/chardev/char-mux.c +++ b/chardev/char-mux.c @@ -28,7 +28,7 @@ #include "qemu/option.h" #include "chardev/char.h" #include "sysemu/block-backend.h" -#include "sysemu/sysemu.h" +#include "qapi/qapi-commands-control.h" #include "chardev-internal.h" /* MUX driver for serial I/O splitting */ @@ -158,7 +158,7 @@ static int mux_proc_byte(Chardev *chr, MuxChardev *d, int ch) { const char *term = "QEMU: Terminated\n\r"; qemu_chr_write_all(chr, (uint8_t *)term, strlen(term)); - exit(0); + qmp_quit(NULL); break; } case 's': @@ -387,10 +387,9 @@ void suspend_mux_open(void) static int chardev_options_parsed_cb(Object *child, void *opaque) { Chardev *chr = (Chardev *)child; - ChardevClass *class = CHARDEV_GET_CLASS(chr); - if (!chr->be_open && class->chr_options_parsed) { - class->chr_options_parsed(chr); + if (!chr->be_open && CHARDEV_IS_MUX(chr)) { + open_muxes(chr); } return 0; @@ -413,7 +412,6 @@ static void char_mux_class_init(ObjectClass 
*oc, void *data) cc->chr_accept_input = mux_chr_accept_input; cc->chr_add_watch = mux_chr_add_watch; cc->chr_be_event = mux_chr_be_event; - cc->chr_options_parsed = open_muxes; cc->chr_update_read_handler = mux_chr_update_read_handlers; } diff --git a/chardev/char-socket.c b/chardev/char-socket.c index daa89fe5d1d..836cfa0bc21 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -468,9 +468,9 @@ static char *qemu_chr_socket_address(SocketChardev *s, const char *prefix) #ifdef CONFIG_LINUX if (sa->has_abstract && sa->abstract) { - abstract = ",abstract"; + abstract = ",abstract=on"; if (sa->has_tight && sa->tight) { - tight = ",tight"; + tight = ",tight=on"; } } #endif @@ -1402,18 +1402,12 @@ static void qmp_chardev_open_socket(Chardev *chr, return; } object_ref(OBJECT(s->tls_creds)); - if (is_listen) { - if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { - error_setg(errp, "%s", - "Expected TLS credentials for server endpoint"); - return; - } - } else { - if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { - error_setg(errp, "%s", - "Expected TLS credentials for client endpoint"); - return; - } + if (!qcrypto_tls_creds_check_endpoint(s->tls_creds, + is_listen + ? QCRYPTO_TLS_CREDS_ENDPOINT_SERVER + : QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, + errp)) { + return; } } s->tls_authz = g_strdup(sock->tls_authz); @@ -1526,7 +1520,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, addr = g_new0(SocketAddressLegacy, 1); if (path) { UnixSocketAddress *q_unix; - addr->type = SOCKET_ADDRESS_LEGACY_KIND_UNIX; + addr->type = SOCKET_ADDRESS_TYPE_UNIX; q_unix = addr->u.q_unix.data = g_new0(UnixSocketAddress, 1); q_unix->path = g_strdup(path); #ifdef CONFIG_LINUX @@ -1536,7 +1530,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, q_unix->abstract = abstract; #endif } else if (host) { - addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET; + addr->type = SOCKET_ADDRESS_TYPE_INET; addr->u.inet.data = g_new(InetSocketAddress, 1); *addr->u.inet.data = (InetSocketAddress) { .host = g_strdup(host), @@ -1549,7 +1543,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, .ipv6 = qemu_opt_get_bool(opts, "ipv6", 0), }; } else if (fd) { - addr->type = SOCKET_ADDRESS_LEGACY_KIND_FD; + addr->type = SOCKET_ADDRESS_TYPE_FD; addr->u.fd.data = g_new(String, 1); addr->u.fd.data->str = g_strdup(fd); } else { diff --git a/chardev/char-udp.c b/chardev/char-udp.c index 16b5dbce582..6756e69924c 100644 --- a/chardev/char-udp.c +++ b/chardev/char-udp.c @@ -165,7 +165,7 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, qemu_chr_parse_common(opts, qapi_ChardevUdp_base(udp)); addr = g_new0(SocketAddressLegacy, 1); - addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET; + addr->type = SOCKET_ADDRESS_TYPE_INET; addr->u.inet.data = g_new(InetSocketAddress, 1); *addr->u.inet.data = (InetSocketAddress) { .host = g_strdup(host), @@ -180,7 +180,7 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, if (has_local) { udp->has_local = true; addr = g_new0(SocketAddressLegacy, 1); - addr->type = SOCKET_ADDRESS_LEGACY_KIND_INET; + addr->type = SOCKET_ADDRESS_TYPE_INET; addr->u.inet.data = g_new(InetSocketAddress, 1); *addr->u.inet.data = (InetSocketAddress) { .host = g_strdup(localaddr), diff --git a/chardev/char.c b/chardev/char.c index 398f09df19c..0169d8dde4b 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" #include 
"monitor/monitor.h" -#include "sysemu/sysemu.h" #include "qemu/config-file.h" #include "qemu/error-report.h" #include "qemu/qemu-print.h" @@ -242,18 +241,15 @@ static void qemu_char_open(Chardev *chr, ChardevBackend *backend, ChardevCommon *common = backend ? backend->u.null.data : NULL; if (common && common->has_logfile) { - int flags = O_WRONLY | O_CREAT; + int flags = O_WRONLY; if (common->has_logappend && common->logappend) { flags |= O_APPEND; } else { flags |= O_TRUNC; } - chr->logfd = qemu_open_old(common->logfile, flags, 0666); + chr->logfd = qemu_create(common->logfile, flags, 0666, errp); if (chr->logfd < 0) { - error_setg_errno(errp, errno, - "Unable to open logfile %s", - common->logfile); return; } } @@ -932,6 +928,12 @@ QemuOptsList qemu_chardev_opts = { },{ .name = "logappend", .type = QEMU_OPT_BOOL, + },{ + .name = "mouse", + .type = QEMU_OPT_BOOL, + },{ + .name = "clipboard", + .type = QEMU_OPT_BOOL, #ifdef CONFIG_LINUX },{ .name = "tight", @@ -1026,27 +1028,31 @@ Chardev *qemu_chardev_new(const char *id, const char *typename, ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, Error **errp) { + ERRP_GUARD(); const ChardevClass *cc; ChardevReturn *ret; - Chardev *chr; + g_autoptr(Chardev) chr = NULL; + + if (qemu_chr_find(id)) { + error_setg(errp, "Chardev with id '%s' already exists", id); + return NULL; + } cc = char_get_class(ChardevBackendKind_str(backend->type), errp); if (!cc) { - return NULL; + goto err; } chr = chardev_new(id, object_class_get_name(OBJECT_CLASS(cc)), backend, NULL, false, errp); if (!chr) { - return NULL; + goto err; } if (!object_property_try_add_child(get_chardevs_root(), id, OBJECT(chr), errp)) { - object_unref(OBJECT(chr)); - return NULL; + goto err; } - object_unref(OBJECT(chr)); ret = g_new0(ChardevReturn, 1); if (CHARDEV_IS_PTY(chr)) { @@ -1055,6 +1061,10 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, } return ret; + +err: + error_prepend(errp, "Failed to add chardev '%s': ", id); + return NULL; } ChardevReturn *qmp_chardev_change(const char *id, ChardevBackend *backend, diff --git a/chardev/meson.build b/chardev/meson.build index 32377af383e..325ba2bdb97 100644 --- a/chardev/meson.build +++ b/chardev/meson.build @@ -35,7 +35,7 @@ if brlapi.found() chardev_modules += { 'baum': module_ss } endif -if config_host.has_key('CONFIG_SPICE') +if spice.found() module_ss = ss.source_set() module_ss.add(when: [spice], if_true: files('spice.c')) chardev_modules += { 'spice': module_ss } diff --git a/chardev/spice.c b/chardev/spice.c index 1104426e3a1..bbffef49136 100644 --- a/chardev/spice.c +++ b/chardev/spice.c @@ -366,6 +366,7 @@ static const TypeInfo char_spice_type_info = { .class_init = char_spice_class_init, .abstract = true, }; +module_obj(TYPE_CHARDEV_SPICE); static void char_spicevmc_class_init(ObjectClass *oc, void *data) { @@ -381,6 +382,7 @@ static const TypeInfo char_spicevmc_type_info = { .parent = TYPE_CHARDEV_SPICE, .class_init = char_spicevmc_class_init, }; +module_obj(TYPE_CHARDEV_SPICEVMC); static void char_spiceport_class_init(ObjectClass *oc, void *data) { @@ -396,6 +398,7 @@ static const TypeInfo char_spiceport_type_info = { .parent = TYPE_CHARDEV_SPICE, .class_init = char_spiceport_class_init, }; +module_obj(TYPE_CHARDEV_SPICEPORT); static void register_types(void) { @@ -405,3 +408,5 @@ static void register_types(void) } type_init(register_types); + +module_dep("ui-spice-core"); diff --git a/chardev/trace-events b/chardev/trace-events index 5ea44082073..027107b0c10 100644 --- 
a/chardev/trace-events +++ b/chardev/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # wctablet.c wct_init(void) "" diff --git a/chardev/wctablet.c b/chardev/wctablet.c index 95e005f5a56..e8b292c43ca 100644 --- a/chardev/wctablet.c +++ b/chardev/wctablet.c @@ -320,7 +320,6 @@ static void wctablet_chr_finalize(Object *obj) TabletChardev *tablet = WCTABLET_CHARDEV(obj); qemu_input_handler_unregister(tablet->hs); - g_free(tablet); } static void wctablet_chr_open(Chardev *chr, diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak new file mode 100644 index 00000000000..cf43ac8da11 --- /dev/null +++ b/configs/devices/aarch64-softmmu/default.mak @@ -0,0 +1,8 @@ +# Default configuration for aarch64-softmmu + +# We support all the 32 bit boards so need all their config +include ../arm-softmmu/default.mak + +CONFIG_XLNX_ZYNQMP_ARM=y +CONFIG_XLNX_VERSAL=y +CONFIG_SBSA_REF=y diff --git a/configs/devices/aarch64-softmmu/minimal.mak b/configs/devices/aarch64-softmmu/minimal.mak new file mode 100644 index 00000000000..0ebc1dca561 --- /dev/null +++ b/configs/devices/aarch64-softmmu/minimal.mak @@ -0,0 +1,9 @@ +# +# A minimal version of the config that only supports only a few +# virtual machines. This avoids bringing in any of numerous legacy +# features from the 32bit platform (although virt still supports 32bit +# itself) +# + +CONFIG_ARM_VIRT=y +CONFIG_SBSA_REF=y diff --git a/default-configs/devices/alpha-softmmu.mak b/configs/devices/alpha-softmmu/default.mak similarity index 100% rename from default-configs/devices/alpha-softmmu.mak rename to configs/devices/alpha-softmmu/default.mak diff --git a/configs/devices/arm-softmmu/default.mak b/configs/devices/arm-softmmu/default.mak new file mode 100644 index 00000000000..6985a25377a --- /dev/null +++ b/configs/devices/arm-softmmu/default.mak @@ -0,0 +1,44 @@ +# Default configuration for arm-softmmu + +# CONFIG_PCI_DEVICES=n +# CONFIG_TEST_DEVICES=n + +CONFIG_ARM_VIRT=y +CONFIG_CUBIEBOARD=y +CONFIG_EXYNOS4=y +CONFIG_HIGHBANK=y +CONFIG_INTEGRATOR=y +CONFIG_FSL_IMX31=y +CONFIG_MUSICPAL=y +CONFIG_MUSCA=y +CONFIG_CHEETAH=y +CONFIG_SX1=y +CONFIG_NSERIES=y +CONFIG_STELLARIS=y +CONFIG_STM32VLDISCOVERY=y +CONFIG_REALVIEW=y +CONFIG_VERSATILE=y +CONFIG_VEXPRESS=y +CONFIG_ZYNQ=y +CONFIG_MAINSTONE=y +CONFIG_GUMSTIX=y +CONFIG_SPITZ=y +CONFIG_TOSA=y +CONFIG_Z2=y +CONFIG_NPCM7XX=y +CONFIG_COLLIE=y +CONFIG_ASPEED_SOC=y +CONFIG_NETDUINO2=y +CONFIG_NETDUINOPLUS2=y +CONFIG_MPS2=y +CONFIG_RASPI=y +CONFIG_DIGIC=y +CONFIG_SABRELITE=y +CONFIG_EMCRAFT_SF2=y +CONFIG_MICROBIT=y +CONFIG_FSL_IMX25=y +CONFIG_FSL_IMX7=y +CONFIG_FSL_IMX6UL=y +CONFIG_SEMIHOSTING=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +CONFIG_ALLWINNER_H3=y diff --git a/default-configs/devices/avr-softmmu.mak b/configs/devices/avr-softmmu/default.mak similarity index 100% rename from default-configs/devices/avr-softmmu.mak rename to configs/devices/avr-softmmu/default.mak diff --git a/default-configs/devices/cris-softmmu.mak b/configs/devices/cris-softmmu/default.mak similarity index 100% rename from default-configs/devices/cris-softmmu.mak rename to configs/devices/cris-softmmu/default.mak diff --git a/default-configs/devices/hppa-softmmu.mak b/configs/devices/hppa-softmmu/default.mak similarity index 100% rename from default-configs/devices/hppa-softmmu.mak rename to configs/devices/hppa-softmmu/default.mak diff --git a/configs/devices/i386-softmmu/default.mak 
b/configs/devices/i386-softmmu/default.mak new file mode 100644 index 00000000000..598c6646dfc --- /dev/null +++ b/configs/devices/i386-softmmu/default.mak @@ -0,0 +1,32 @@ +# Default configuration for i386-softmmu + +# Uncomment the following lines to disable these optional devices: +# +#CONFIG_AMD_IOMMU=n +#CONFIG_APPLESMC=n +#CONFIG_FDC=n +#CONFIG_HPET=n +#CONFIG_HYPERV=n +#CONFIG_ISA_DEBUG=n +#CONFIG_ISA_IPMI_BT=n +#CONFIG_ISA_IPMI_KCS=n +#CONFIG_PCI_IPMI_KCS=n +#CONFIG_PCI_IPMI_BT=n +#CONFIG_IPMI_SSIF=n +#CONFIG_PCI_DEVICES=n +#CONFIG_PVPANIC=n +#CONFIG_QXL=n +#CONFIG_SEV=n +#CONFIG_SGA=n +#CONFIG_TEST_DEVICES=n +#CONFIG_TPM_CRB=n +#CONFIG_TPM_TIS_ISA=n +#CONFIG_VTD=n +#CONFIG_SGX=n + +# Boards: +# +CONFIG_ISAPC=y +CONFIG_I440FX=y +CONFIG_Q35=y +CONFIG_MICROVM=y diff --git a/default-configs/devices/m68k-softmmu.mak b/configs/devices/m68k-softmmu/default.mak similarity index 100% rename from default-configs/devices/m68k-softmmu.mak rename to configs/devices/m68k-softmmu/default.mak diff --git a/default-configs/devices/microblaze-softmmu.mak b/configs/devices/microblaze-softmmu/default.mak similarity index 100% rename from default-configs/devices/microblaze-softmmu.mak rename to configs/devices/microblaze-softmmu/default.mak diff --git a/configs/devices/microblazeel-softmmu/default.mak b/configs/devices/microblazeel-softmmu/default.mak new file mode 100644 index 00000000000..29f7f13816c --- /dev/null +++ b/configs/devices/microblazeel-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for microblazeel-softmmu + +include ../microblaze-softmmu/default.mak diff --git a/configs/devices/mips-softmmu/common.mak b/configs/devices/mips-softmmu/common.mak new file mode 100644 index 00000000000..4801b560511 --- /dev/null +++ b/configs/devices/mips-softmmu/common.mak @@ -0,0 +1,39 @@ +# Common mips*-softmmu CONFIG defines + +# CONFIG_SEMIHOSTING is always required on this architecture +CONFIG_SEMIHOSTING=y + +CONFIG_ISA_BUS=y +CONFIG_PCI=y +CONFIG_PCI_DEVICES=y +CONFIG_VGA_ISA=y +CONFIG_VGA_ISA_MM=y +CONFIG_VGA_CIRRUS=y +CONFIG_VMWARE_VGA=y +CONFIG_SERIAL=y +CONFIG_SERIAL_ISA=y +CONFIG_PARALLEL=y +CONFIG_I8254=y +CONFIG_PCSPK=y +CONFIG_PCKBD=y +CONFIG_FDC=y +CONFIG_ACPI=y +CONFIG_ACPI_PIIX4=y +CONFIG_APM=y +CONFIG_I8257=y +CONFIG_PIIX4=y +CONFIG_IDE_ISA=y +CONFIG_IDE_PIIX=y +CONFIG_PFLASH_CFI01=y +CONFIG_I8259=y +CONFIG_MC146818RTC=y +CONFIG_EMPTY_SLOT=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_ITU=y +CONFIG_MALTA=y +CONFIG_PCNET_PCI=y +CONFIG_MIPSSIM=y +CONFIG_ACPI_SMBUS=y +CONFIG_SMBUS_EEPROM=y +CONFIG_TEST_DEVICES=y +CONFIG_VIRTIO_MMIO=y diff --git a/configs/devices/mips-softmmu/default.mak b/configs/devices/mips-softmmu/default.mak new file mode 100644 index 00000000000..c23d95a83aa --- /dev/null +++ b/configs/devices/mips-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for mips-softmmu + +include common.mak diff --git a/configs/devices/mips64-softmmu/default.mak b/configs/devices/mips64-softmmu/default.mak new file mode 100644 index 00000000000..566672f3c22 --- /dev/null +++ b/configs/devices/mips64-softmmu/default.mak @@ -0,0 +1,4 @@ +# Default configuration for mips64-softmmu + +include ../mips-softmmu/common.mak +CONFIG_JAZZ=y diff --git a/configs/devices/mips64cheri128-softmmu/default.mak b/configs/devices/mips64cheri128-softmmu/default.mak new file mode 100644 index 00000000000..a4f5f83d993 --- /dev/null +++ b/configs/devices/mips64cheri128-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for cheri128-softmmu + +include ../mips64-softmmu/default.mak diff --git 
a/configs/devices/mips64el-softmmu/default.mak b/configs/devices/mips64el-softmmu/default.mak new file mode 100644 index 00000000000..c610749ac13 --- /dev/null +++ b/configs/devices/mips64el-softmmu/default.mak @@ -0,0 +1,11 @@ +# Default configuration for mips64el-softmmu + +include ../mips-softmmu/common.mak +CONFIG_IDE_VIA=y +CONFIG_FULOONG=y +CONFIG_LOONGSON3V=y +CONFIG_ATI_VGA=y +CONFIG_RTL8139_PCI=y +CONFIG_JAZZ=y +CONFIG_VT82C686=y +CONFIG_MIPS_BOSTON=y diff --git a/configs/devices/mipsel-softmmu/default.mak b/configs/devices/mipsel-softmmu/default.mak new file mode 100644 index 00000000000..009ccb0e2da --- /dev/null +++ b/configs/devices/mipsel-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for mipsel-softmmu + +include ../mips-softmmu/common.mak diff --git a/default-configs/devices/morello-softmmu.mak b/configs/devices/morello-softmmu/default.mak similarity index 100% rename from default-configs/devices/morello-softmmu.mak rename to configs/devices/morello-softmmu/default.mak diff --git a/default-configs/devices/nios2-softmmu.mak b/configs/devices/nios2-softmmu/default.mak similarity index 100% rename from default-configs/devices/nios2-softmmu.mak rename to configs/devices/nios2-softmmu/default.mak diff --git a/default-configs/devices/or1k-softmmu.mak b/configs/devices/or1k-softmmu/default.mak similarity index 100% rename from default-configs/devices/or1k-softmmu.mak rename to configs/devices/or1k-softmmu/default.mak diff --git a/configs/devices/ppc-softmmu/default.mak b/configs/devices/ppc-softmmu/default.mak new file mode 100644 index 00000000000..658a454426e --- /dev/null +++ b/configs/devices/ppc-softmmu/default.mak @@ -0,0 +1,19 @@ +# Default configuration for ppc-softmmu + +# For embedded PPCs: +CONFIG_E500=y +CONFIG_PPC405=y +CONFIG_PPC440=y +CONFIG_VIRTEX=y + +# For Sam460ex +CONFIG_SAM460EX=y + +# For Macs +CONFIG_MAC_OLDWORLD=y +CONFIG_MAC_NEWWORLD=y + +CONFIG_PEGASOS2=y + +# For PReP +CONFIG_PREP=y diff --git a/configs/devices/ppc64-softmmu/default.mak b/configs/devices/ppc64-softmmu/default.mak new file mode 100644 index 00000000000..b90e5bf4558 --- /dev/null +++ b/configs/devices/ppc64-softmmu/default.mak @@ -0,0 +1,10 @@ +# Default configuration for ppc64-softmmu + +# Include all 32-bit boards +include ../ppc-softmmu/default.mak + +# For PowerNV +CONFIG_POWERNV=y + +# For pSeries +CONFIG_PSERIES=y diff --git a/default-configs/devices/riscv32-softmmu.mak b/configs/devices/riscv32-softmmu/default.mak similarity index 100% rename from default-configs/devices/riscv32-softmmu.mak rename to configs/devices/riscv32-softmmu/default.mak diff --git a/configs/devices/riscv32cheri-softmmu/default.mak b/configs/devices/riscv32cheri-softmmu/default.mak new file mode 100644 index 00000000000..c830babc49d --- /dev/null +++ b/configs/devices/riscv32cheri-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for riscv32cheri-softmmu + +include ../riscv32-softmmu/default.mak diff --git a/configs/devices/riscv64-softmmu/default.mak b/configs/devices/riscv64-softmmu/default.mak new file mode 100644 index 00000000000..bc69301fa4a --- /dev/null +++ b/configs/devices/riscv64-softmmu/default.mak @@ -0,0 +1,16 @@ +# Default configuration for riscv64-softmmu + +# Uncomment the following lines to disable these optional devices: +# +#CONFIG_PCI_DEVICES=n +CONFIG_SEMIHOSTING=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y + +# Boards: +# +CONFIG_SPIKE=y +CONFIG_SIFIVE_E=y +CONFIG_SIFIVE_U=y +CONFIG_RISCV_VIRT=y +CONFIG_MICROCHIP_PFSOC=y +CONFIG_SHAKTI_C=y diff --git 
a/configs/devices/riscv64cheri-softmmu/default.mak b/configs/devices/riscv64cheri-softmmu/default.mak new file mode 100644 index 00000000000..57993434da3 --- /dev/null +++ b/configs/devices/riscv64cheri-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for riscv64cheri-softmmu + +include ../riscv64-softmmu/default.mak diff --git a/default-configs/devices/rx-softmmu.mak b/configs/devices/rx-softmmu/default.mak similarity index 100% rename from default-configs/devices/rx-softmmu.mak rename to configs/devices/rx-softmmu/default.mak diff --git a/default-configs/devices/s390x-softmmu.mak b/configs/devices/s390x-softmmu/default.mak similarity index 100% rename from default-configs/devices/s390x-softmmu.mak rename to configs/devices/s390x-softmmu/default.mak diff --git a/default-configs/devices/sh4-softmmu.mak b/configs/devices/sh4-softmmu/default.mak similarity index 100% rename from default-configs/devices/sh4-softmmu.mak rename to configs/devices/sh4-softmmu/default.mak diff --git a/configs/devices/sh4eb-softmmu/default.mak b/configs/devices/sh4eb-softmmu/default.mak new file mode 100644 index 00000000000..f18d1f65199 --- /dev/null +++ b/configs/devices/sh4eb-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for sh4eb-softmmu + +include ../sh4-softmmu/default.mak diff --git a/default-configs/devices/sparc-softmmu.mak b/configs/devices/sparc-softmmu/default.mak similarity index 100% rename from default-configs/devices/sparc-softmmu.mak rename to configs/devices/sparc-softmmu/default.mak diff --git a/default-configs/devices/sparc64-softmmu.mak b/configs/devices/sparc64-softmmu/default.mak similarity index 100% rename from default-configs/devices/sparc64-softmmu.mak rename to configs/devices/sparc64-softmmu/default.mak diff --git a/configs/devices/tricore-softmmu/default.mak b/configs/devices/tricore-softmmu/default.mak new file mode 100644 index 00000000000..cb8fc286eb2 --- /dev/null +++ b/configs/devices/tricore-softmmu/default.mak @@ -0,0 +1,2 @@ +CONFIG_TRICORE_TESTBOARD=y +CONFIG_TRIBOARD=y diff --git a/configs/devices/x86_64-softmmu/default.mak b/configs/devices/x86_64-softmmu/default.mak new file mode 100644 index 00000000000..ddfc2ea6266 --- /dev/null +++ b/configs/devices/x86_64-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for x86_64-softmmu + +include ../i386-softmmu/default.mak diff --git a/default-configs/devices/xtensa-softmmu.mak b/configs/devices/xtensa-softmmu/default.mak similarity index 100% rename from default-configs/devices/xtensa-softmmu.mak rename to configs/devices/xtensa-softmmu/default.mak diff --git a/configs/devices/xtensaeb-softmmu/default.mak b/configs/devices/xtensaeb-softmmu/default.mak new file mode 100644 index 00000000000..00eafcc292e --- /dev/null +++ b/configs/devices/xtensaeb-softmmu/default.mak @@ -0,0 +1,3 @@ +# Default configuration for Xtensa + +include ../xtensa-softmmu/default.mak diff --git a/configs/targets/aarch64-linux-user.mak b/configs/targets/aarch64-linux-user.mak new file mode 100644 index 00000000000..d0c603c54ec --- /dev/null +++ b/configs/targets/aarch64-linux-user.mak @@ -0,0 +1,5 @@ +TARGET_ARCH=aarch64 +TARGET_BASE_ARCH=arm +TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/configs/targets/aarch64-softmmu.mak b/configs/targets/aarch64-softmmu.mak new file mode 100644 index 00000000000..d489e6da830 --- /dev/null +++ b/configs/targets/aarch64-softmmu.mak @@ -0,0 +1,5 @@ +TARGET_ARCH=aarch64 +TARGET_BASE_ARCH=arm 
+TARGET_SUPPORTS_MTTCG=y +TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_NEED_FDT=y diff --git a/configs/targets/aarch64_be-linux-user.mak b/configs/targets/aarch64_be-linux-user.mak new file mode 100644 index 00000000000..d3ee10c00f3 --- /dev/null +++ b/configs/targets/aarch64_be-linux-user.mak @@ -0,0 +1,6 @@ +TARGET_ARCH=aarch64 +TARGET_BASE_ARCH=arm +TARGET_WORDS_BIGENDIAN=y +TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/alpha-linux-user.mak b/configs/targets/alpha-linux-user.mak similarity index 100% rename from default-configs/targets/alpha-linux-user.mak rename to configs/targets/alpha-linux-user.mak diff --git a/default-configs/targets/alpha-softmmu.mak b/configs/targets/alpha-softmmu.mak similarity index 100% rename from default-configs/targets/alpha-softmmu.mak rename to configs/targets/alpha-softmmu.mak diff --git a/configs/targets/arm-linux-user.mak b/configs/targets/arm-linux-user.mak new file mode 100644 index 00000000000..3e10d6b15d5 --- /dev/null +++ b/configs/targets/arm-linux-user.mak @@ -0,0 +1,6 @@ +TARGET_ARCH=arm +TARGET_SYSTBL_ABI=common,oabi +TARGET_SYSTBL=syscall.tbl +TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/configs/targets/arm-softmmu.mak b/configs/targets/arm-softmmu.mak new file mode 100644 index 00000000000..92c8349b964 --- /dev/null +++ b/configs/targets/arm-softmmu.mak @@ -0,0 +1,4 @@ +TARGET_ARCH=arm +TARGET_SUPPORTS_MTTCG=y +TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_NEED_FDT=y diff --git a/configs/targets/armeb-linux-user.mak b/configs/targets/armeb-linux-user.mak new file mode 100644 index 00000000000..f81e5bf1fe4 --- /dev/null +++ b/configs/targets/armeb-linux-user.mak @@ -0,0 +1,7 @@ +TARGET_ARCH=arm +TARGET_SYSTBL_ABI=common,oabi +TARGET_SYSTBL=syscall.tbl +TARGET_WORDS_BIGENDIAN=y +TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-vfp-sysregs.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml gdb-xml/arm-m-profile-mve.xml +TARGET_HAS_BFLT=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/avr-softmmu.mak b/configs/targets/avr-softmmu.mak similarity index 100% rename from default-configs/targets/avr-softmmu.mak rename to configs/targets/avr-softmmu.mak diff --git a/default-configs/targets/cris-linux-user.mak b/configs/targets/cris-linux-user.mak similarity index 100% rename from default-configs/targets/cris-linux-user.mak rename to configs/targets/cris-linux-user.mak diff --git a/default-configs/targets/cris-softmmu.mak b/configs/targets/cris-softmmu.mak similarity index 100% rename from default-configs/targets/cris-softmmu.mak rename to configs/targets/cris-softmmu.mak diff --git a/default-configs/targets/hexagon-linux-user.mak b/configs/targets/hexagon-linux-user.mak similarity index 100% rename from default-configs/targets/hexagon-linux-user.mak rename to configs/targets/hexagon-linux-user.mak diff --git 
a/default-configs/targets/hppa-linux-user.mak b/configs/targets/hppa-linux-user.mak similarity index 100% rename from default-configs/targets/hppa-linux-user.mak rename to configs/targets/hppa-linux-user.mak diff --git a/default-configs/targets/hppa-softmmu.mak b/configs/targets/hppa-softmmu.mak similarity index 100% rename from default-configs/targets/hppa-softmmu.mak rename to configs/targets/hppa-softmmu.mak diff --git a/default-configs/targets/i386-bsd-user.mak b/configs/targets/i386-bsd-user.mak similarity index 100% rename from default-configs/targets/i386-bsd-user.mak rename to configs/targets/i386-bsd-user.mak diff --git a/default-configs/targets/i386-linux-user.mak b/configs/targets/i386-linux-user.mak similarity index 100% rename from default-configs/targets/i386-linux-user.mak rename to configs/targets/i386-linux-user.mak diff --git a/configs/targets/i386-softmmu.mak b/configs/targets/i386-softmmu.mak new file mode 100644 index 00000000000..6b3c99fc86c --- /dev/null +++ b/configs/targets/i386-softmmu.mak @@ -0,0 +1,4 @@ +TARGET_ARCH=i386 +TARGET_SUPPORTS_MTTCG=y +TARGET_NEED_FDT=y +TARGET_XML_FILES= gdb-xml/i386-32bit.xml diff --git a/default-configs/targets/m68k-linux-user.mak b/configs/targets/m68k-linux-user.mak similarity index 100% rename from default-configs/targets/m68k-linux-user.mak rename to configs/targets/m68k-linux-user.mak diff --git a/default-configs/targets/m68k-softmmu.mak b/configs/targets/m68k-softmmu.mak similarity index 100% rename from default-configs/targets/m68k-softmmu.mak rename to configs/targets/m68k-softmmu.mak diff --git a/default-configs/targets/microblaze-linux-user.mak b/configs/targets/microblaze-linux-user.mak similarity index 100% rename from default-configs/targets/microblaze-linux-user.mak rename to configs/targets/microblaze-linux-user.mak diff --git a/default-configs/targets/microblaze-softmmu.mak b/configs/targets/microblaze-softmmu.mak similarity index 100% rename from default-configs/targets/microblaze-softmmu.mak rename to configs/targets/microblaze-softmmu.mak diff --git a/default-configs/targets/microblazeel-linux-user.mak b/configs/targets/microblazeel-linux-user.mak similarity index 100% rename from default-configs/targets/microblazeel-linux-user.mak rename to configs/targets/microblazeel-linux-user.mak diff --git a/default-configs/targets/microblazeel-softmmu.mak b/configs/targets/microblazeel-softmmu.mak similarity index 100% rename from default-configs/targets/microblazeel-softmmu.mak rename to configs/targets/microblazeel-softmmu.mak diff --git a/default-configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak similarity index 100% rename from default-configs/targets/mips-linux-user.mak rename to configs/targets/mips-linux-user.mak diff --git a/default-configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak similarity index 100% rename from default-configs/targets/mips-softmmu.mak rename to configs/targets/mips-softmmu.mak diff --git a/default-configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak similarity index 100% rename from default-configs/targets/mips64-linux-user.mak rename to configs/targets/mips64-linux-user.mak diff --git a/default-configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak similarity index 100% rename from default-configs/targets/mips64-softmmu.mak rename to configs/targets/mips64-softmmu.mak diff --git a/default-configs/targets/mips64cheri-softmmu-common.mak b/configs/targets/mips64cheri-softmmu-common.mak similarity index 100% rename 
from default-configs/targets/mips64cheri-softmmu-common.mak rename to configs/targets/mips64cheri-softmmu-common.mak diff --git a/default-configs/targets/mips64cheri128-softmmu.mak b/configs/targets/mips64cheri128-softmmu.mak similarity index 100% rename from default-configs/targets/mips64cheri128-softmmu.mak rename to configs/targets/mips64cheri128-softmmu.mak diff --git a/default-configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak similarity index 100% rename from default-configs/targets/mips64el-linux-user.mak rename to configs/targets/mips64el-linux-user.mak diff --git a/default-configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak similarity index 100% rename from default-configs/targets/mips64el-softmmu.mak rename to configs/targets/mips64el-softmmu.mak diff --git a/default-configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak similarity index 100% rename from default-configs/targets/mipsel-linux-user.mak rename to configs/targets/mipsel-linux-user.mak diff --git a/default-configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak similarity index 100% rename from default-configs/targets/mipsel-softmmu.mak rename to configs/targets/mipsel-softmmu.mak diff --git a/default-configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak similarity index 100% rename from default-configs/targets/mipsn32-linux-user.mak rename to configs/targets/mipsn32-linux-user.mak diff --git a/default-configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak similarity index 100% rename from default-configs/targets/mipsn32el-linux-user.mak rename to configs/targets/mipsn32el-linux-user.mak diff --git a/default-configs/targets/morello-softmmu.mak b/configs/targets/morello-softmmu.mak similarity index 100% rename from default-configs/targets/morello-softmmu.mak rename to configs/targets/morello-softmmu.mak diff --git a/default-configs/targets/nios2-linux-user.mak b/configs/targets/nios2-linux-user.mak similarity index 100% rename from default-configs/targets/nios2-linux-user.mak rename to configs/targets/nios2-linux-user.mak diff --git a/default-configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak similarity index 100% rename from default-configs/targets/nios2-softmmu.mak rename to configs/targets/nios2-softmmu.mak diff --git a/default-configs/targets/or1k-linux-user.mak b/configs/targets/or1k-linux-user.mak similarity index 100% rename from default-configs/targets/or1k-linux-user.mak rename to configs/targets/or1k-linux-user.mak diff --git a/default-configs/targets/or1k-softmmu.mak b/configs/targets/or1k-softmmu.mak similarity index 100% rename from default-configs/targets/or1k-softmmu.mak rename to configs/targets/or1k-softmmu.mak diff --git a/default-configs/targets/ppc-linux-user.mak b/configs/targets/ppc-linux-user.mak similarity index 100% rename from default-configs/targets/ppc-linux-user.mak rename to configs/targets/ppc-linux-user.mak diff --git a/default-configs/targets/ppc-softmmu.mak b/configs/targets/ppc-softmmu.mak similarity index 100% rename from default-configs/targets/ppc-softmmu.mak rename to configs/targets/ppc-softmmu.mak diff --git a/default-configs/targets/ppc64-linux-user.mak b/configs/targets/ppc64-linux-user.mak similarity index 100% rename from default-configs/targets/ppc64-linux-user.mak rename to configs/targets/ppc64-linux-user.mak diff --git a/default-configs/targets/ppc64-softmmu.mak b/configs/targets/ppc64-softmmu.mak 
similarity index 100% rename from default-configs/targets/ppc64-softmmu.mak rename to configs/targets/ppc64-softmmu.mak diff --git a/default-configs/targets/ppc64abi32-linux-user.mak b/configs/targets/ppc64abi32-linux-user.mak similarity index 100% rename from default-configs/targets/ppc64abi32-linux-user.mak rename to configs/targets/ppc64abi32-linux-user.mak diff --git a/default-configs/targets/ppc64le-linux-user.mak b/configs/targets/ppc64le-linux-user.mak similarity index 100% rename from default-configs/targets/ppc64le-linux-user.mak rename to configs/targets/ppc64le-linux-user.mak diff --git a/default-configs/targets/riscv32-linux-user.mak b/configs/targets/riscv32-linux-user.mak similarity index 100% rename from default-configs/targets/riscv32-linux-user.mak rename to configs/targets/riscv32-linux-user.mak diff --git a/default-configs/targets/riscv32-softmmu.mak b/configs/targets/riscv32-softmmu.mak similarity index 100% rename from default-configs/targets/riscv32-softmmu.mak rename to configs/targets/riscv32-softmmu.mak diff --git a/default-configs/targets/riscv32cheri-softmmu.mak b/configs/targets/riscv32cheri-softmmu.mak similarity index 100% rename from default-configs/targets/riscv32cheri-softmmu.mak rename to configs/targets/riscv32cheri-softmmu.mak diff --git a/default-configs/targets/riscv64-linux-user.mak b/configs/targets/riscv64-linux-user.mak similarity index 100% rename from default-configs/targets/riscv64-linux-user.mak rename to configs/targets/riscv64-linux-user.mak diff --git a/default-configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak similarity index 100% rename from default-configs/targets/riscv64-softmmu.mak rename to configs/targets/riscv64-softmmu.mak diff --git a/default-configs/targets/riscv64cheri-softmmu.mak b/configs/targets/riscv64cheri-softmmu.mak similarity index 100% rename from default-configs/targets/riscv64cheri-softmmu.mak rename to configs/targets/riscv64cheri-softmmu.mak diff --git a/default-configs/targets/rx-softmmu.mak b/configs/targets/rx-softmmu.mak similarity index 100% rename from default-configs/targets/rx-softmmu.mak rename to configs/targets/rx-softmmu.mak diff --git a/default-configs/targets/s390x-linux-user.mak b/configs/targets/s390x-linux-user.mak similarity index 100% rename from default-configs/targets/s390x-linux-user.mak rename to configs/targets/s390x-linux-user.mak diff --git a/default-configs/targets/s390x-softmmu.mak b/configs/targets/s390x-softmmu.mak similarity index 100% rename from default-configs/targets/s390x-softmmu.mak rename to configs/targets/s390x-softmmu.mak diff --git a/default-configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak similarity index 100% rename from default-configs/targets/sh4-linux-user.mak rename to configs/targets/sh4-linux-user.mak diff --git a/default-configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak similarity index 100% rename from default-configs/targets/sh4-softmmu.mak rename to configs/targets/sh4-softmmu.mak diff --git a/default-configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak similarity index 100% rename from default-configs/targets/sh4eb-linux-user.mak rename to configs/targets/sh4eb-linux-user.mak diff --git a/default-configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak similarity index 100% rename from default-configs/targets/sh4eb-softmmu.mak rename to configs/targets/sh4eb-softmmu.mak diff --git a/default-configs/targets/sparc-linux-user.mak b/configs/targets/sparc-linux-user.mak 
similarity index 100% rename from default-configs/targets/sparc-linux-user.mak rename to configs/targets/sparc-linux-user.mak diff --git a/default-configs/targets/sparc-softmmu.mak b/configs/targets/sparc-softmmu.mak similarity index 100% rename from default-configs/targets/sparc-softmmu.mak rename to configs/targets/sparc-softmmu.mak diff --git a/default-configs/targets/sparc32plus-linux-user.mak b/configs/targets/sparc32plus-linux-user.mak similarity index 100% rename from default-configs/targets/sparc32plus-linux-user.mak rename to configs/targets/sparc32plus-linux-user.mak diff --git a/default-configs/targets/sparc64-linux-user.mak b/configs/targets/sparc64-linux-user.mak similarity index 87% rename from default-configs/targets/sparc64-linux-user.mak rename to configs/targets/sparc64-linux-user.mak index 846924201ae..9d23ab4a266 100644 --- a/default-configs/targets/sparc64-linux-user.mak +++ b/configs/targets/sparc64-linux-user.mak @@ -1,5 +1,6 @@ TARGET_ARCH=sparc64 TARGET_BASE_ARCH=sparc +TARGET_ABI_DIR=sparc TARGET_SYSTBL_ABI=common,64 TARGET_SYSTBL=syscall.tbl TARGET_ALIGNED_ONLY=y diff --git a/default-configs/targets/sparc64-softmmu.mak b/configs/targets/sparc64-softmmu.mak similarity index 100% rename from default-configs/targets/sparc64-softmmu.mak rename to configs/targets/sparc64-softmmu.mak diff --git a/default-configs/targets/tricore-softmmu.mak b/configs/targets/tricore-softmmu.mak similarity index 100% rename from default-configs/targets/tricore-softmmu.mak rename to configs/targets/tricore-softmmu.mak diff --git a/default-configs/targets/x86_64-bsd-user.mak b/configs/targets/x86_64-bsd-user.mak similarity index 100% rename from default-configs/targets/x86_64-bsd-user.mak rename to configs/targets/x86_64-bsd-user.mak diff --git a/default-configs/targets/x86_64-linux-user.mak b/configs/targets/x86_64-linux-user.mak similarity index 100% rename from default-configs/targets/x86_64-linux-user.mak rename to configs/targets/x86_64-linux-user.mak diff --git a/configs/targets/x86_64-softmmu.mak b/configs/targets/x86_64-softmmu.mak new file mode 100644 index 00000000000..197817c9434 --- /dev/null +++ b/configs/targets/x86_64-softmmu.mak @@ -0,0 +1,5 @@ +TARGET_ARCH=x86_64 +TARGET_BASE_ARCH=i386 +TARGET_SUPPORTS_MTTCG=y +TARGET_NEED_FDT=y +TARGET_XML_FILES= gdb-xml/i386-64bit.xml diff --git a/default-configs/targets/xtensa-linux-user.mak b/configs/targets/xtensa-linux-user.mak similarity index 80% rename from default-configs/targets/xtensa-linux-user.mak rename to configs/targets/xtensa-linux-user.mak index fc95cc60f58..420b30a68d9 100644 --- a/default-configs/targets/xtensa-linux-user.mak +++ b/configs/targets/xtensa-linux-user.mak @@ -1,5 +1,4 @@ TARGET_ARCH=xtensa TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y TARGET_HAS_BFLT=y diff --git a/configs/targets/xtensa-softmmu.mak b/configs/targets/xtensa-softmmu.mak new file mode 100644 index 00000000000..f075557bfa9 --- /dev/null +++ b/configs/targets/xtensa-softmmu.mak @@ -0,0 +1,2 @@ +TARGET_ARCH=xtensa +TARGET_SUPPORTS_MTTCG=y diff --git a/default-configs/targets/xtensaeb-linux-user.mak b/configs/targets/xtensaeb-linux-user.mak similarity index 83% rename from default-configs/targets/xtensaeb-linux-user.mak rename to configs/targets/xtensaeb-linux-user.mak index cfc3518118c..1ea0f1ba915 100644 --- a/default-configs/targets/xtensaeb-linux-user.mak +++ b/configs/targets/xtensaeb-linux-user.mak @@ -1,6 +1,5 @@ TARGET_ARCH=xtensa TARGET_SYSTBL_ABI=common TARGET_SYSTBL=syscall.tbl -TARGET_ALIGNED_ONLY=y 
TARGET_WORDS_BIGENDIAN=y TARGET_HAS_BFLT=y diff --git a/configs/targets/xtensaeb-softmmu.mak b/configs/targets/xtensaeb-softmmu.mak new file mode 100644 index 00000000000..405cf5acbb4 --- /dev/null +++ b/configs/targets/xtensaeb-softmmu.mak @@ -0,0 +1,3 @@ +TARGET_ARCH=xtensa +TARGET_WORDS_BIGENDIAN=y +TARGET_SUPPORTS_MTTCG=y diff --git a/configure b/configure index cc203001f9d..3ffd89689a4 100755 --- a/configure +++ b/configure @@ -142,11 +142,11 @@ lines: ${BASH_LINENO[*]}" } do_cc() { - do_compiler "$cc" "$@" + do_compiler "$cc" $CPU_CFLAGS "$@" } do_cxx() { - do_compiler "$cxx" "$@" + do_compiler "$cxx" $CPU_CFLAGS "$@" } # Append $2 to the variable named $1, with space separation @@ -158,8 +158,8 @@ update_cxxflags() { # Set QEMU_CXXFLAGS from QEMU_CFLAGS by filtering out those # options which some versions of GCC's C++ compiler complain about # because they only make sense for C programs. - QEMU_CXXFLAGS="$QEMU_CXXFLAGS -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS" - CONFIGURE_CXXFLAGS=$(echo "$CONFIGURE_CFLAGS" | sed s/-std=gnu99/-std=gnu++11/) + QEMU_CXXFLAGS="-D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS" + CONFIGURE_CXXFLAGS=$(echo "$CONFIGURE_CFLAGS" | sed s/-std=gnu11/-std=gnu++11/) for arg in $QEMU_CFLAGS; do case $arg in -Wstrict-prototypes|-Wmissing-prototypes|-Wnested-externs|\ @@ -174,14 +174,14 @@ update_cxxflags() { compile_object() { local_cflags="$1" - do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC + do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC } compile_prog() { local_cflags="$1" local_ldflags="$2" - do_cc $CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC \ - $LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags + do_cc $CFLAGS $EXTRA_CFLAGS $CONFIGURE_CFLAGS $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC \ + $LDFLAGS $EXTRA_LDFLAGS $CONFIGURE_LDFLAGS $QEMU_LDFLAGS $local_ldflags } # symbolically link $1 to $2. Portable version of "ln -sf". @@ -216,10 +216,6 @@ version_ge () { done } -have_backend () { - echo "$trace_backends" | grep "$1" >/dev/null -} - glob() { eval test -z '"${1#'"$2"'}"' } @@ -240,11 +236,11 @@ interp_prefix="/usr/gnemul/qemu-%M" static="no" cross_compile="no" cross_prefix="" -audio_drv_list="" +audio_drv_list="default" block_drv_rw_whitelist="" block_drv_ro_whitelist="" +block_drv_whitelist_tools="no" host_cc="cc" -audio_win_int="" libs_qga="" debug_info="yes" lto="false" @@ -256,31 +252,11 @@ gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") if test -e "$source_path/.git" then git_submodules_action="update" - git_submodules="ui/keycodemapdb" - git_submodules="$git_submodules tests/fp/berkeley-testfloat-3" - git_submodules="$git_submodules tests/fp/berkeley-softfloat-3" else git_submodules_action="ignore" - git_submodules="" - - if ! test -f "$source_path/ui/keycodemapdb/README" - then - echo - echo "ERROR: missing file $source_path/ui/keycodemapdb/README" - echo - echo "This is not a GIT checkout but module content appears to" - echo "be missing. Do not use 'git archive' or GitHub download links" - echo "to acquire QEMU source archives. Non-GIT builds are only" - echo "supported with source archives linked from:" - echo - echo " https://www.qemu.org/download/#source" - echo - echo "Developers working with GIT can use scripts/archive-source.sh" - echo "if they need to create valid source archives." 
- echo - exit 1 - fi fi + +git_submodules="ui/keycodemapdb" git="git" # Don't accept a target_list environment variable. @@ -310,48 +286,21 @@ for opt do esac done -brlapi="auto" -curl="auto" -iconv="auto" -curses="auto" -docs="auto" -fdt="auto" -netmap="no" -sdl="auto" -sdl_image="auto" -coreaudio="auto" -virtiofsd="auto" -virtfs="auto" -libudev="auto" -mpath="auto" -vnc="enabled" -sparse="auto" -vde="$default_feature" -vnc_sasl="auto" -vnc_jpeg="auto" -vnc_png="auto" -xkbcommon="auto" -xen="$default_feature" +EXTRA_CFLAGS="" +EXTRA_CXXFLAGS="" +EXTRA_LDFLAGS="" + xen_ctrl_version="$default_feature" -xen_pci_passthrough="auto" -linux_aio="$default_feature" -linux_io_uring="$default_feature" -cap_ng="auto" -attr="auto" xfs="$default_feature" -tcg="enabled" membarrier="$default_feature" +vhost_kernel="$default_feature" vhost_net="$default_feature" vhost_crypto="$default_feature" vhost_scsi="$default_feature" vhost_vsock="$default_feature" vhost_user="no" -vhost_user_blk_server="auto" vhost_user_fs="$default_feature" -kvm="auto" -hax="auto" -hvf="auto" -whpx="auto" +vhost_vdpa="$default_feature" rdma="$default_feature" pvrdma="$default_feature" gprof="no" @@ -363,82 +312,43 @@ sanitizers="no" tsan="no" fortify_source="$default_feature" strip_opt="yes" -tcg_interpreter="false" -bigendian="no" mingw32="no" gcov="no" EXESUF="" -HOST_DSOSUF=".so" modules="no" module_upgrades="no" prefix="/usr/local" qemu_suffix="qemu" -slirp="auto" -oss_lib="" bsd="no" linux="no" solaris="no" profiler="no" -cocoa="auto" softmmu="yes" linux_user="no" bsd_user="no" -blobs="true" pkgversion="" pie="" qom_cast_debug="yes" trace_backends="log" trace_file="trace" -spice="$default_feature" -rbd="auto" -smartcard="$default_feature" -u2f="auto" -libusb="$default_feature" -usb_redir="$default_feature" opengl="$default_feature" cpuid_h="no" avx2_opt="$default_feature" -capstone="auto" -lzo="auto" -snappy="auto" -bzip2="auto" -lzfse="auto" -zstd="auto" guest_agent="$default_feature" guest_agent_with_vss="no" guest_agent_ntddscsi="no" -guest_agent_msi="auto" vss_win32_sdk="$default_feature" win_sdk="no" want_tools="$default_feature" -libiscsi="auto" -libnfs="auto" coroutine="" coroutine_pool="$default_feature" debug_stack_usage="no" crypto_afalg="no" -cfi="false" -cfi_debug="false" -seccomp="auto" -glusterfs="auto" -gtk="auto" tls_priority="NORMAL" -gnutls="$default_feature" -nettle="$default_feature" -nettle_xts="no" -gcrypt="$default_feature" -gcrypt_hmac="no" -gcrypt_xts="no" -qemu_private_xts="yes" -auth_pam="$default_feature" -vte="$default_feature" -virglrenderer="$default_feature" tpm="$default_feature" libssh="$default_feature" live_block_migration=${default_feature:-yes} numa="$default_feature" -tcmalloc="no" -jemalloc="no" replication=${default_feature:-yes} bochs=${default_feature:-yes} cloop=${default_feature:-yes} @@ -448,26 +358,34 @@ vdi=${default_feature:-yes} vvfat=${default_feature:-yes} qed=${default_feature:-yes} parallels=${default_feature:-yes} -sheepdog="no" -libxml2="$default_feature" debug_mutex="no" -libpmem="$default_feature" -default_devices="true" -plugins="no" -fuzzing="no" +plugins="$default_feature" rng_none="no" secret_keyring="$default_feature" -libdaxctl="$default_feature" meson="" +meson_args="" ninja="" +gio="$default_feature" skip_meson=no -gettext="auto" -fuse="auto" -fuse_lseek="auto" -multiprocess="auto" +slirp_smbd="$default_feature" -malloc_trim="auto" -gio="$default_feature" +# The following Meson options are handled manually (still they +# are included in the automatically 
generated help message) + +# 1. Track which submodules are needed +capstone="auto" +fdt="auto" +slirp="auto" + +# 2. Support --with/--without option +default_devices="true" + +# 3. Automatically enable/disable other options +tcg="enabled" +cfi="false" + +# 4. Detection partly done in configure +xen=${default_feature:+disabled} # parse CC options second for opt do @@ -482,13 +400,13 @@ for opt do ;; --cpu=*) cpu="$optarg" ;; - --extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg" - QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg" - ;; - --extra-cxxflags=*) QEMU_CXXFLAGS="$QEMU_CXXFLAGS $optarg" + --extra-cflags=*) + EXTRA_CFLAGS="$EXTRA_CFLAGS $optarg" + EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg" + ;; + --extra-cxxflags=*) EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS $optarg" ;; - --extra-ldflags=*) QEMU_LDFLAGS="$QEMU_LDFLAGS $optarg" - EXTRA_LDFLAGS="$optarg" + --extra-ldflags=*) EXTRA_LDFLAGS="$EXTRA_LDFLAGS $optarg" ;; --enable-debug-info) debug_info="yes" ;; @@ -544,21 +462,20 @@ query_pkg_config() { pkg_config=query_pkg_config sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" -# If the user hasn't specified ARFLAGS, default to 'rv', just as make does. -ARFLAGS="${ARFLAGS-rv}" - # default flags for all hosts # We use -fwrapv to tell the compiler that we require a C dialect where # left shift of signed integers is well defined and has the expected # 2s-complement style results. (Both clang and gcc agree that it # provides these semantics.) -QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS" +QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv" QEMU_CFLAGS="-Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS" QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS" QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS" +QEMU_LDFLAGS= + # Flags that are needed during configure but later taken care of by Meson -CONFIGURE_CFLAGS="-std=gnu99 -Wall" +CONFIGURE_CFLAGS="-std=gnu11 -Wall" CONFIGURE_LDFLAGS= @@ -586,15 +503,6 @@ int main(void) { return 0; } EOF } -write_c_fuzzer_skeleton() { - cat > $TMPC < -#include -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; } -EOF -} - if check_define __linux__ ; then targetos="Linux" elif check_define _WIN32 ; then @@ -627,9 +535,6 @@ fi # cross-compiling to one of these OSes then you'll need to specify # the correct CPU with the --cpu option. case $targetos in -Darwin) - HOST_DSOSUF=".dylib" - ;; SunOS) # $(uname -m) returns i86pc even on an x86_64 box, so default based on isainfo if test -z "$cpu" && test "$(isainfo -k)" = "amd64"; then @@ -673,11 +578,7 @@ elif check_define __s390__ ; then cpu="s390" fi elif check_define __riscv ; then - if check_define _LP64 ; then - cpu="riscv64" - else - cpu="riscv32" - fi + cpu="riscv" elif check_define __arm__ ; then cpu="arm" elif check_define __aarch64__ ; then @@ -690,7 +591,7 @@ ARCH= # Normalise host CPU name and set ARCH. # Note that this case should only have supported host CPUs, not guests. 
case "$cpu" in - ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64) + ppc|ppc64|s390x|sparc64|x32|riscv) ;; ppc64le) ARCH="ppc64" @@ -727,52 +628,34 @@ fi case $targetos in MINGW32*) mingw32="yes" - audio_possible_drivers="dsound sdl" - if check_include dsound.h; then - audio_drv_list="dsound" - else - audio_drv_list="" - fi supported_os="yes" + plugins="no" pie="no" ;; GNU/kFreeBSD) bsd="yes" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl pa" ;; FreeBSD) bsd="yes" + bsd_user="yes" make="${MAKE-gmake}" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl pa" # needed for kinfo_getvmmap(3) in libutil.h - netmap="" # enable netmap autodetect ;; DragonFly) bsd="yes" make="${MAKE-gmake}" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl pa" ;; NetBSD) bsd="yes" make="${MAKE-gmake}" - audio_drv_list="oss try-sdl" - audio_possible_drivers="oss sdl" - oss_lib="-lossaudio" ;; OpenBSD) bsd="yes" make="${MAKE-gmake}" - audio_drv_list="try-sdl" - audio_possible_drivers="sdl" ;; Darwin) bsd="yes" darwin="yes" - audio_drv_list="try-coreaudio try-sdl" - audio_possible_drivers="coreaudio sdl" # Disable attempts to use ObjectiveC features in os/object.h since they # won't work when we're compiling with gcc as a C compiler. QEMU_CFLAGS="-DOS_OBJECT_USE_OBJC=0 $QEMU_CFLAGS" @@ -781,10 +664,6 @@ SunOS) solaris="yes" make="${MAKE-gmake}" smbd="${SMBD-/usr/sfw/sbin/smbd}" - if test -f /usr/include/sys/soundcard.h ; then - audio_drv_list="oss try-sdl" - fi - audio_possible_drivers="oss sdl" # needed for CMSG_ macros in sys/socket.h QEMU_CFLAGS="-D_XOPEN_SOURCE=600 $QEMU_CFLAGS" # needed for TIOCWIN* defines in termios.h @@ -792,23 +671,16 @@ SunOS) ;; Haiku) haiku="yes" - QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -D_BSD_SOURCE $QEMU_CFLAGS" + pie="no" + QEMU_CFLAGS="-DB_USE_POSITIVE_POSIX_ERRORS -D_BSD_SOURCE -fPIC $QEMU_CFLAGS" ;; Linux) - audio_drv_list="try-pa oss" - audio_possible_drivers="oss alsa sdl pa" linux="yes" linux_user="yes" vhost_user=${default_feature:-yes} ;; esac -if [ "$bsd" = "yes" ] ; then - if [ "$darwin" != "yes" ] ; then - bsd_user="yes" - fi -fi - : ${make=${MAKE-make}} # We prefer python 3.x. A bare 'python' is traditionally @@ -837,8 +709,6 @@ do fi done -: ${smbd=${SMBD-/usr/sbin/smbd}} - # Default objcc to clang if available, otherwise use CC if has clang; then objcc=clang @@ -848,7 +718,6 @@ fi if test "$mingw32" = "yes" ; then EXESUF=".exe" - HOST_DSOSUF=".dll" # MinGW needs -mthreads for TLS and macro _MT. CONFIGURE_CFLAGS="-mthreads $CONFIGURE_CFLAGS" write_c_skeleton; @@ -859,6 +728,18 @@ fi werror="" +. $source_path/scripts/meson-buildoptions.sh + +meson_options= +meson_option_parse() { + meson_options="$meson_options $(_meson_option_parse "$@")" + if test $? 
-eq 1; then + echo "ERROR: unknown option $1" + echo "Try '$0 --help' for more information" + exit 1 + fi +} + for opt do optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)') case "$opt" in @@ -932,17 +813,24 @@ for opt do error_exit "Can't mix --target-list-exclude with --target-list" fi ;; - --enable-trace-backends=*) trace_backends="$optarg" - ;; - # XXX: backwards compatibility - --enable-trace-backend=*) trace_backends="$optarg" - ;; --with-trace-file=*) trace_file="$optarg" ;; --with-default-devices) default_devices="true" ;; --without-default-devices) default_devices="false" ;; + --with-devices-*[!a-zA-Z0-9_-]*=*) error_exit "Passed bad --with-devices-FOO option" + ;; + --with-devices-*) device_arch=${opt#--with-devices-}; + device_arch=${device_arch%%=*} + cf=$source_path/configs/devices/$device_arch-softmmu/$optarg.mak + if test -f "$cf"; then + device_archs="$device_archs $device_arch" + eval "devices_${device_arch}=\$optarg" + else + error_exit "File $cf does not exist" + fi + ;; --without-default-features) # processed above ;; --enable-gprof) gprof="yes" @@ -987,50 +875,20 @@ for opt do # configure to be used by RPM and similar macros that set # lots of directory switches by default. ;; - --disable-sdl) sdl="disabled" - ;; - --enable-sdl) sdl="enabled" - ;; - --disable-sdl-image) sdl_image="disabled" - ;; - --enable-sdl-image) sdl_image="enabled" - ;; --disable-qom-cast-debug) qom_cast_debug="no" ;; --enable-qom-cast-debug) qom_cast_debug="yes" ;; - --disable-virtfs) virtfs="disabled" - ;; - --enable-virtfs) virtfs="enabled" - ;; - --disable-libudev) libudev="disabled" - ;; - --enable-libudev) libudev="enabled" - ;; - --disable-virtiofsd) virtiofsd="disabled" - ;; - --enable-virtiofsd) virtiofsd="enabled" - ;; - --disable-mpath) mpath="disabled" - ;; - --enable-mpath) mpath="enabled" - ;; - --disable-vnc) vnc="disabled" - ;; - --enable-vnc) vnc="enabled" - ;; - --disable-gettext) gettext="disabled" - ;; - --enable-gettext) gettext="enabled" - ;; - --oss-lib=*) oss_lib="$optarg" - ;; --audio-drv-list=*) audio_drv_list="$optarg" ;; --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') ;; --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') ;; + --enable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="yes" + ;; + --disable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="no" + ;; --enable-debug-tcg) debug_tcg="yes" ;; --disable-debug-tcg) debug_tcg="no" @@ -1061,102 +919,27 @@ for opt do ;; --disable-tsan) tsan="no" ;; - --enable-sparse) sparse="enabled" - ;; - --disable-sparse) sparse="disabled" - ;; --disable-strip) strip_opt="no" ;; - --disable-vnc-sasl) vnc_sasl="disabled" - ;; - --enable-vnc-sasl) vnc_sasl="enabled" - ;; - --disable-vnc-jpeg) vnc_jpeg="disabled" - ;; - --enable-vnc-jpeg) vnc_jpeg="enabled" - ;; - --disable-vnc-png) vnc_png="disabled" - ;; - --enable-vnc-png) vnc_png="enabled" - ;; --disable-slirp) slirp="disabled" ;; --enable-slirp) slirp="enabled" ;; --enable-slirp=git) slirp="internal" ;; - --enable-slirp=system) slirp="system" - ;; - --disable-vde) vde="no" - ;; - --enable-vde) vde="yes" - ;; - --disable-netmap) netmap="no" - ;; - --enable-netmap) netmap="yes" + --enable-slirp=*) slirp="$optarg" ;; --disable-xen) xen="disabled" ;; --enable-xen) xen="enabled" ;; - --disable-xen-pci-passthrough) xen_pci_passthrough="disabled" - ;; - --enable-xen-pci-passthrough) xen_pci_passthrough="enabled" - ;; - --disable-brlapi) brlapi="disabled" - ;; - --enable-brlapi) 
brlapi="enabled" - ;; - --disable-kvm) kvm="disabled" - ;; - --enable-kvm) kvm="enabled" - ;; - --disable-hax) hax="disabled" - ;; - --enable-hax) hax="enabled" - ;; - --disable-hvf) hvf="disabled" - ;; - --enable-hvf) hvf="enabled" - ;; - --disable-whpx) whpx="disabled" - ;; - --enable-whpx) whpx="enabled" - ;; - --disable-tcg-interpreter) tcg_interpreter="false" - ;; - --enable-tcg-interpreter) tcg_interpreter="true" - ;; - --disable-cap-ng) cap_ng="disabled" - ;; - --enable-cap-ng) cap_ng="enabled" - ;; --disable-tcg) tcg="disabled" + plugins="no" ;; --enable-tcg) tcg="enabled" ;; - --disable-malloc-trim) malloc_trim="disabled" - ;; - --enable-malloc-trim) malloc_trim="enabled" - ;; - --disable-spice) spice="no" - ;; - --enable-spice) spice="yes" - ;; - --disable-libiscsi) libiscsi="disabled" - ;; - --enable-libiscsi) libiscsi="enabled" - ;; - --disable-libnfs) libnfs="disabled" - ;; - --enable-libnfs) libnfs="enabled" - ;; --enable-profiler) profiler="yes" ;; - --disable-cocoa) cocoa="disabled" - ;; - --enable-cocoa) cocoa="enabled" - ;; --disable-system) softmmu="no" ;; --enable-system) softmmu="yes" @@ -1200,48 +983,18 @@ for opt do ;; --disable-cfi) cfi="false" ;; - --enable-cfi-debug) cfi_debug="true" - ;; - --disable-cfi-debug) cfi_debug="false" - ;; - --disable-curses) curses="disabled" - ;; - --enable-curses) curses="enabled" - ;; - --disable-iconv) iconv="disabled" - ;; - --enable-iconv) iconv="enabled" - ;; - --disable-curl) curl="disabled" - ;; - --enable-curl) curl="enabled" - ;; --disable-fdt) fdt="disabled" ;; --enable-fdt) fdt="enabled" ;; --enable-fdt=git) fdt="internal" ;; - --enable-fdt=system) fdt="system" - ;; - --disable-linux-aio) linux_aio="no" - ;; - --enable-linux-aio) linux_aio="yes" - ;; - --disable-linux-io-uring) linux_io_uring="no" - ;; - --enable-linux-io-uring) linux_io_uring="yes" - ;; - --disable-attr) attr="disabled" - ;; - --enable-attr) attr="enabled" + --enable-fdt=*) fdt="$optarg" ;; --disable-membarrier) membarrier="no" ;; --enable-membarrier) membarrier="yes" ;; - --disable-blobs) blobs="false" - ;; --with-pkgversion=*) pkgversion="$optarg" ;; --with-coroutine=*) coroutine="$optarg" @@ -1256,10 +1009,6 @@ for opt do ;; --disable-crypto-afalg) crypto_afalg="no" ;; - --disable-docs) docs="disabled" - ;; - --enable-docs) docs="enabled" - ;; --disable-vhost-net) vhost_net="no" ;; --enable-vhost-net) vhost_net="yes" @@ -1276,10 +1025,6 @@ for opt do ;; --enable-vhost-vsock) vhost_vsock="yes" ;; - --disable-vhost-user-blk-server) vhost_user_blk_server="disabled" - ;; - --enable-vhost-user-blk-server) vhost_user_blk_server="enabled" - ;; --disable-vhost-user-fs) vhost_user_fs="no" ;; --enable-vhost-user-fs) vhost_user_fs="yes" @@ -1288,60 +1033,16 @@ for opt do ;; --enable-opengl) opengl="yes" ;; - --disable-rbd) rbd="disabled" - ;; - --enable-rbd) rbd="enabled" - ;; --disable-xfsctl) xfs="no" ;; --enable-xfsctl) xfs="yes" ;; - --disable-smartcard) smartcard="no" - ;; - --enable-smartcard) smartcard="yes" - ;; - --disable-u2f) u2f="disabled" - ;; - --enable-u2f) u2f="enabled" - ;; - --disable-libusb) libusb="no" - ;; - --enable-libusb) libusb="yes" - ;; - --disable-usb-redir) usb_redir="no" - ;; - --enable-usb-redir) usb_redir="yes" - ;; --disable-zlib-test) ;; - --disable-lzo) lzo="disabled" - ;; - --enable-lzo) lzo="enabled" - ;; - --disable-snappy) snappy="disabled" - ;; - --enable-snappy) snappy="enabled" - ;; - --disable-bzip2) bzip2="disabled" - ;; - --enable-bzip2) bzip2="enabled" - ;; - --enable-lzfse) lzfse="enabled" - ;; - --disable-lzfse) 
lzfse="disabled" - ;; - --disable-zstd) zstd="disabled" - ;; - --enable-zstd) zstd="enabled" - ;; --enable-guest-agent) guest_agent="yes" ;; --disable-guest-agent) guest_agent="no" ;; - --enable-guest-agent-msi) guest_agent_msi="enabled" - ;; - --disable-guest-agent-msi) guest_agent_msi="disabled" - ;; --with-vss-sdk) vss_win32_sdk="" ;; --with-vss-sdk=*) vss_win32_sdk="$optarg" @@ -1358,12 +1059,6 @@ for opt do ;; --disable-tools) want_tools="no" ;; - --enable-seccomp) seccomp="enabled" - ;; - --disable-seccomp) seccomp="disabled" - ;; - --disable-glusterfs) glusterfs="disabled" - ;; --disable-avx2) avx2_opt="no" ;; --enable-avx2) avx2_opt="yes" @@ -1372,9 +1067,6 @@ for opt do ;; --enable-avx512f) avx512f_opt="yes" ;; - - --enable-glusterfs) glusterfs="enabled" - ;; --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; @@ -1384,28 +1076,8 @@ for opt do --enable-uuid|--disable-uuid) echo "$0: $opt is obsolete, UUID support is always built" >&2 ;; - --disable-gtk) gtk="disabled" - ;; - --enable-gtk) gtk="enabled" - ;; --tls-priority=*) tls_priority="$optarg" ;; - --disable-gnutls) gnutls="no" - ;; - --enable-gnutls) gnutls="yes" - ;; - --disable-nettle) nettle="no" - ;; - --enable-nettle) nettle="yes" - ;; - --disable-gcrypt) gcrypt="no" - ;; - --enable-gcrypt) gcrypt="yes" - ;; - --disable-auth-pam) auth_pam="no" - ;; - --enable-auth-pam) auth_pam="yes" - ;; --enable-rdma) rdma="yes" ;; --disable-rdma) rdma="no" @@ -1414,14 +1086,6 @@ for opt do ;; --disable-pvrdma) pvrdma="no" ;; - --disable-vte) vte="no" - ;; - --enable-vte) vte="yes" - ;; - --disable-virglrenderer) virglrenderer="no" - ;; - --enable-virglrenderer) virglrenderer="yes" - ;; --disable-tpm) tpm="no" ;; --enable-tpm) tpm="yes" @@ -1438,18 +1102,6 @@ for opt do ;; --enable-numa) numa="yes" ;; - --disable-libxml2) libxml2="no" - ;; - --enable-libxml2) libxml2="yes" - ;; - --disable-tcmalloc) tcmalloc="no" - ;; - --enable-tcmalloc) tcmalloc="yes" - ;; - --disable-jemalloc) jemalloc="no" - ;; - --enable-jemalloc) jemalloc="yes" - ;; --disable-replication) replication="no" ;; --enable-replication) replication="yes" @@ -1486,10 +1138,6 @@ for opt do ;; --enable-parallels) parallels="yes" ;; - --disable-sheepdog) sheepdog="no" - ;; - --enable-sheepdog) sheepdog="yes" - ;; --disable-vhost-user) vhost_user="no" ;; --enable-vhost-user) vhost_user="yes" @@ -1508,18 +1156,10 @@ for opt do ;; --enable-capstone=git) capstone="internal" ;; - --enable-capstone=system) capstone="system" + --enable-capstone=*) capstone="$optarg" ;; --with-git=*) git="$optarg" ;; - --enable-git-update) - git_submodules_action="update" - echo "--enable-git-update deprecated, use --with-git-submodules=update" - ;; - --disable-git-update) - git_submodules_action="validate" - echo "--disable-git-update deprecated, use --with-git-submodules=validate" - ;; --with-git-submodules=*) git_submodules_action="$optarg" ;; @@ -1527,15 +1167,11 @@ for opt do ;; --disable-debug-mutex) debug_mutex=no ;; - --enable-libpmem) libpmem=yes - ;; - --disable-libpmem) libpmem=no - ;; - --enable-xkbcommon) xkbcommon="enabled" - ;; - --disable-xkbcommon) xkbcommon="disabled" - ;; - --enable-plugins) plugins="yes" + --enable-plugins) if test "$mingw32" = "yes"; then + error_exit "TCG plugins not currently supported on Windows platforms" + else + plugins="yes" + fi ;; --disable-plugins) plugins="no" ;; @@ -1543,10 +1179,6 @@ for opt do ;; --disable-containers) use_containers="no" ;; - --enable-fuzzing) 
fuzzing=yes - ;; - --disable-fuzzing) fuzzing=no - ;; --gdb=*) gdb_bin="$optarg" ;; --enable-rng-none) rng_none=yes @@ -1557,25 +1189,25 @@ for opt do ;; --disable-keyring) secret_keyring="no" ;; - --enable-libdaxctl) libdaxctl=yes - ;; - --disable-libdaxctl) libdaxctl=no + --enable-gio) gio=yes ;; - --enable-fuse) fuse="enabled" + --disable-gio) gio=no ;; - --disable-fuse) fuse="disabled" + --enable-slirp-smbd) slirp_smbd=yes ;; - --enable-fuse-lseek) fuse_lseek="enabled" + --disable-slirp-smbd) slirp_smbd=no ;; - --disable-fuse-lseek) fuse_lseek="disabled" + # backwards compatibility options + --enable-trace-backend=*) meson_option_parse "--enable-trace-backends=$optarg" "$optarg" ;; - --enable-multiprocess) multiprocess="enabled" + --disable-blobs) meson_option_parse --disable-install-blobs "" ;; - --disable-multiprocess) multiprocess="disabled" + --enable-tcmalloc) meson_option_parse --enable-malloc=tcmalloc tcmalloc ;; - --enable-gio) gio=yes + --enable-jemalloc) meson_option_parse --enable-malloc=jemalloc jemalloc ;; - --disable-gio) gio=no + # everything else has the same name in configure and meson + --enable-* | --disable-*) meson_option_parse "$opt" "$optarg" ;; *) echo "ERROR: unknown option $opt" @@ -1585,6 +1217,11 @@ for opt do esac done +# test for any invalid configuration combinations +if test "$plugins" = "yes" -a "$tcg" = "disabled"; then + error_exit "Can't enable plugins on non-TCG builds" +fi + case $git_submodules_action in update|validate) if test ! -e "$source_path/.git"; then @@ -1593,6 +1230,28 @@ case $git_submodules_action in fi ;; ignore) + if ! test -f "$source_path/ui/keycodemapdb/README" + then + echo + echo "ERROR: missing GIT submodules" + echo + if test -e "$source_path/.git"; then + echo "--with-git-submodules=ignore specified but submodules were not" + echo "checked out. Please initialize and update submodules." + else + echo "This is not a GIT checkout but module content appears to" + echo "be missing. Do not use 'git archive' or GitHub download links" + echo "to acquire QEMU source archives. Non-GIT builds are only" + echo "supported with source archives linked from:" + echo + echo " https://www.qemu.org/download/#source" + echo + echo "Developers working with GIT can use scripts/archive-source.sh" + echo "if they need to create valid source archives." + fi + echo + exit 1 + fi ;; *) echo "ERROR: invalid --with-git-submodules= value '$git_submodules_action'" @@ -1618,51 +1277,27 @@ firmwarepath="${firmwarepath:-$datadir/qemu-firmware}" localedir="${localedir:-$datadir/locale}" case "$cpu" in - ppc) - CPU_CFLAGS="-m32" - QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS" - ;; - ppc64) - CPU_CFLAGS="-m64" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - sparc) - CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" - QEMU_LDFLAGS="-m32 -mv8plus $QEMU_LDFLAGS" - ;; - sparc64) - CPU_CFLAGS="-m64 -mcpu=ultrasparc" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - s390) - CPU_CFLAGS="-m31" - QEMU_LDFLAGS="-m31 $QEMU_LDFLAGS" - ;; - s390x) - CPU_CFLAGS="-m64" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - i386) - CPU_CFLAGS="-m32" - QEMU_LDFLAGS="-m32 $QEMU_LDFLAGS" - ;; - x86_64) - # ??? Only extremely old AMD cpus do not have cmpxchg16b. - # If we truly care, we should simply detect this case at - # runtime and generate the fallback to serial emulation. 
- CPU_CFLAGS="-m64 -mcx16" - QEMU_LDFLAGS="-m64 $QEMU_LDFLAGS" - ;; - x32) - CPU_CFLAGS="-mx32" - QEMU_LDFLAGS="-mx32 $QEMU_LDFLAGS" - ;; + ppc) CPU_CFLAGS="-m32" ;; + ppc64) CPU_CFLAGS="-m64" ;; + sparc) CPU_CFLAGS="-m32 -mv8plus -mcpu=ultrasparc" ;; + sparc64) CPU_CFLAGS="-m64 -mcpu=ultrasparc" ;; + s390) CPU_CFLAGS="-m31" ;; + s390x) CPU_CFLAGS="-m64" ;; + i386) CPU_CFLAGS="-m32" ;; + x32) CPU_CFLAGS="-mx32" ;; + + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + x86_64) CPU_CFLAGS="-m64 -mcx16" ;; + # No special flags required for other host CPUs esac -eval "cross_cc_${cpu}=\$host_cc" -cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" -QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS" +if eval test -z "\${cross_cc_$cpu}"; then + eval "cross_cc_${cpu}=\$cc" + cross_cc_vars="$cross_cc_vars cross_cc_${cpu}" +fi # For user-mode emulation the host arch has to be one we explicitly # support, even if we're using TCI. @@ -1672,18 +1307,18 @@ if [ "$ARCH" = "unknown" ]; then fi default_target_list="" -deprecated_targets_list=ppc64abi32-linux-user,lm32-softmmu,unicore32-softmmu +deprecated_targets_list=ppc64abi32-linux-user deprecated_features="" mak_wilds="" if [ "$softmmu" = "yes" ]; then - mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-softmmu.mak" + mak_wilds="${mak_wilds} $source_path/configs/targets/*-softmmu.mak" fi if [ "$linux_user" = "yes" ]; then - mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-linux-user.mak" + mak_wilds="${mak_wilds} $source_path/configs/targets/*-linux-user.mak" fi if [ "$bsd_user" = "yes" ]; then - mak_wilds="${mak_wilds} $source_path/default-configs/targets/*-bsd-user.mak" + mak_wilds="${mak_wilds} $source_path/configs/targets/*-bsd-user.mak" fi # If the user doesn't explicitly specify a deprecated target we will @@ -1703,9 +1338,6 @@ for config in $mak_wilds; do fi done -# Enumerate public trace backends for --help output -trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' "$source_path"/scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/')) - if test x"$show_help" = x"yes" ; then cat << EOF @@ -1732,8 +1364,8 @@ Advanced options (experts only): build time --cxx=CXX use C++ compiler CXX [$cxx] --objcc=OBJCC use Objective-C compiler OBJCC [$objcc] - --extra-cflags=CFLAGS append extra C compiler flags QEMU_CFLAGS - --extra-cxxflags=CXXFLAGS append extra C++ compiler flags QEMU_CXXFLAGS + --extra-cflags=CFLAGS append extra C compiler flags CFLAGS + --extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS --extra-ldflags=LDFLAGS append extra linker flags LDFLAGS --cross-cc-ARCH=CC use compiler when building ARCH guest test cases --cross-cc-flags-ARCH= use compiler flags when building ARCH guest tests @@ -1764,35 +1396,30 @@ Advanced options (experts only): --without-default-features default all --enable-* options to "disabled" --without-default-devices do not include any device that is not needed to start the emulator (only use if you are including - desired devices in default-configs/devices/) + desired devices in configs/devices/) + --with-devices-ARCH=NAME override default configs/devices --enable-debug enable common debug build options --enable-sanitizers enable default sanitizers --enable-tsan enable thread sanitizer --disable-strip disable stripping binaries --disable-werror disable compilation abort on warning --disable-stack-protector disable compiler-provided stack protection - 
--audio-drv-list=LIST set audio drivers list: - Available drivers: $audio_possible_drivers + --audio-drv-list=LIST set audio drivers to try if -audiodev is not used --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L --block-drv-rw-whitelist=L set block driver read-write whitelist - (affects only QEMU, not qemu-img) + (by default affects only QEMU, not tools like qemu-img) --block-drv-ro-whitelist=L set block driver read-only whitelist - (affects only QEMU, not qemu-img) - --enable-trace-backends=B Set trace backend - Available backends: $trace_backend_list + (by default affects only QEMU, not tools like qemu-img) + --enable-block-drv-whitelist-in-tools + use block whitelist also in tools instead of only QEMU --with-trace-file=NAME Full PATH,NAME of file to store traces Default:trace- - --disable-slirp disable SLIRP userspace network connectivity - --enable-tcg-interpreter enable TCI (TCG with bytecode interpreter, experimental and slow) - --enable-malloc-trim enable libc malloc_trim() for memory optimization - --oss-lib path to OSS library --cpu=CPU Build for host CPU [$cpu] --with-coroutine=BACKEND coroutine backend. Supported options: ucontext, sigaltstack, windows --enable-gcov enable test coverage analysis with gcov - --disable-blobs disable installing provided firmware blobs --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent --with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb) --tls-priority default TLS protocol/cipher priority string @@ -1804,110 +1431,41 @@ Advanced options (experts only): enable plugins via shared library loading --disable-containers don't use containers for cross-building --gdb=GDB-path gdb to use for gdbstub tests [$gdb_bin] - -Optional features, enabled with --enable-FEATURE and -disabled with --disable-FEATURE, default is enabled if available -(unless built with --without-default-features): - +EOF + meson_options_help +cat << EOF system all system emulation targets user supported user emulation targets linux-user all linux usermode emulation targets bsd-user all BSD usermode emulation targets - docs build documentation guest-agent build the QEMU Guest Agent - guest-agent-msi build guest agent Windows MSI installation package pie Position Independent Executables modules modules support (non-Windows) module-upgrades try to load modules from alternate paths for upgrades debug-tcg TCG debugging (default is disabled) debug-info debugging information lto Enable Link-Time Optimization. - sparse sparse checker safe-stack SafeStack Stack Smash Protection. Depends on clang/llvm >= 3.7 and requires coroutine backend ucontext. - cfi Enable Control-Flow Integrity for indirect function calls. - In case of a cfi violation, QEMU is terminated with SIGILL - Depends on lto and is incompatible with modules - Automatically enables Link-Time Optimization (lto) - cfi-debug In case of a cfi violation, a message containing the line that - triggered the error is written to stderr. 
After the error, - QEMU is still terminated with SIGILL - gnutls GNUTLS cryptography support - nettle nettle cryptography support - gcrypt libgcrypt cryptography support - auth-pam PAM access control - sdl SDL UI - sdl-image SDL Image support for icons - gtk gtk UI - vte vte support for the gtk UI - curses curses UI - iconv font glyph conversion support - vnc VNC UI support - vnc-sasl SASL encryption for VNC server - vnc-jpeg JPEG lossy compression for VNC server - vnc-png PNG compression for VNC server - cocoa Cocoa UI (Mac OS X only) - virtfs VirtFS - virtiofsd build virtiofs daemon (virtiofsd) - libudev Use libudev to enumerate host devices - mpath Multipath persistent reservation passthrough - xen xen backend driver support - xen-pci-passthrough PCI passthrough support for Xen - brlapi BrlAPI (Braile) - curl curl connectivity membarrier membarrier system call (for Linux 4.14+ or Windows) - fdt fdt device tree - kvm KVM acceleration support - hax HAX acceleration support - hvf Hypervisor.framework acceleration support - whpx Windows Hypervisor Platform acceleration support rdma Enable RDMA-based migration pvrdma Enable PVRDMA support - vde support for vde network - netmap support for netmap network - linux-aio Linux AIO support - linux-io-uring Linux io_uring support - cap-ng libcap-ng support - attr attr and xattr support vhost-net vhost-net kernel acceleration support vhost-vsock virtio sockets device support vhost-scsi vhost-scsi kernel target support vhost-crypto vhost-user-crypto backend support vhost-kernel vhost kernel backend support vhost-user vhost-user backend support - vhost-user-blk-server vhost-user-blk server support vhost-vdpa vhost-vdpa kernel backend support - spice spice - rbd rados block device (rbd) - libiscsi iscsi support - libnfs nfs support - smartcard smartcard support (libcacard) - u2f U2F support (u2f-emu) - libusb libusb (for usb passthrough) live-block-migration Block migration in the main migration stream - usb-redir usb network redirection support - lzo support of lzo compression library - snappy support of snappy compression library - bzip2 support of bzip2 compression library - (for reading bzip2-compressed dmg images) - lzfse support of lzfse compression library - (for reading lzfse-compressed dmg images) - zstd support for zstd compression library - (for migration compression and qcow2 cluster compression) - seccomp seccomp support coroutine-pool coroutine freelist (better performance) - glusterfs GlusterFS backend tpm TPM support libssh ssh block device support numa libnuma support - libxml2 for Parallels image format - tcmalloc tcmalloc support - jemalloc jemalloc support avx2 AVX2 optimization support avx512f AVX512F optimization support replication replication support opengl opengl support - virglrenderer virgl rendering support xfsctl xfsctl support qom-cast-debug cast debugging support tools build qemu-io, qemu-nbd and qemu-img tools @@ -1919,18 +1477,11 @@ disabled with --disable-FEATURE, default is enabled if available vvfat vvfat image format support qed qed image format support parallels parallels image format support - sheepdog sheepdog block driver support (deprecated) crypto-afalg Linux AF_ALG crypto backend driver - capstone capstone disassembler support debug-mutex mutex debugging support - libpmem libpmem support - xkbcommon xkbcommon support rng-none dummy RNG, avoid using /dev/(u)random and getrandom() - libdaxctl libdaxctl support - fuse FUSE block device export - fuse-lseek SEEK_HOLE/SEEK_DATA support for FUSE exports - 
multiprocess Out of process device emulation support gio libgio support + slirp-smbd use smbd (at path --smbd=*) in slirp networking NOTE: The object files are built at the place where configure is launched EOF @@ -1963,7 +1514,7 @@ python_version=$($python -c 'import sys; print("%d.%d.%d" % (sys.version_info[0] python="$python -B" if test -z "$meson"; then - if test "$explicit_python" = no && has meson && version_ge "$(meson --version)" 0.55.3; then + if test "$explicit_python" = no && has meson && version_ge "$(meson --version)" 0.59.3; then meson=meson elif test $git_submodules_action != 'ignore' ; then meson=git @@ -2053,17 +1604,17 @@ fi cat > $TMPC << EOF #if defined(__clang_major__) && defined(__clang_minor__) # ifdef __apple_build_version__ -# if __clang_major__ < 5 || (__clang_major__ == 5 && __clang_minor__ < 1) -# error You need at least XCode Clang v5.1 to compile QEMU +# if __clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 0) +# error You need at least XCode Clang v10.0 to compile QEMU # endif # else -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 4) -# error You need at least Clang v3.4 to compile QEMU +# if __clang_major__ < 6 || (__clang_major__ == 6 && __clang_minor__ < 0) +# error You need at least Clang v6.0 to compile QEMU # endif # endif #elif defined(__GNUC__) && defined(__GNUC_MINOR__) -# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) -# error You need at least GCC v4.8 to compile QEMU +# if __GNUC__ < 7 || (__GNUC__ == 7 && __GNUC_MINOR__ < 4) +# error You need at least GCC v7.4.0 to compile QEMU # endif #else # error You either need GCC or Clang to compiler QEMU @@ -2071,7 +1622,7 @@ cat > $TMPC << EOF int main (void) { return 0; } EOF if ! compile_prog "" "" ; then - error_exit "You need at least GCC v4.8 or Clang v3.4 (or XCode Clang v5.1)" + error_exit "You need at least GCC v7.4 or Clang v6.0 (or XCode Clang v10.0)" fi # Accumulate -Wfoo and -Wno-bar separately. @@ -2175,22 +1726,16 @@ if test "$modules" = "no" && test "$module_upgrades" = "yes" ; then error_exit "Can't enable module-upgrades as Modules are not enabled" fi -# Static linking is not possible with modules or PIE +# Static linking is not possible with plugins, modules or PIE if test "$static" = "yes" ; then if test "$modules" = "yes" ; then error_exit "static and modules are mutually incompatible" fi -fi - -# Unconditional check for compiler __thread support - cat > $TMPC << EOF -static __thread int tls_var; -int main(void) { return tls_var; } -EOF - -if ! compile_prog "-Werror" "" ; then - error_exit "Your compiler does not support the __thread specifier for " \ - "Thread-Local Storage (TLS). Please upgrade to a version that does." 
+ if test "$plugins" = "yes"; then + error_exit "static and plugins are mutually incompatible" + else + plugins="no" + fi fi cat > $TMPC << EOF @@ -2267,19 +1812,9 @@ EOF fi fi -######################################### -# Solaris specific configure tool chain decisions - -if test "$solaris" = "yes" ; then - if has ar; then - : - else - if test -f /usr/ccs/bin/ar ; then - error_exit "No path includes ar" \ - "Add /usr/ccs/bin to your path and rerun configure" - fi - error_exit "No path includes ar" - fi +if test "$tcg" = "enabled"; then + git_submodules="$git_submodules tests/fp/berkeley-testfloat-3" + git_submodules="$git_submodules tests/fp/berkeley-softfloat-3" fi if test -z "${target_list+xxx}" ; then @@ -2346,24 +1881,27 @@ feature_not_found() { # --- # big/little endian test cat > $TMPC << EOF +#include short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, }; short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, }; -extern int foo(short *, short *); -int main(int argc, char *argv[]) { - return foo(big_endian, little_endian); +int main(int argc, char *argv[]) +{ + return printf("%s %s\n", (char *)big_endian, (char *)little_endian); } EOF -if compile_object ; then - if strings -a $TMPO | grep -q BiGeNdIaN ; then +if compile_prog ; then + if strings -a $TMPE | grep -q BiGeNdIaN ; then bigendian="yes" - elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then + elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then bigendian="no" else - echo "big/little test failed (won't work with -flto=thin)" + echo big/little test failed + exit 1 fi else - echo "big/little test failed (failed to compile test binary)" + echo big/little test failed + exit 1 fi ########################################## @@ -2376,66 +1914,20 @@ if test -z "$want_tools"; then fi fi -########################################## -# Disable features only meaningful for system-mode emulation -if test "$softmmu" = "no"; then - audio_drv_list="" -fi +######################################### +# vhost interdependencies and host support -########################################## -# Some versions of Mac OS X incorrectly define SIZE_MAX -cat > $TMPC << EOF -#include -#include -int main(int argc, char *argv[]) { - return printf("%zu", SIZE_MAX); -} -EOF -have_broken_size_max=no -if ! 
compile_object -Werror ; then - have_broken_size_max=yes -fi - -########################################## -# L2TPV3 probe - -cat > $TMPC < -#include -int main(void) { return sizeof(struct mmsghdr); } -EOF -if compile_prog "" "" ; then - l2tpv3=yes -else - l2tpv3=no -fi - -cat > $TMPC < -int main(int argc, char *argv[]) { - return mlockall(MCL_FUTURE); -} -EOF -if compile_prog "" "" ; then - have_mlockall=yes -else - have_mlockall=no -fi - -######################################### -# vhost interdependencies and host support - -# vhost backends -if test "$vhost_user" = "yes" && test "$linux" != "yes"; then - error_exit "vhost-user is only available on Linux" -fi -test "$vhost_vdpa" = "" && vhost_vdpa=$linux -if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then - error_exit "vhost-vdpa is only available on Linux" -fi -test "$vhost_kernel" = "" && vhost_kernel=$linux -if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then - error_exit "vhost-kernel is only available on Linux" +# vhost backends +if test "$vhost_user" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-user is only available on Linux" +fi +test "$vhost_vdpa" = "" && vhost_vdpa=$linux +if test "$vhost_vdpa" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-vdpa is only available on Linux" +fi +test "$vhost_kernel" = "" && vhost_kernel=$linux +if test "$vhost_kernel" = "yes" && test "$linux" != "yes"; then + error_exit "vhost-kernel is only available on Linux" fi # vhost-kernel devices @@ -2481,25 +1973,6 @@ if ! has "$pkg_config_exe"; then error_exit "pkg-config binary '$pkg_config_exe' not found" fi -########################################## -# NPTL probe - -if test "$linux_user" = "yes"; then - cat > $TMPC < -#include -int main(void) { -#if !defined(CLONE_SETTLS) || !defined(FUTEX_WAIT) -#error bork -#endif - return 0; -} -EOF - if ! compile_object ; then - feature_not_found "nptl" "Install glibc and linux kernel headers." - fi -fi - ########################################## # xen probe @@ -2806,234 +2279,6 @@ EOF fi fi -########################################## -# GNUTLS probe - -if test "$gnutls" != "no"; then - pass="no" - if $pkg_config --exists "gnutls >= 3.1.18"; then - gnutls_cflags=$($pkg_config --cflags gnutls) - gnutls_libs=$($pkg_config --libs gnutls) - # Packaging for the static libraries is not always correct. - # At least ubuntu 18.04 ships only shared libraries. - write_c_skeleton - if compile_prog "" "$gnutls_libs" ; then - pass="yes" - fi - fi - if test "$pass" = "no" && test "$gnutls" = "yes"; then - feature_not_found "gnutls" "Install gnutls devel >= 3.1.18" - else - gnutls="$pass" - fi -fi - - -# If user didn't give a --disable/enable-gcrypt flag, -# then mark as disabled if user requested nettle -# explicitly -if test -z "$gcrypt" -then - if test "$nettle" = "yes" - then - gcrypt="no" - fi -fi - -# If user didn't give a --disable/enable-nettle flag, -# then mark as disabled if user requested gcrypt -# explicitly -if test -z "$nettle" -then - if test "$gcrypt" = "yes" - then - nettle="no" - fi -fi - -has_libgcrypt() { - if ! has "libgcrypt-config" - then - return 1 - fi - - if test -n "$cross_prefix" - then - host=$(libgcrypt-config --host) - if test "$host-" != $cross_prefix - then - return 1 - fi - fi - - maj=`libgcrypt-config --version | awk -F . '{print $1}'` - min=`libgcrypt-config --version | awk -F . 
'{print $2}'` - - if test $maj != 1 || test $min -lt 5 - then - return 1 - fi - - return 0 -} - - -if test "$nettle" != "no"; then - pass="no" - if $pkg_config --exists "nettle >= 2.7.1"; then - nettle_cflags=$($pkg_config --cflags nettle) - nettle_libs=$($pkg_config --libs nettle) - nettle_version=$($pkg_config --modversion nettle) - # Link test to make sure the given libraries work (e.g for static). - write_c_skeleton - if compile_prog "" "$nettle_libs" ; then - if test -z "$gcrypt"; then - gcrypt="no" - fi - pass="yes" - fi - fi - if test "$pass" = "yes" - then - cat > $TMPC << EOF -#include -int main(void) { - return 0; -} -EOF - if compile_prog "$nettle_cflags" "$nettle_libs" ; then - nettle_xts=yes - qemu_private_xts=no - fi - fi - if test "$pass" = "no" && test "$nettle" = "yes"; then - feature_not_found "nettle" "Install nettle devel >= 2.7.1" - else - nettle="$pass" - fi -fi - -if test "$gcrypt" != "no"; then - pass="no" - if has_libgcrypt; then - gcrypt_cflags=$(libgcrypt-config --cflags) - gcrypt_libs=$(libgcrypt-config --libs) - # Debian has removed -lgpg-error from libgcrypt-config - # as it "spreads unnecessary dependencies" which in - # turn breaks static builds... - if test "$static" = "yes" - then - gcrypt_libs="$gcrypt_libs -lgpg-error" - fi - - # Link test to make sure the given libraries work (e.g for static). - write_c_skeleton - if compile_prog "" "$gcrypt_libs" ; then - pass="yes" - fi - fi - if test "$pass" = "yes"; then - gcrypt="yes" - cat > $TMPC << EOF -#include -int main(void) { - gcry_mac_hd_t handle; - gcry_mac_open(&handle, GCRY_MAC_HMAC_MD5, - GCRY_MAC_FLAG_SECURE, NULL); - return 0; -} -EOF - if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then - gcrypt_hmac=yes - fi - cat > $TMPC << EOF -#include -int main(void) { - gcry_cipher_hd_t handle; - gcry_cipher_open(&handle, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_XTS, 0); - return 0; -} -EOF - if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then - gcrypt_xts=yes - qemu_private_xts=no - fi - elif test "$gcrypt" = "yes"; then - feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0" - else - gcrypt="no" - fi -fi - - -if test "$gcrypt" = "yes" && test "$nettle" = "yes" -then - error_exit "Only one of gcrypt & nettle can be enabled" -fi - -########################################## -# libtasn1 - only for the TLS creds/session test suite - -tasn1=yes -tasn1_cflags="" -tasn1_libs="" -if $pkg_config --exists "libtasn1"; then - tasn1_cflags=$($pkg_config --cflags libtasn1) - tasn1_libs=$($pkg_config --libs libtasn1) -else - tasn1=no -fi - - -########################################## -# PAM probe - -if test "$auth_pam" != "no"; then - cat > $TMPC < -#include -int main(void) { - const char *service_name = "qemu"; - const char *user = "frank"; - const struct pam_conv pam_conv = { 0 }; - pam_handle_t *pamh = NULL; - pam_start(service_name, user, &pam_conv, &pamh); - return 0; -} -EOF - if compile_prog "" "-lpam" ; then - auth_pam=yes - else - if test "$auth_pam" = "yes"; then - feature_not_found "PAM" "Install PAM development package" - else - auth_pam=no - fi - fi -fi - -########################################## -# VTE probe - -if test "$vte" != "no"; then - vteminversion="0.32.0" - if $pkg_config --exists "vte-2.91"; then - vtepackage="vte-2.91" - else - vtepackage="vte-2.90" - fi - if $pkg_config --exists "$vtepackage >= $vteminversion"; then - vte_cflags=$($pkg_config --cflags $vtepackage) - vte_libs=$($pkg_config --libs $vtepackage) - vteversion=$($pkg_config --modversion $vtepackage) - vte="yes" - elif test "$vte" 
= "yes"; then - feature_not_found "vte" "Install libvte-2.90/2.91 devel" - else - vte="no" - fi -fi - ########################################## # RDMA needs OpenFabrics libraries if test "$rdma" != "no" ; then @@ -3151,192 +2396,77 @@ EOF fi ########################################## -# vde libraries probe -if test "$vde" != "no" ; then - vde_libs="-lvdeplug" - cat > $TMPC << EOF -#include -int main(void) +# plugin linker support probe + +if test "$plugins" != "no"; then + + ######################################### + # See if --dynamic-list is supported by the linker + + ld_dynamic_list="no" + cat > $TMPTXT <=11). The upper bound (15) is meant to simulate -# a minor/major version number. Minor new features will be marked with values up -# to 15, and if something happens that requires a change to the backend we will -# move above 15, submit the backend fixes and modify this two bounds. -if test "$netmap" != "no" ; then - cat > $TMPC << EOF -#include -#include -#include -#include -#if (NETMAP_API < 11) || (NETMAP_API > 15) -#error -#endif -int main(void) { return 0; } -EOF - if compile_prog "" "" ; then - netmap=yes - else - if test "$netmap" = "yes" ; then - feature_not_found "netmap" - fi - netmap=no - fi -fi + cat > $TMPC < +void foo(void); + +void foo(void) +{ + printf("foo\n"); +} -########################################## -# detect CoreAudio -if test "$coreaudio" != "no" ; then - coreaudio_libs="-framework CoreAudio" - cat > $TMPC << EOF -#include int main(void) { - return (int)AudioGetCurrentHostTime(); + foo(); + return 0; } EOF - if compile_prog "" "$coreaudio_libs" ; then - coreaudio=yes - else - coreaudio=no - fi -fi - -########################################## -# Sound support libraries probe - -audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/,/ /g') -for drv in $audio_drv_list; do - case $drv in - alsa | try-alsa) - if $pkg_config alsa --exists; then - alsa_libs=$($pkg_config alsa --libs) - alsa_cflags=$($pkg_config alsa --cflags) - alsa=yes - if test "$drv" = "try-alsa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa/alsa/') - fi - else - if test "$drv" = "try-alsa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-alsa//') - else - error_exit "$drv check failed" \ - "Make sure to have the $drv libs and headers installed." - fi - fi - ;; - pa | try-pa) - if $pkg_config libpulse --exists; then - libpulse=yes - pulse_libs=$($pkg_config libpulse --libs) - pulse_cflags=$($pkg_config libpulse --cflags) - if test "$drv" = "try-pa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa/pa/') - fi - else - if test "$drv" = "try-pa"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-pa//') - else - error_exit "$drv check failed" \ - "Make sure to have the $drv libs and headers installed." 
- fi + if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then + ld_dynamic_list="yes" fi - ;; - sdl) - if test "$sdl" = "no"; then - error_exit "sdl not found or disabled, can not use sdl audio driver" - fi - ;; + ######################################### + # See if -exported_symbols_list is supported by the linker - try-sdl) - if test "$sdl" = "no"; then - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl//') - else - audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/try-sdl/sdl/') - fi - ;; + ld_exported_symbols_list="no" + cat > $TMPTXT <" || { - error_exit "Unknown driver '$drv' selected" \ - "Possible drivers are: $audio_possible_drivers" - } - ;; - esac -done +fi ########################################## # glib support probe -glib_req_ver=2.48 +glib_req_ver=2.56 glib_modules=gthread-2.0 if test "$modules" = yes; then glib_modules="$glib_modules gmodule-export-2.0" -fi -if test "$plugins" = yes; then - glib_modules="$glib_modules gmodule-2.0" +elif test "$plugins" = "yes"; then + glib_modules="$glib_modules gmodule-no-export-2.0" fi for i in $glib_modules; do @@ -3361,7 +2491,7 @@ if ! test "$gio" = "no"; then gio_cflags=$($pkg_config --cflags gio-2.0) gio_libs=$($pkg_config --libs gio-2.0) gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0) - if [ ! -x "$gdbus_codegen" ]; then + if ! has "$gdbus_codegen"; then gdbus_codegen= fi # Check that the libraries actually work -- Ubuntu 18.04 ships @@ -3423,18 +2553,6 @@ if ! compile_prog "$glib_cflags" "$glib_libs" ; then "build target" fi -# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage -cat > $TMPC << EOF -#include -int main(void) { return 0; } -EOF -if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then - if cc_has_warning_flag "-Wno-unknown-attributes"; then - glib_cflags="-Wno-unknown-attributes $glib_cflags" - CONFIGURE_CFLAGS="-Wno-unknown-attributes $CONFIGURE_CFLAGS" - fi -fi - # Silence clang warnings triggered by glib < 2.57.2 cat > $TMPC << EOF #include @@ -3471,137 +2589,20 @@ if test "$modules" = yes; then fi ########################################## -# pthread probe -PTHREADLIBS_LIST="-pthread -lpthread -lpthreadGC2" - -pthread=no -cat > $TMPC << EOF -#include -static void *f(void *p) { return NULL; } -int main(void) { - pthread_t thread; - pthread_create(&thread, 0, f, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - pthread=yes -else - for pthread_lib in $PTHREADLIBS_LIST; do - if compile_prog "" "$pthread_lib" ; then - pthread=yes - break - fi - done -fi - -if test "$mingw32" != yes && test "$pthread" = no; then - error_exit "pthread check failed" \ - "Make sure to have the pthread libs and headers installed." 
-fi - -# check for pthread_setname_np with thread id -pthread_setname_np_w_tid=no -cat > $TMPC << EOF -#include - -static void *f(void *p) { return NULL; } -int main(void) -{ - pthread_t thread; - pthread_create(&thread, 0, f, 0); - pthread_setname_np(thread, "QEMU"); - return 0; -} -EOF -if compile_prog "" "$pthread_lib" ; then - pthread_setname_np_w_tid=yes -fi - -# check for pthread_setname_np without thread id -pthread_setname_np_wo_tid=no -cat > $TMPC << EOF -#include - -static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; } -int main(void) -{ - pthread_t thread; - pthread_create(&thread, 0, f, 0); - return 0; -} -EOF -if compile_prog "" "$pthread_lib" ; then - pthread_setname_np_wo_tid=yes -fi - -########################################## -# libssh probe -if test "$libssh" != "no" ; then - if $pkg_config --exists libssh; then - libssh_cflags=$($pkg_config libssh --cflags) - libssh_libs=$($pkg_config libssh --libs) - libssh=yes - else - if test "$libssh" = "yes" ; then - error_exit "libssh required for --enable-libssh" +# libssh probe +if test "$libssh" != "no" ; then + if $pkg_config --exists "libssh >= 0.8.7"; then + libssh_cflags=$($pkg_config libssh --cflags) + libssh_libs=$($pkg_config libssh --libs) + libssh=yes + else + if test "$libssh" = "yes" ; then + error_exit "libssh required for --enable-libssh" fi libssh=no fi fi -########################################## -# Check for libssh 0.8 -# This is done like this instead of using the LIBSSH_VERSION_* and -# SSH_VERSION_* macros because some distributions in the past shipped -# snapshots of the future 0.8 from Git, and those snapshots did not -# have updated version numbers (still referring to 0.7.0). - -if test "$libssh" = "yes"; then - cat > $TMPC < -int main(void) { return ssh_get_server_publickey(NULL, NULL); } -EOF - if compile_prog "$libssh_cflags" "$libssh_libs"; then - libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags" - fi -fi - -########################################## -# linux-aio probe - -if test "$linux_aio" != "no" ; then - cat > $TMPC < -#include -#include -int main(void) { io_setup(0, NULL); io_set_eventfd(NULL, 0); eventfd(0, 0); return 0; } -EOF - if compile_prog "" "-laio" ; then - linux_aio=yes - else - if test "$linux_aio" = "yes" ; then - feature_not_found "linux AIO" "Install libaio devel" - fi - linux_aio=no - fi -fi -########################################## -# linux-io-uring probe - -if test "$linux_io_uring" != "no" ; then - if $pkg_config liburing; then - linux_io_uring_cflags=$($pkg_config --cflags liburing) - linux_io_uring_libs=$($pkg_config --libs liburing) - linux_io_uring=yes - else - if test "$linux_io_uring" = "yes" ; then - feature_not_found "linux io_uring" "Install liburing devel" - fi - linux_io_uring=no - fi -fi - ########################################## # TPM emulation is only on POSIX @@ -3617,40 +2618,18 @@ elif test "$tpm" = "yes"; then fi fi -########################################## -# iovec probe -cat > $TMPC < -#include -#include -int main(void) { return sizeof(struct iovec); } -EOF -iovec=no -if compile_prog "" "" ; then - iovec=yes -fi - ########################################## # fdt probe case "$fdt" in auto | enabled | internal) # Simpler to always update submodule, even if not needed. 
- if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} dtc" - fi + git_submodules="${git_submodules} dtc" ;; esac ########################################## -# opengl probe (for sdl2, gtk, milkymist-tmu2) - -gbm="no" -if $pkg_config gbm; then - gbm_cflags="$($pkg_config --cflags gbm)" - gbm_libs="$($pkg_config --libs gbm)" - gbm="yes" -fi +# opengl probe (for sdl2, gtk) if test "$opengl" != "no" ; then epoxy=no @@ -3679,566 +2658,47 @@ EOF fi ########################################## -# libxml2 probe -if test "$libxml2" != "no" ; then - if $pkg_config --exists libxml-2.0; then - libxml2="yes" - libxml2_cflags=$($pkg_config --cflags libxml-2.0) - libxml2_libs=$($pkg_config --libs libxml-2.0) - else - if test "$libxml2" = "yes"; then - feature_not_found "libxml2" "Install libxml2 devel" - fi - libxml2="no" - fi -fi - -# Check for inotify functions when we are building linux-user -# emulator. This is done because older glibc versions don't -# have syscall stubs for these implemented. In that case we -# don't provide them even if kernel supports them. -# -inotify=no -cat > $TMPC << EOF -#include - -int -main(void) -{ - /* try to start inotify */ - return inotify_init(); -} -EOF -if compile_prog "" "" ; then - inotify=yes -fi - -inotify1=no -cat > $TMPC << EOF -#include - -int -main(void) -{ - /* try to start inotify */ - return inotify_init1(0); -} -EOF -if compile_prog "" "" ; then - inotify1=yes -fi - -# check if pipe2 is there -pipe2=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - int pipefd[2]; - return pipe2(pipefd, O_CLOEXEC); -} -EOF -if compile_prog "" "" ; then - pipe2=yes -fi - -# check if accept4 is there -accept4=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - accept4(0, NULL, NULL, SOCK_CLOEXEC); - return 0; -} -EOF -if compile_prog "" "" ; then - accept4=yes -fi - -# check if tee/splice is there. vmsplice was added same time. 
-splice=no -cat > $TMPC << EOF -#include -#include -#include - -int main(void) -{ - int len, fd = 0; - len = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, SPLICE_F_NONBLOCK); - splice(STDIN_FILENO, NULL, fd, NULL, len, SPLICE_F_MOVE); - return 0; -} -EOF -if compile_prog "" "" ; then - splice=yes -fi - -########################################## -# libnuma probe - -if test "$numa" != "no" ; then - cat > $TMPC << EOF -#include -int main(void) { return numa_available(); } -EOF - - if compile_prog "" "-lnuma" ; then - numa=yes - numa_libs="-lnuma" - else - if test "$numa" = "yes" ; then - feature_not_found "numa" "install numactl devel" - fi - numa=no - fi -fi - -malloc=system -if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then - echo "ERROR: tcmalloc && jemalloc can't be used at the same time" - exit 1 -elif test "$tcmalloc" = "yes" ; then - malloc=tcmalloc -elif test "$jemalloc" = "yes" ; then - malloc=jemalloc -fi - -########################################## -# signalfd probe -signalfd="no" -cat > $TMPC << EOF -#include -#include -#include -int main(void) { return syscall(SYS_signalfd, -1, NULL, _NSIG / 8); } -EOF - -if compile_prog "" "" ; then - signalfd=yes -fi - -# check if optreset global is declared by -optreset="no" -cat > $TMPC << EOF -#include -int main(void) { return optreset; } -EOF - -if compile_prog "" "" ; then - optreset=yes -fi - -# check if eventfd is supported -eventfd=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); -} -EOF -if compile_prog "" "" ; then - eventfd=yes -fi - -# check if memfd is supported -memfd=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return memfd_create("foo", MFD_ALLOW_SEALING); -} -EOF -if compile_prog "" "" ; then - memfd=yes -fi - -# check for usbfs -have_usbfs=no -if test "$linux_user" = "yes"; then - cat > $TMPC << EOF -#include - -#ifndef USBDEVFS_GET_CAPABILITIES -#error "USBDEVFS_GET_CAPABILITIES undefined" -#endif - -#ifndef USBDEVFS_DISCONNECT_CLAIM -#error "USBDEVFS_DISCONNECT_CLAIM undefined" -#endif - -int main(void) -{ - return 0; -} -EOF - if compile_prog "" ""; then - have_usbfs=yes - fi -fi - -# check for fallocate -fallocate=no -cat > $TMPC << EOF -#include - -int main(void) -{ - fallocate(0, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fallocate=yes -fi - -# check for fallocate hole punching -fallocate_punch_hole=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fallocate_punch_hole=yes -fi - -# check that fallocate supports range zeroing inside the file -fallocate_zero_range=no -cat > $TMPC << EOF -#include -#include - -int main(void) -{ - fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fallocate_zero_range=yes -fi - -# check for posix_fallocate -posix_fallocate=no -cat > $TMPC << EOF -#include - -int main(void) -{ - posix_fallocate(0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - posix_fallocate=yes -fi - -# check for sync_file_range -sync_file_range=no -cat > $TMPC << EOF -#include - -int main(void) -{ - sync_file_range(0, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - sync_file_range=yes -fi - -# check for linux/fiemap.h and FS_IOC_FIEMAP -fiemap=no -cat > $TMPC << EOF -#include -#include -#include - -int main(void) -{ - ioctl(0, FS_IOC_FIEMAP, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - fiemap=yes -fi - -# check for dup3 
-dup3=no -cat > $TMPC << EOF -#include - -int main(void) -{ - dup3(0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - dup3=yes -fi - -# check for ppoll support -ppoll=no -cat > $TMPC << EOF -#include - -int main(void) -{ - struct pollfd pfd = { .fd = 0, .events = 0, .revents = 0 }; - ppoll(&pfd, 1, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - ppoll=yes -fi - -# check for prctl(PR_SET_TIMERSLACK , ... ) support -prctl_pr_set_timerslack=no -cat > $TMPC << EOF -#include - -int main(void) -{ - prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - prctl_pr_set_timerslack=yes -fi - -# check for epoll support -epoll=no -cat > $TMPC << EOF -#include - -int main(void) -{ - epoll_create(0); - return 0; -} -EOF -if compile_prog "" "" ; then - epoll=yes -fi - -# epoll_create1 is a later addition -# so we must check separately for its presence -epoll_create1=no -cat > $TMPC << EOF -#include - -int main(void) -{ - /* Note that we use epoll_create1 as a value, not as - * a function being called. This is necessary so that on - * old SPARC glibc versions where the function was present in - * the library but not declared in the header file we will - * fail the configure check. (Otherwise we will get a compiler - * warning but not an error, and will proceed to fail the - * qemu compile where we compile with -Werror.) - */ - return (int)(uintptr_t)&epoll_create1; -} -EOF -if compile_prog "" "" ; then - epoll_create1=yes -fi - -# check for sendfile support -sendfile=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return sendfile(0, 0, 0, 0); -} -EOF -if compile_prog "" "" ; then - sendfile=yes -fi - -# check for timerfd support (glibc 2.8 and newer) -timerfd=no -cat > $TMPC << EOF -#include - -int main(void) -{ - return(timerfd_create(CLOCK_REALTIME, 0)); -} -EOF -if compile_prog "" "" ; then - timerfd=yes -fi - -# check for setns and unshare support -setns=no -cat > $TMPC << EOF -#include - -int main(void) -{ - int ret; - ret = setns(0, 0); - ret = unshare(0); - return ret; -} -EOF -if compile_prog "" "" ; then - setns=yes -fi - -# clock_adjtime probe -clock_adjtime=no -cat > $TMPC < -#include - -int main(void) -{ - return clock_adjtime(0, 0); -} -EOF -clock_adjtime=no -if compile_prog "" "" ; then - clock_adjtime=yes -fi - -# syncfs probe -syncfs=no -cat > $TMPC < - -int main(void) -{ - return syncfs(0); -} -EOF -syncfs=no -if compile_prog "" "" ; then - syncfs=yes -fi - -# Search for bswap_32 function -byteswap_h=no -cat > $TMPC << EOF -#include -int main(void) { return bswap_32(0); } -EOF -if compile_prog "" "" ; then - byteswap_h=yes -fi - -# Search for bswap32 function -bswap_h=no -cat > $TMPC << EOF -#include -#include -#include -int main(void) { return bswap32(0); } -EOF -if compile_prog "" "" ; then - bswap_h=yes -fi - -# Check whether we have openpty() in either libc or libutil -cat > $TMPC << EOF -extern int openpty(int *am, int *as, char *name, void *termp, void *winp); -int main(void) { return openpty(0, 0, 0, 0, 0); } -EOF - -have_openpty="no" -if compile_prog "" "" ; then - have_openpty="yes" -else - if compile_prog "" "-lutil" ; then - have_openpty="yes" - fi -fi - -########################################## -# spice probe -if test "$spice" != "no" ; then +# libnuma probe + +if test "$numa" != "no" ; then cat > $TMPC << EOF -#include -int main(void) { spice_server_new(); return 0; } +#include +int main(void) { return numa_available(); } EOF - spice_cflags=$($pkg_config --cflags spice-protocol spice-server 2>/dev/null) - 
spice_libs=$($pkg_config --libs spice-protocol spice-server 2>/dev/null) - if $pkg_config --atleast-version=0.12.5 spice-server && \ - $pkg_config --atleast-version=0.12.3 spice-protocol && \ - compile_prog "$spice_cflags" "$spice_libs" ; then - spice="yes" + + if compile_prog "" "-lnuma" ; then + numa=yes + numa_libs="-lnuma" else - if test "$spice" = "yes" ; then - feature_not_found "spice" \ - "Install spice-server(>=0.12.5) and spice-protocol(>=0.12.3) devel" + if test "$numa" = "yes" ; then + feature_not_found "numa" "install numactl devel" fi - spice="no" + numa=no fi fi -# check for smartcard support -if test "$smartcard" != "no"; then - if $pkg_config --atleast-version=2.5.1 libcacard; then - libcacard_cflags=$($pkg_config --cflags libcacard) - libcacard_libs=$($pkg_config --libs libcacard) - smartcard="yes" - else - if test "$smartcard" = "yes"; then - feature_not_found "smartcard" "Install libcacard devel" - fi - smartcard="no" - fi -fi +# check for usbfs +have_usbfs=no +if test "$linux_user" = "yes"; then + cat > $TMPC << EOF +#include -# check for libusb -if test "$libusb" != "no" ; then - if $pkg_config --atleast-version=1.0.13 libusb-1.0; then - libusb="yes" - libusb_cflags=$($pkg_config --cflags libusb-1.0) - libusb_libs=$($pkg_config --libs libusb-1.0) - else - if test "$libusb" = "yes"; then - feature_not_found "libusb" "Install libusb devel >= 1.0.13" - fi - libusb="no" - fi -fi +#ifndef USBDEVFS_GET_CAPABILITIES +#error "USBDEVFS_GET_CAPABILITIES undefined" +#endif -# check for usbredirparser for usb network redirection support -if test "$usb_redir" != "no" ; then - if $pkg_config --atleast-version=0.6 libusbredirparser-0.5; then - usb_redir="yes" - usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5) - usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5) - else - if test "$usb_redir" = "yes"; then - feature_not_found "usb-redir" "Install usbredir devel" - fi - usb_redir="no" - fi +#ifndef USBDEVFS_DISCONNECT_CLAIM +#error "USBDEVFS_DISCONNECT_CLAIM undefined" +#endif + +int main(void) +{ + return 0; +} +EOF + if compile_prog "" ""; then + have_usbfs=yes + fi fi ########################################## @@ -4319,203 +2779,16 @@ EOF fi fi -########################################## -# virgl renderer probe - -if test "$virglrenderer" != "no" ; then - cat > $TMPC << EOF -#include -int main(void) { virgl_renderer_poll(); return 0; } -EOF - virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null) - virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null) - virgl_version=$($pkg_config --modversion virglrenderer 2>/dev/null) - if $pkg_config virglrenderer >/dev/null 2>&1 && \ - compile_prog "$virgl_cflags" "$virgl_libs" ; then - virglrenderer="yes" - else - if test "$virglrenderer" = "yes" ; then - feature_not_found "virglrenderer" - fi - virglrenderer="no" - fi -fi - ########################################## # capstone case "$capstone" in auto | enabled | internal) # Simpler to always update submodule, even if not needed. 
- if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} capstone" - fi + git_submodules="${git_submodules} capstone" ;; esac -########################################## -# check if we have fdatasync - -fdatasync=no -cat > $TMPC << EOF -#include -int main(void) { -#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0 -return fdatasync(0); -#else -#error Not supported -#endif -} -EOF -if compile_prog "" "" ; then - fdatasync=yes -fi - -########################################## -# check if we have madvise - -madvise=no -cat > $TMPC << EOF -#include -#include -#include -int main(void) { return madvise(NULL, 0, MADV_DONTNEED); } -EOF -if compile_prog "" "" ; then - madvise=yes -fi - -########################################## -# check if we have posix_madvise - -posix_madvise=no -cat > $TMPC << EOF -#include -#include -int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); } -EOF -if compile_prog "" "" ; then - posix_madvise=yes -fi - -########################################## -# check if we have posix_memalign() - -posix_memalign=no -cat > $TMPC << EOF -#include -int main(void) { - void *p; - return posix_memalign(&p, 8, 8); -} -EOF -if compile_prog "" "" ; then - posix_memalign=yes -fi - -########################################## -# check if we have posix_syslog - -posix_syslog=no -cat > $TMPC << EOF -#include -int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; } -EOF -if compile_prog "" "" ; then - posix_syslog=yes -fi - -########################################## -# check if we have sem_timedwait - -sem_timedwait=no -cat > $TMPC << EOF -#include -int main(void) { sem_t s; struct timespec t = {0}; return sem_timedwait(&s, &t); } -EOF -if compile_prog "" "" ; then - sem_timedwait=yes -fi - -########################################## -# check if we have strchrnul - -strchrnul=no -cat > $TMPC << EOF -#include -int main(void); -// Use a haystack that the compiler shouldn't be able to constant fold -char *haystack = (char*)&main; -int main(void) { return strchrnul(haystack, 'x') != &haystack[6]; } -EOF -if compile_prog "" "" ; then - strchrnul=yes -fi - -######################################### -# check if we have st_atim - -st_atim=no -cat > $TMPC << EOF -#include -#include -int main(void) { return offsetof(struct stat, st_atim); } -EOF -if compile_prog "" "" ; then - st_atim=yes -fi - -########################################## -# check if trace backend exists - -$python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends > /dev/null 2> /dev/null -if test "$?" -ne 0 ; then - error_exit "invalid trace backends" \ - "Please choose supported trace backends." -fi - -########################################## -# For 'ust' backend, test if ust headers are present -if have_backend "ust"; then - cat > $TMPC << EOF -#include -int main(void) { return 0; } -EOF - if compile_prog "" "-Wl,--no-as-needed -ldl" ; then - if $pkg_config lttng-ust --exists; then - lttng_ust_libs=$($pkg_config --libs lttng-ust) - else - lttng_ust_libs="-llttng-ust -ldl" - fi - if $pkg_config liburcu-bp --exists; then - urcu_bp_libs=$($pkg_config --libs liburcu-bp) - else - urcu_bp_libs="-lurcu-bp" - fi - else - error_exit "Trace backend 'ust' missing lttng-ust header files" - fi -fi - -########################################## -# For 'dtrace' backend, test if 'dtrace' command is present -if have_backend "dtrace"; then - if ! 
has 'dtrace' ; then - error_exit "dtrace command is not found in PATH $PATH" - fi - trace_backend_stap="no" - if has 'stap' ; then - trace_backend_stap="yes" - - # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol - # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility - # instead. QEMU --enable-modules depends on this because the SystemTap - # semaphores are linked into the main binary and not the module's shared - # object. - QEMU_CFLAGS="$QEMU_CFLAGS -DSTAP_SDT_V2" - fi -fi - ########################################## # check and set a backend for coroutine @@ -4644,65 +2917,6 @@ else # "$safe_stack" = "" fi fi -########################################## -# check if we have open_by_handle_at - -open_by_handle_at=no -cat > $TMPC << EOF -#include -#if !defined(AT_EMPTY_PATH) -# error missing definition -#else -int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); } -#endif -EOF -if compile_prog "" "" ; then - open_by_handle_at=yes -fi - -######################################## -# check if we have linux/magic.h - -linux_magic_h=no -cat > $TMPC << EOF -#include -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - linux_magic_h=yes -fi - -######################################## -# check if we have valgrind/valgrind.h - -valgrind_h=no -cat > $TMPC << EOF -#include -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - valgrind_h=yes -fi - -######################################## -# check if environ is declared - -has_environ=no -cat > $TMPC << EOF -#include -int main(void) { - environ = 0; - return 0; -} -EOF -if compile_prog "" "" ; then - has_environ=yes -fi - ######################################## # check if cpuid.h is usable. @@ -4747,7 +2961,7 @@ static int bar(void *a) { int main(int argc, char *argv[]) { return bar(argv[0]); } #pragma clang attribute pop EOF - if compile_object "" ; then + if compile_object "-Werror" ; then avx2_opt="yes" else avx2_opt="no" @@ -4777,7 +2991,7 @@ int main(int argc, char *argv[]) return bar(argv[0]); } EOF - if ! compile_object "" ; then + if ! compile_object "-Werror" ; then avx512f_opt="no" fi else @@ -4821,118 +3035,20 @@ EOF atomic128=yes fi fi - -cmpxchg128=no -if test "$int128" = yes && test "$atomic128" = no; then - cat > $TMPC << EOF -int main(void) -{ - unsigned __int128 x = 0, y = 0; - __sync_val_compare_and_swap_16(&x, y, x); - return 0; -} -EOF - if compile_prog "" "" ; then - cmpxchg128=yes - fi -fi - -######################################### -# See if 64-bit atomic operations are supported. -# Note that without __atomic builtins, we can only -# assume atomic loads/stores max at pointer size. - -cat > $TMPC << EOF -#include -int main(void) -{ - uint64_t x = 0, y = 0; -#ifdef __ATOMIC_RELAXED - y = __atomic_load_n(&x, __ATOMIC_RELAXED); - __atomic_store_n(&x, y, __ATOMIC_RELAXED); - __atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); - __atomic_exchange_n(&x, y, __ATOMIC_RELAXED); - __atomic_fetch_add(&x, y, __ATOMIC_RELAXED); -#else - typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 
1 : -1]; - __sync_lock_test_and_set(&x, y); - __sync_val_compare_and_swap(&x, y, 0); - __sync_fetch_and_add(&x, y); -#endif - return 0; -} -EOF -if compile_prog "" "" ; then - atomic64=yes -fi - -######################################### -# See if --dynamic-list is supported by the linker -ld_dynamic_list="no" -if test "$static" = "no" ; then - cat > $TMPTXT < $TMPC < -void foo(void); - -void foo(void) -{ - printf("foo\n"); -} - -int main(void) -{ - foo(); - return 0; -} -EOF - - if compile_prog "" "-Wl,--dynamic-list=$TMPTXT" ; then - ld_dynamic_list="yes" - fi -fi - -######################################### -# See if -exported_symbols_list is supported by the linker - -ld_exported_symbols_list="no" -if test "$static" = "no" ; then - cat > $TMPTXT < $TMPC << EOF -#include -int main(void) { - return getauxval(AT_HWCAP) == 0; + +cmpxchg128=no +if test "$int128" = yes && test "$atomic128" = no; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + __sync_val_compare_and_swap_16(&x, y, x); + return 0; } EOF -if compile_prog "" "" ; then - getauxval=yes + if compile_prog "" "" ; then + cmpxchg128=yes + fi fi ######################################## @@ -4977,35 +3093,6 @@ if test "$fortify_source" != "no"; then fi fi -############################################### -# Check if copy_file_range is provided by glibc -have_copy_file_range=no -cat > $TMPC << EOF -#include -int main(void) { - copy_file_range(0, NULL, 0, NULL, 0, 0); - return 0; -} -EOF -if compile_prog "" "" ; then - have_copy_file_range=yes -fi - -########################################## -# check if struct fsxattr is available via linux/fs.h - -have_fsxattr=no -cat > $TMPC << EOF -#include -struct fsxattr foo; -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - have_fsxattr=yes -fi - ########################################## # check for usable membarrier system call if test "$membarrier" = "yes"; then @@ -5037,46 +3124,6 @@ else membarrier=no fi -########################################## -# check if rtnetlink.h exists and is useful -have_rtnetlink=no -cat > $TMPC << EOF -#include -int main(void) { - return IFLA_PROTO_DOWN; -} -EOF -if compile_prog "" "" ; then - have_rtnetlink=yes -fi - -########################################## -# check for usable AF_VSOCK environment -have_af_vsock=no -cat > $TMPC << EOF -#include -#include -#include -#if !defined(AF_VSOCK) -# error missing AF_VSOCK flag -#endif -#include -int main(void) { - int sock, ret; - struct sockaddr_vm svm; - socklen_t len = sizeof(svm); - sock = socket(AF_VSOCK, SOCK_STREAM, 0); - ret = getpeername(sock, (struct sockaddr *)&svm, &len); - if ((ret == -1) && (errno == ENOTCONN)) { - return 0; - } - return -1; -} -EOF -if compile_prog "" "" ; then - have_af_vsock=yes -fi - ########################################## # check for usable AF_ALG environment have_afalg=no @@ -5103,63 +3150,6 @@ then fi -################################################# -# check for sysmacros.h - -have_sysmacros=no -cat > $TMPC << EOF -#include -int main(void) { - return makedev(0, 0); -} -EOF -if compile_prog "" "" ; then - have_sysmacros=yes -fi - -########################################## -# check for _Static_assert() - -have_static_assert=no -cat > $TMPC << EOF -_Static_assert(1, "success"); -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - have_static_assert=yes -fi - -########################################## -# check for utmpx.h, it is missing e.g. 
on OpenBSD - -have_utmpx=no -cat > $TMPC << EOF -#include -struct utmpx user_info; -int main(void) { - return 0; -} -EOF -if compile_prog "" "" ; then - have_utmpx=yes -fi - -########################################## -# check for getrandom() - -have_getrandom=no -cat > $TMPC << EOF -#include -int main(void) { - return getrandom(0, 0, GRND_NONBLOCK); -} -EOF -if compile_prog "" "" ; then - have_getrandom=yes -fi - ########################################## # checks for sanitizers @@ -5207,18 +3197,6 @@ EOF fi fi -########################################## -# checks for fuzzer -if test "$fuzzing" = "yes" && test -z "${LIB_FUZZING_ENGINE+xxx}"; then - write_c_fuzzer_skeleton - if compile_prog "$CPU_CFLAGS -Werror -fsanitize=fuzzer" ""; then - have_fuzzer=yes - else - error_exit "Your compiler doesn't support -fsanitize=fuzzer" - exit 1 - fi -fi - # Thread sanitizer is, for now, much noisier than the other sanitizers; # keep it separate until that is not the case. if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then @@ -5243,50 +3221,29 @@ EOF fi fi -########################################## -# check for libpmem - -if test "$libpmem" != "no"; then - if $pkg_config --exists "libpmem"; then - libpmem="yes" - libpmem_libs=$($pkg_config --libs libpmem) - libpmem_cflags=$($pkg_config --cflags libpmem) - else - if test "$libpmem" = "yes" ; then - feature_not_found "libpmem" "Install nvml or pmdk" - fi - libpmem="no" - fi -fi - -########################################## -# check for libdaxctl - -if test "$libdaxctl" != "no"; then - if $pkg_config --atleast-version=57 "libdaxctl"; then - libdaxctl="yes" - libdaxctl_libs=$($pkg_config --libs libdaxctl) - libdaxctl_cflags=$($pkg_config --cflags libdaxctl) - else - if test "$libdaxctl" = "yes" ; then - feature_not_found "libdaxctl" "Install libdaxctl" - fi - libdaxctl="no" - fi -fi - ########################################## # check for slirp case "$slirp" in auto | enabled | internal) # Simpler to always update submodule, even if not needed. - if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} slirp" - fi + git_submodules="${git_submodules} slirp" ;; esac +# Check for slirp smbd dupport +: ${smbd=${SMBD-/usr/sbin/smbd}} +if test "$slirp_smbd" != "no" ; then + if test "$mingw32" = "yes" ; then + if test "$slirp_smbd" = "yes" ; then + error_exit "Host smbd not supported on this platform." + fi + slirp_smbd=no + else + slirp_smbd=yes + fi +fi + ########################################## # check for usable __NR_keyctl syscall @@ -5398,11 +3355,6 @@ if test "$mingw32" = "yes" ; then done fi -# We can only support ivshmem if we have eventfd -if [ "$eventfd" = "yes" ]; then - ivshmem=yes -fi - # Probe for guest agent support/options if [ "$guest_agent" != "no" ]; then @@ -5449,15 +3401,20 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \ fi # Only build s390-ccw bios if we're on s390x and the compiler has -march=z900 +# or -march=z10 (which is the lowest architecture level that Clang supports) if test "$cpu" = "s390x" ; then write_c_skeleton - if compile_prog "-march=z900" ""; then + compile_prog "-march=z900" "" + has_z900=$? + if [ $has_z900 = 0 ] || compile_object "-march=z10 -msoft-float -Werror"; then + if [ $has_z900 != 0 ]; then + echo "WARNING: Your compiler does not support the z900!" + echo " The s390-ccw bios will only work with guest CPUs >= z10." 
+ fi roms="$roms s390-ccw" # SLOF is required for building the s390-ccw firmware on s390x, # since it is using the libnet code from SLOF for network booting. - if test "$git_submodules_action" != "ignore"; then - git_submodules="${git_submodules} roms/SLOF" - fi + git_submodules="${git_submodules} roms/SLOF" fi fi @@ -5480,7 +3437,7 @@ EOF update_cxxflags - if do_cxx $CXXFLAGS $CONFIGURE_CXXFLAGS $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $QEMU_LDFLAGS; then + if do_cxx $CXXFLAGS $EXTRA_CXXFLAGS $CONFIGURE_CXXFLAGS $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $QEMU_LDFLAGS; then # C++ compiler $cxx works ok with C compiler $cc : else @@ -5521,9 +3478,6 @@ fi if test "$strip_opt" = "yes" ; then echo "STRIP=${strip}" >> $config_host_mak fi -if test "$bigendian" = "yes" ; then - echo "HOST_WORDS_BIGENDIAN=y" >> $config_host_mak -fi if test "$mingw32" = "yes" ; then echo "CONFIG_WIN32=y" >> $config_host_mak if test "$guest_agent_with_vss" = "yes" ; then @@ -5568,47 +3522,18 @@ fi if test "$guest_agent" = "yes" ; then echo "CONFIG_GUEST_AGENT=y" >> $config_host_mak fi -echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak -if test "$vde" = "yes" ; then - echo "CONFIG_VDE=y" >> $config_host_mak - echo "VDE_LIBS=$vde_libs" >> $config_host_mak -fi -if test "$netmap" = "yes" ; then - echo "CONFIG_NETMAP=y" >> $config_host_mak -fi -if test "$l2tpv3" = "yes" ; then - echo "CONFIG_L2TPV3=y" >> $config_host_mak +if test "$slirp_smbd" = "yes" ; then + echo "CONFIG_SLIRP_SMBD=y" >> $config_host_mak + echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak fi if test "$gprof" = "yes" ; then echo "CONFIG_GPROF=y" >> $config_host_mak fi -echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak -for drv in $audio_drv_list; do - def=CONFIG_AUDIO_$(echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]') - echo "$def=y" >> $config_host_mak -done -if test "$alsa" = "yes" ; then - echo "CONFIG_ALSA=y" >> $config_host_mak -fi -echo "ALSA_LIBS=$alsa_libs" >> $config_host_mak -echo "ALSA_CFLAGS=$alsa_cflags" >> $config_host_mak -if test "$libpulse" = "yes" ; then - echo "CONFIG_LIBPULSE=y" >> $config_host_mak -fi -echo "PULSE_LIBS=$pulse_libs" >> $config_host_mak -echo "PULSE_CFLAGS=$pulse_cflags" >> $config_host_mak -echo "COREAUDIO_LIBS=$coreaudio_libs" >> $config_host_mak -echo "DSOUND_LIBS=$dsound_libs" >> $config_host_mak -echo "OSS_LIBS=$oss_libs" >> $config_host_mak -if test "$libjack" = "yes" ; then - echo "CONFIG_LIBJACK=y" >> $config_host_mak -fi -echo "JACK_LIBS=$jack_libs" >> $config_host_mak -if test "$audio_win_int" = "yes" ; then - echo "CONFIG_AUDIO_WIN_INT=y" >> $config_host_mak -fi echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak +if test "$block_drv_whitelist_tools" = "yes" ; then + echo "CONFIG_BDRV_WHITELIST_TOOLS=y" >> $config_host_mak +fi if test "$xfs" = "yes" ; then echo "CONFIG_XFS=y" >> $config_host_mak fi @@ -5625,169 +3550,25 @@ fi if test "$module_upgrades" = "yes"; then echo "CONFIG_MODULE_UPGRADES=y" >> $config_host_mak fi -if test "$pipe2" = "yes" ; then - echo "CONFIG_PIPE2=y" >> $config_host_mak -fi -if test "$accept4" = "yes" ; then - echo "CONFIG_ACCEPT4=y" >> $config_host_mak -fi -if test "$splice" = "yes" ; then - echo "CONFIG_SPLICE=y" >> $config_host_mak -fi -if test "$eventfd" = "yes" ; then - echo "CONFIG_EVENTFD=y" >> $config_host_mak -fi -if test "$memfd" = "yes" ; then - echo "CONFIG_MEMFD=y" >> $config_host_mak -fi if test "$have_usbfs" = "yes" ; then echo "CONFIG_USBFS=y" >> 
$config_host_mak fi -if test "$fallocate" = "yes" ; then - echo "CONFIG_FALLOCATE=y" >> $config_host_mak -fi -if test "$fallocate_punch_hole" = "yes" ; then - echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak -fi -if test "$fallocate_zero_range" = "yes" ; then - echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak -fi -if test "$posix_fallocate" = "yes" ; then - echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak -fi -if test "$sync_file_range" = "yes" ; then - echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak -fi -if test "$fiemap" = "yes" ; then - echo "CONFIG_FIEMAP=y" >> $config_host_mak -fi -if test "$dup3" = "yes" ; then - echo "CONFIG_DUP3=y" >> $config_host_mak -fi -if test "$ppoll" = "yes" ; then - echo "CONFIG_PPOLL=y" >> $config_host_mak -fi -if test "$prctl_pr_set_timerslack" = "yes" ; then - echo "CONFIG_PRCTL_PR_SET_TIMERSLACK=y" >> $config_host_mak -fi -if test "$epoll" = "yes" ; then - echo "CONFIG_EPOLL=y" >> $config_host_mak -fi -if test "$epoll_create1" = "yes" ; then - echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak -fi -if test "$sendfile" = "yes" ; then - echo "CONFIG_SENDFILE=y" >> $config_host_mak -fi -if test "$timerfd" = "yes" ; then - echo "CONFIG_TIMERFD=y" >> $config_host_mak -fi -if test "$setns" = "yes" ; then - echo "CONFIG_SETNS=y" >> $config_host_mak -fi -if test "$clock_adjtime" = "yes" ; then - echo "CONFIG_CLOCK_ADJTIME=y" >> $config_host_mak -fi -if test "$syncfs" = "yes" ; then - echo "CONFIG_SYNCFS=y" >> $config_host_mak -fi -if test "$inotify" = "yes" ; then - echo "CONFIG_INOTIFY=y" >> $config_host_mak -fi -if test "$inotify1" = "yes" ; then - echo "CONFIG_INOTIFY1=y" >> $config_host_mak -fi -if test "$sem_timedwait" = "yes" ; then - echo "CONFIG_SEM_TIMEDWAIT=y" >> $config_host_mak -fi -if test "$strchrnul" = "yes" ; then - echo "HAVE_STRCHRNUL=y" >> $config_host_mak -fi -if test "$st_atim" = "yes" ; then - echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak -fi -if test "$byteswap_h" = "yes" ; then - echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak -fi -if test "$bswap_h" = "yes" ; then - echo "CONFIG_MACHINE_BSWAP_H=y" >> $config_host_mak -fi if test "$gio" = "yes" ; then echo "CONFIG_GIO=y" >> $config_host_mak echo "GIO_CFLAGS=$gio_cflags" >> $config_host_mak echo "GIO_LIBS=$gio_libs" >> $config_host_mak +fi +if test "$gdbus_codegen" != "" ; then echo "GDBUS_CODEGEN=$gdbus_codegen" >> $config_host_mak fi echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak -if test "$gnutls" = "yes" ; then - echo "CONFIG_GNUTLS=y" >> $config_host_mak - echo "GNUTLS_CFLAGS=$gnutls_cflags" >> $config_host_mak - echo "GNUTLS_LIBS=$gnutls_libs" >> $config_host_mak -fi -if test "$gcrypt" = "yes" ; then - echo "CONFIG_GCRYPT=y" >> $config_host_mak - if test "$gcrypt_hmac" = "yes" ; then - echo "CONFIG_GCRYPT_HMAC=y" >> $config_host_mak - fi - echo "GCRYPT_CFLAGS=$gcrypt_cflags" >> $config_host_mak - echo "GCRYPT_LIBS=$gcrypt_libs" >> $config_host_mak -fi -if test "$nettle" = "yes" ; then - echo "CONFIG_NETTLE=y" >> $config_host_mak - echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak - echo "NETTLE_CFLAGS=$nettle_cflags" >> $config_host_mak - echo "NETTLE_LIBS=$nettle_libs" >> $config_host_mak -fi -if test "$qemu_private_xts" = "yes" ; then - echo "CONFIG_QEMU_PRIVATE_XTS=y" >> $config_host_mak -fi -if test "$tasn1" = "yes" ; then - echo "CONFIG_TASN1=y" >> $config_host_mak -fi -if test "$auth_pam" = "yes" ; then - echo "CONFIG_AUTH_PAM=y" >> $config_host_mak -fi -if test "$have_broken_size_max" = "yes" ; then - 
echo "HAVE_BROKEN_SIZE_MAX=y" >> $config_host_mak -fi -if test "$have_openpty" = "yes" ; then - echo "HAVE_OPENPTY=y" >> $config_host_mak -fi -# Work around a system header bug with some kernel/XFS header -# versions where they both try to define 'struct fsxattr': -# xfs headers will not try to redefine structs from linux headers -# if this macro is set. -if test "$have_fsxattr" = "yes" ; then - echo "HAVE_FSXATTR=y" >> $config_host_mak -fi -if test "$have_copy_file_range" = "yes" ; then - echo "HAVE_COPY_FILE_RANGE=y" >> $config_host_mak -fi -if test "$vte" = "yes" ; then - echo "CONFIG_VTE=y" >> $config_host_mak - echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak - echo "VTE_LIBS=$vte_libs" >> $config_host_mak -fi -if test "$virglrenderer" = "yes" ; then - echo "CONFIG_VIRGL=y" >> $config_host_mak - echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak - echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak -fi if test "$xen" = "enabled" ; then echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak echo "XEN_CFLAGS=$xen_cflags" >> $config_host_mak echo "XEN_LIBS=$xen_libs" >> $config_host_mak fi -if test "$linux_aio" = "yes" ; then - echo "CONFIG_LINUX_AIO=y" >> $config_host_mak -fi -if test "$linux_io_uring" = "yes" ; then - echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak - echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak - echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak -fi if test "$vhost_scsi" = "yes" ; then echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak fi @@ -5821,52 +3602,11 @@ fi if test "$vhost_user_fs" = "yes" ; then echo "CONFIG_VHOST_USER_FS=y" >> $config_host_mak fi -if test "$iovec" = "yes" ; then - echo "CONFIG_IOVEC=y" >> $config_host_mak -fi if test "$membarrier" = "yes" ; then echo "CONFIG_MEMBARRIER=y" >> $config_host_mak fi -if test "$signalfd" = "yes" ; then - echo "CONFIG_SIGNALFD=y" >> $config_host_mak -fi -if test "$optreset" = "yes" ; then - echo "HAVE_OPTRESET=y" >> $config_host_mak -fi -if test "$fdatasync" = "yes" ; then - echo "CONFIG_FDATASYNC=y" >> $config_host_mak -fi -if test "$madvise" = "yes" ; then - echo "CONFIG_MADVISE=y" >> $config_host_mak -fi -if test "$posix_madvise" = "yes" ; then - echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak -fi -if test "$posix_memalign" = "yes" ; then - echo "CONFIG_POSIX_MEMALIGN=y" >> $config_host_mak -fi -if test "$spice" = "yes" ; then - echo "CONFIG_SPICE=y" >> $config_host_mak - echo "SPICE_CFLAGS=$spice_cflags" >> $config_host_mak - echo "SPICE_LIBS=$spice_libs" >> $config_host_mak -fi - -if test "$smartcard" = "yes" ; then - echo "CONFIG_SMARTCARD=y" >> $config_host_mak - echo "SMARTCARD_CFLAGS=$libcacard_cflags" >> $config_host_mak - echo "SMARTCARD_LIBS=$libcacard_libs" >> $config_host_mak -fi - -if test "$libusb" = "yes" ; then - echo "CONFIG_USB_LIBUSB=y" >> $config_host_mak - echo "LIBUSB_CFLAGS=$libusb_cflags" >> $config_host_mak - echo "LIBUSB_LIBS=$libusb_libs" >> $config_host_mak -fi - -if test "$usb_redir" = "yes" ; then - echo "CONFIG_USB_REDIR=y" >> $config_host_mak - echo "USB_REDIR_CFLAGS=$usb_redir_cflags" >> $config_host_mak - echo "USB_REDIR_LIBS=$usb_redir_libs" >> $config_host_mak +if test "$tcg" = "enabled" -a "$tcg_interpreter" = "true" ; then + echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak fi if test "$opengl" = "yes" ; then @@ -5875,13 +3615,6 @@ if test "$opengl" = "yes" ; then echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak fi -if test "$gbm" = "yes" ; then - echo 
"CONFIG_GBM=y" >> $config_host_mak - echo "GBM_LIBS=$gbm_libs" >> $config_host_mak - echo "GBM_CFLAGS=$gbm_cflags" >> $config_host_mak -fi - - if test "$avx2_opt" = "yes" ; then echo "CONFIG_AVX2_OPT=y" >> $config_host_mak fi @@ -5914,18 +3647,6 @@ if test "$crypto_afalg" = "yes" ; then echo "CONFIG_AF_ALG=y" >> $config_host_mak fi -if test "$open_by_handle_at" = "yes" ; then - echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak -fi - -if test "$linux_magic_h" = "yes" ; then - echo "CONFIG_LINUX_MAGIC_H=y" >> $config_host_mak -fi - -if test "$valgrind_h" = "yes" ; then - echo "CONFIG_VALGRIND_H=y" >> $config_host_mak -fi - if test "$have_asan_iface_fiber" = "yes" ; then echo "CONFIG_ASAN_IFACE_FIBER=y" >> $config_host_mak fi @@ -5934,10 +3655,6 @@ if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then echo "CONFIG_TSAN=y" >> $config_host_mak fi -if test "$has_environ" = "yes" ; then - echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak -fi - if test "$cpuid_h" = "yes" ; then echo "CONFIG_CPUID_H=y" >> $config_host_mak fi @@ -5954,14 +3671,6 @@ if test "$cmpxchg128" = "yes" ; then echo "CONFIG_CMPXCHG128=y" >> $config_host_mak fi -if test "$atomic64" = "yes" ; then - echo "CONFIG_ATOMIC64=y" >> $config_host_mak -fi - -if test "$getauxval" = "yes" ; then - echo "CONFIG_GETAUXVAL=y" >> $config_host_mak -fi - if test "$libssh" = "yes" ; then echo "CONFIG_LIBSSH=y" >> $config_host_mak echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak @@ -5976,45 +3685,6 @@ if test "$tpm" = "yes"; then echo 'CONFIG_TPM=y' >> $config_host_mak fi -echo "TRACE_BACKENDS=$trace_backends" >> $config_host_mak -if have_backend "nop"; then - echo "CONFIG_TRACE_NOP=y" >> $config_host_mak -fi -if have_backend "simple"; then - echo "CONFIG_TRACE_SIMPLE=y" >> $config_host_mak - # Set the appropriate trace file. 
- trace_file="\"$trace_file-\" FMT_pid" -fi -if have_backend "log"; then - echo "CONFIG_TRACE_LOG=y" >> $config_host_mak -fi -if have_backend "ust"; then - echo "CONFIG_TRACE_UST=y" >> $config_host_mak - echo "LTTNG_UST_LIBS=$lttng_ust_libs" >> $config_host_mak - echo "URCU_BP_LIBS=$urcu_bp_libs" >> $config_host_mak -fi -if have_backend "dtrace"; then - echo "CONFIG_TRACE_DTRACE=y" >> $config_host_mak - if test "$trace_backend_stap" = "yes" ; then - echo "CONFIG_TRACE_SYSTEMTAP=y" >> $config_host_mak - fi -fi -if have_backend "ftrace"; then - if test "$linux" = "yes" ; then - echo "CONFIG_TRACE_FTRACE=y" >> $config_host_mak - else - feature_not_found "ftrace(trace backend)" "ftrace requires Linux" - fi -fi -if have_backend "syslog"; then - if test "$posix_syslog" = "yes" ; then - echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak - else - feature_not_found "syslog(trace backend)" "syslog not available" - fi -fi -echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak - if test "$rdma" = "yes" ; then echo "CONFIG_RDMA=y" >> $config_host_mak echo "RDMA_LIBS=$rdma_libs" >> $config_host_mak @@ -6024,69 +3694,14 @@ if test "$pvrdma" = "yes" ; then echo "CONFIG_PVRDMA=y" >> $config_host_mak fi -if test "$have_rtnetlink" = "yes" ; then - echo "CONFIG_RTNETLINK=y" >> $config_host_mak -fi - -if test "$libxml2" = "yes" ; then - echo "CONFIG_LIBXML2=y" >> $config_host_mak - echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak - echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak -fi - if test "$replication" = "yes" ; then echo "CONFIG_REPLICATION=y" >> $config_host_mak fi -if test "$have_af_vsock" = "yes" ; then - echo "CONFIG_AF_VSOCK=y" >> $config_host_mak -fi - -if test "$have_sysmacros" = "yes" ; then - echo "CONFIG_SYSMACROS=y" >> $config_host_mak -fi - -if test "$have_static_assert" = "yes" ; then - echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak -fi - -if test "$have_utmpx" = "yes" ; then - echo "HAVE_UTMPX=y" >> $config_host_mak -fi -if test "$have_getrandom" = "yes" ; then - echo "CONFIG_GETRANDOM=y" >> $config_host_mak -fi -if test "$ivshmem" = "yes" ; then - echo "CONFIG_IVSHMEM=y" >> $config_host_mak -fi if test "$debug_mutex" = "yes" ; then echo "CONFIG_DEBUG_MUTEX=y" >> $config_host_mak fi -# Hold two types of flag: -# CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on -# a thread we have a handle to -# CONFIG_PTHREAD_SETNAME_NP_W_TID - A way of doing it on a particular -# platform -if test "$pthread_setname_np_w_tid" = "yes" ; then - echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak - echo "CONFIG_PTHREAD_SETNAME_NP_W_TID=y" >> $config_host_mak -elif test "$pthread_setname_np_wo_tid" = "yes" ; then - echo "CONFIG_THREAD_SETNAME_BYTHREAD=y" >> $config_host_mak - echo "CONFIG_PTHREAD_SETNAME_NP_WO_TID=y" >> $config_host_mak -fi - -if test "$libpmem" = "yes" ; then - echo "CONFIG_LIBPMEM=y" >> $config_host_mak - echo "LIBPMEM_LIBS=$libpmem_libs" >> $config_host_mak - echo "LIBPMEM_CFLAGS=$libpmem_cflags" >> $config_host_mak -fi - -if test "$libdaxctl" = "yes" ; then - echo "CONFIG_LIBDAXCTL=y" >> $config_host_mak - echo "LIBDAXCTL_LIBS=$libdaxctl_libs" >> $config_host_mak -fi - if test "$bochs" = "yes" ; then echo "CONFIG_BOCHS=y" >> $config_host_mak fi @@ -6111,33 +3726,6 @@ fi if test "$parallels" = "yes" ; then echo "CONFIG_PARALLELS=y" >> $config_host_mak fi -if test "$sheepdog" = "yes" ; then - add_to deprecated_features "sheepdog" - echo "CONFIG_SHEEPDOG=y" >> $config_host_mak -fi -if test "$have_mlockall" = "yes" ; then - echo 
"HAVE_MLOCKALL=y" >> $config_host_mak -fi -if test "$fuzzing" = "yes" ; then - # If LIB_FUZZING_ENGINE is set, assume we are running on OSS-Fuzz, and the - # needed CFLAGS have already been provided - if test -z "${LIB_FUZZING_ENGINE+xxx}" ; then - # Add CFLAGS to tell clang to add fuzzer-related instrumentation to all the - # compiled code. - QEMU_CFLAGS="$QEMU_CFLAGS -fsanitize=fuzzer-no-link" - # To build non-fuzzer binaries with --enable-fuzzing, link everything with - # fsanitize=fuzzer-no-link. Otherwise, the linker will be unable to bind - # the fuzzer-related callbacks added by instrumentation. - QEMU_LDFLAGS="$QEMU_LDFLAGS -fsanitize=fuzzer-no-link" - # For the actual fuzzer binaries, we need to link against the libfuzzer - # library. Provide the flags for doing this in FUZZ_EXE_LDFLAGS. The meson - # rule for the fuzzer adds these to the link_args. They need to be - # configurable, to support OSS-Fuzz - FUZZ_EXE_LDFLAGS="-fsanitize=fuzzer" - else - FUZZ_EXE_LDFLAGS="$LIB_FUZZING_ENGINE" - fi -fi if test "$plugins" = "yes" ; then echo "CONFIG_PLUGIN=y" >> $config_host_mak @@ -6177,22 +3765,16 @@ echo "GENISOIMAGE=$genisoimage" >> $config_host_mak echo "MESON=$meson" >> $config_host_mak echo "NINJA=$ninja" >> $config_host_mak echo "CC=$cc" >> $config_host_mak +echo "HOST_CC=$host_cc" >> $config_host_mak if $iasl -h > /dev/null 2>&1; then echo "CONFIG_IASL=$iasl" >> $config_host_mak fi -echo "CXX=$cxx" >> $config_host_mak -echo "OBJCC=$objcc" >> $config_host_mak echo "AR=$ar" >> $config_host_mak -echo "ARFLAGS=$ARFLAGS" >> $config_host_mak echo "AS=$as" >> $config_host_mak echo "CCAS=$ccas" >> $config_host_mak echo "CPP=$cpp" >> $config_host_mak echo "OBJCOPY=$objcopy" >> $config_host_mak echo "LD=$ld" >> $config_host_mak -echo "RANLIB=$ranlib" >> $config_host_mak -echo "NM=$nm" >> $config_host_mak -echo "PKG_CONFIG=$pkg_config_exe" >> $config_host_mak -echo "WINDRES=$windres" >> $config_host_mak echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak echo "QEMU_CXXFLAGS=$QEMU_CXXFLAGS" >> $config_host_mak @@ -6201,18 +3783,7 @@ echo "GLIB_LIBS=$glib_libs" >> $config_host_mak echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak echo "EXESUF=$EXESUF" >> $config_host_mak -echo "HOST_DSOSUF=$HOST_DSOSUF" >> $config_host_mak echo "LIBS_QGA=$libs_qga" >> $config_host_mak -echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak -echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak -if test "$gcov" = "yes" ; then - echo "CONFIG_GCOV=y" >> $config_host_mak -fi - -if test "$fuzzing" != "no"; then - echo "CONFIG_FUZZ=y" >> $config_host_mak -fi -echo "FUZZ_EXE_LDFLAGS=$FUZZ_EXE_LDFLAGS" >> $config_host_mak if test "$rng_none" = "yes"; then echo "CONFIG_RNG_NONE=y" >> $config_host_mak @@ -6289,21 +3860,20 @@ fi # UNLINK is used to remove symlinks from older development versions # that might get into the way when doing "git update" without doing # a "make distclean" in between. 
-DIRS="tests tests/tcg tests/tcg/lm32 tests/qapi-schema tests/qtest/libqos" +DIRS="tests tests/tcg tests/qapi-schema tests/qtest/libqos" DIRS="$DIRS tests/qtest tests/qemu-iotests tests/vm tests/fp tests/qgraph" DIRS="$DIRS docs docs/interop fsdev scsi" DIRS="$DIRS pc-bios/optionrom pc-bios/s390-ccw" DIRS="$DIRS roms/seabios" DIRS="$DIRS contrib/plugins/" LINKS="Makefile" -LINKS="$LINKS tests/tcg/lm32/Makefile" LINKS="$LINKS tests/tcg/Makefile.target" LINKS="$LINKS pc-bios/optionrom/Makefile" LINKS="$LINKS pc-bios/s390-ccw/Makefile" LINKS="$LINKS roms/seabios/Makefile" LINKS="$LINKS pc-bios/qemu-icon.bmp" LINKS="$LINKS .gdbinit scripts" # scripts needed by relative path in .gdbinit -LINKS="$LINKS tests/acceptance tests/data" +LINKS="$LINKS tests/avocado tests/data" LINKS="$LINKS tests/qemu-iotests/check" LINKS="$LINKS python" LINKS="$LINKS contrib/plugins/Makefile " @@ -6318,7 +3888,9 @@ for bios_file in \ $source_path/pc-bios/openbios-* \ $source_path/pc-bios/u-boot.* \ $source_path/pc-bios/edk2-*.fd.bz2 \ - $source_path/pc-bios/palcode-* + $source_path/pc-bios/palcode-* \ + $source_path/pc-bios/qemu_vga.ndrv + do LINKS="$LINKS pc-bios/$(basename $bios_file)" done @@ -6356,34 +3928,46 @@ for rom in seabios; do echo "RANLIB=$ranlib" >> $config_mak done +config_mak=pc-bios/optionrom/config.mak +echo "# Automatically generated by configure - do not modify" > $config_mak +echo "TOPSRC_DIR=$source_path" >> $config_mak + if test "$skip_meson" = no; then -cross="config-meson.cross.new" -meson_quote() { + cross="config-meson.cross.new" + meson_quote() { + test $# = 0 && return echo "'$(echo $* | sed "s/ /','/g")'" -} + } -echo "# Automatically generated by configure - do not modify" > $cross -echo "[properties]" >> $cross -test -z "$cxx" && echo "link_language = 'c'" >> $cross -echo "[built-in options]" >> $cross -echo "c_args = [${CFLAGS:+$(meson_quote $CFLAGS)}]" >> $cross -echo "cpp_args = [${CXXFLAGS:+$(meson_quote $CXXFLAGS)}]" >> $cross -echo "c_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross -echo "cpp_link_args = [${LDFLAGS:+$(meson_quote $LDFLAGS)}]" >> $cross -echo "[binaries]" >> $cross -echo "c = [$(meson_quote $cc)]" >> $cross -test -n "$cxx" && echo "cpp = [$(meson_quote $cxx)]" >> $cross -test -n "$objcc" && echo "objc = [$(meson_quote $objcc)]" >> $cross -echo "ar = [$(meson_quote $ar)]" >> $cross -echo "nm = [$(meson_quote $nm)]" >> $cross -echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross -echo "ranlib = [$(meson_quote $ranlib)]" >> $cross -if has $sdl2_config; then - echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross -fi -echo "strip = [$(meson_quote $strip)]" >> $cross -echo "windres = [$(meson_quote $windres)]" >> $cross -if test "$cross_compile" = "yes"; then + echo "# Automatically generated by configure - do not modify" > $cross + echo "[properties]" >> $cross + + # unroll any custom device configs + for a in $device_archs; do + eval "c=\$devices_${a}" + echo "${a}-softmmu = '$c'" >> $cross + done + + test -z "$cxx" && echo "link_language = 'c'" >> $cross + echo "[built-in options]" >> $cross + echo "c_args = [$(meson_quote $CFLAGS $EXTRA_CFLAGS)]" >> $cross + echo "cpp_args = [$(meson_quote $CXXFLAGS $EXTRA_CXXFLAGS)]" >> $cross + echo "c_link_args = [$(meson_quote $CFLAGS $LDFLAGS $EXTRA_CFLAGS $EXTRA_LDFLAGS)]" >> $cross + echo "cpp_link_args = [$(meson_quote $CXXFLAGS $LDFLAGS $EXTRA_CXXFLAGS $EXTRA_LDFLAGS)]" >> $cross + echo "[binaries]" >> $cross + echo "c = [$(meson_quote $cc $CPU_CFLAGS)]" >> $cross + test -n "$cxx" && echo "cpp 
= [$(meson_quote $cxx $CPU_CFLAGS)]" >> $cross + test -n "$objcc" && echo "objc = [$(meson_quote $objcc $CPU_CFLAGS)]" >> $cross + echo "ar = [$(meson_quote $ar)]" >> $cross + echo "nm = [$(meson_quote $nm)]" >> $cross + echo "pkgconfig = [$(meson_quote $pkg_config_exe)]" >> $cross + echo "ranlib = [$(meson_quote $ranlib)]" >> $cross + if has $sdl2_config; then + echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross + fi + echo "strip = [$(meson_quote $strip)]" >> $cross + echo "windres = [$(meson_quote $windres)]" >> $cross + if test "$cross_compile" = "yes"; then cross_arg="--cross-file config-meson.cross" echo "[host_machine]" >> $cross if test "$mingw32" = "yes" ; then @@ -6399,7 +3983,7 @@ if test "$cross_compile" = "yes"; then i386) echo "cpu_family = 'x86'" >> $cross ;; - x86_64) + x86_64|x32) echo "cpu_family = 'x86_64'" >> $cross ;; ppc64le) @@ -6415,17 +3999,14 @@ if test "$cross_compile" = "yes"; then else echo "endian = 'little'" >> $cross fi -else + else cross_arg="--native-file config-meson.cross" -fi -mv $cross config-meson.cross + fi + mv $cross config-meson.cross -rm -rf meson-private meson-info meson-logs -unset staticpic -if ! version_ge "$($meson --version)" 0.56.0; then - staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) -fi -NINJA=$ninja $meson setup \ + rm -rf meson-private meson-info meson-logs + run_meson() { + NINJA=$ninja $meson setup \ --prefix "$prefix" \ --libdir "$libdir" \ --libexecdir "$libexecdir" \ @@ -6436,48 +4017,58 @@ NINJA=$ninja $meson setup \ --sysconfdir "$sysconfdir" \ --localedir "$localedir" \ --localstatedir "$local_statedir" \ + -Daudio_drv_list=$audio_drv_list \ + -Ddefault_devices=$default_devices \ -Ddocdir="$docdir" \ -Dqemu_firmwarepath="$firmwarepath" \ -Dqemu_suffix="$qemu_suffix" \ + -Dsphinx_build="$sphinx_build" \ + -Dtrace_file="$trace_file" \ -Doptimization=$(if test "$debug" = yes; then echo 0; else echo 2; fi) \ -Ddebug=$(if test "$debug_info" = yes; then echo true; else echo false; fi) \ -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ - ${staticpic:+-Db_staticpic=$staticpic} \ -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ - -Db_lto=$lto -Dcfi=$cfi -Dcfi_debug=$cfi_debug \ - -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ - -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf \ - -Dxen=$xen -Dxen_pci_passthrough=$xen_pci_passthrough -Dtcg=$tcg \ - -Dcocoa=$cocoa -Dgtk=$gtk -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \ - -Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \ - -Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f -Dvirtiofsd=$virtiofsd \ - -Dcapstone=$capstone -Dslirp=$slirp -Dfdt=$fdt -Dbrlapi=$brlapi \ - -Dcurl=$curl -Dglusterfs=$glusterfs -Dbzip2=$bzip2 -Dlibiscsi=$libiscsi \ - -Dlibnfs=$libnfs -Diconv=$iconv -Dcurses=$curses -Dlibudev=$libudev\ - -Drbd=$rbd -Dlzo=$lzo -Dsnappy=$snappy -Dlzfse=$lzfse \ - -Dzstd=$zstd -Dseccomp=$seccomp -Dvirtfs=$virtfs -Dcap_ng=$cap_ng \ - -Dattr=$attr -Ddefault_devices=$default_devices \ - -Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \ - -Dvhost_user_blk_server=$vhost_user_blk_server -Dmultiprocess=$multiprocess \ - -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi \ - $(if test "$default_features" = no; then echo "-Dauto_features=disabled"; fi) \ - 
-Dtcg_interpreter=$tcg_interpreter \ - $cross_arg \ - "$PWD" "$source_path" - -if test "$?" -ne 0 ; then - error_exit "meson setup failed" -fi + -Db_lto=$lto -Dcfi=$cfi -Dtcg=$tcg -Dxen=$xen \ + -Dcapstone=$capstone -Dfdt=$fdt -Dslirp=$slirp \ + $(test -n "${LIB_FUZZING_ENGINE+xxx}" && echo "-Dfuzzing_engine=$LIB_FUZZING_ENGINE") \ + $(if test "$default_feature" = no; then echo "-Dauto_features=disabled"; fi) \ + "$@" $cross_arg "$PWD" "$source_path" + } + eval run_meson $meson_options + if test "$?" -ne 0 ; then + error_exit "meson setup failed" + fi +else + if test -f meson-private/cmd_line.txt; then + # Adjust old command line options whose type was changed + # Avoids having to use "setup --wipe" when Meson is upgraded + perl -i -ne ' + s/^gettext = true$/gettext = auto/; + s/^gettext = false$/gettext = disabled/; + /^b_staticpic/ && next; + print;' meson-private/cmd_line.txt + fi fi if test -n "${deprecated_features}"; then echo "Warning, deprecated features enabled." - echo "Please see docs/system/deprecated.rst" + echo "Please see docs/about/deprecated.rst" echo " features: ${deprecated_features}" fi +# Create list of config switches that should be poisoned in common code... +# but filter out CONFIG_TCG and CONFIG_USER_ONLY which are special. +target_configs_h=$(ls *-config-devices.h *-config-target.h 2>/dev/null) +if test -n "$target_configs_h" ; then + sed -n -e '/CONFIG_TCG/d' -e '/CONFIG_USER_ONLY/d' \ + -e '/^#define / { s///; s/ .*//; s/^/#pragma GCC poison /p; }' \ + $target_configs_h | sort -u > config-poison.h +else + :> config-poison.h +fi + # Save the configure command line for later reuse. cat <config.status #!/bin/sh @@ -6507,9 +4098,12 @@ preserve_env AR preserve_env AS preserve_env CC preserve_env CPP +preserve_env CFLAGS preserve_env CXX +preserve_env CXXFLAGS preserve_env INSTALL preserve_env LD +preserve_env LDFLAGS preserve_env LD_LIBRARY_PATH preserve_env LIBTOOL preserve_env MAKE diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c index d09e607431f..bd7650a7a27 100644 --- a/contrib/elf2dmp/download.c +++ b/contrib/elf2dmp/download.c @@ -25,21 +25,19 @@ int download_url(const char *name, const char *url) goto out_curl; } - curl_easy_setopt(curl, CURLOPT_URL, url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, file); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0); - - if (curl_easy_perform(curl) != CURLE_OK) { - err = 1; - fclose(file); + if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK + || curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK + || curl_easy_perform(curl) != CURLE_OK) { unlink(name); - goto out_curl; + fclose(file); + err = 1; + } else { + err = fclose(file); } - err = fclose(file); - out_curl: curl_easy_cleanup(curl); diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index b3a65470680..adcfa7e154c 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -215,6 +215,10 @@ static int pdb_init_symbols(struct pdb_reader *r) static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr) { + if (hdr->block_size == 0) { + return 1; + } + memset(r->file_used, 0, sizeof(r->file_used)); r->ds.header = hdr; r->ds.toc = pdb_ds_read(hdr, (uint32_t *)((uint8_t *)hdr + diff --git a/contrib/gitdm/aliases 
b/contrib/gitdm/aliases index c1e744312f5..4792413ce78 100644 --- a/contrib/gitdm/aliases +++ b/contrib/gitdm/aliases @@ -31,6 +31,12 @@ pbrook@c046a42c-6fe2-441c-8c8c-71466251a162 paul@codesourcery.com ths@c046a42c-6fe2-441c-8c8c-71466251a162 ths@networkno.de malc@c046a42c-6fe2-441c-8c8c-71466251a162 av1474@comtv.ru +# canonical emails +liq3ea@163.com liq3ea@gmail.com + +# some broken tags +yuval.shaia.ml.gmail.com yuval.shaia.ml@gmail.com + # There is also a: # (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162> # for the cvs2svn initialization commit e63c3dc74bf. diff --git a/contrib/gitdm/domain-map b/contrib/gitdm/domain-map index 0074da618f5..2800d9f986a 100644 --- a/contrib/gitdm/domain-map +++ b/contrib/gitdm/domain-map @@ -9,6 +9,8 @@ baidu.com Baidu bytedance.com ByteDance cmss.chinamobile.com China Mobile citrix.com Citrix +crudebyte.com Crudebyte +eldorado.org.br Instituto de Pesquisas Eldorado fujitsu.com Fujitsu google.com Google greensocs.com GreenSocs @@ -17,20 +19,25 @@ ibm.com IBM igalia.com Igalia intel.com Intel linaro.org Linaro +lwn.net LWN microsoft.com Microsoft +mvista.com MontaVista nokia.com Nokia nuviainc.com NUVIA +nvidia.com NVIDIA oracle.com Oracle proxmox.com Proxmox quicinc.com Qualcomm Innovation Center redhat.com Red Hat rt-rk.com RT-RK +samsung.com Samsung siemens.com Siemens sifive.com SiFive suse.com SUSE suse.de SUSE virtuozzo.com Virtuozzo wdc.com Western Digital +windriver.com Wind River xilinx.com Xilinx yadro.com YADRO yandex-team.ru Yandex diff --git a/contrib/gitdm/group-map-academics b/contrib/gitdm/group-map-academics index bf3c894821b..44745ca85b6 100644 --- a/contrib/gitdm/group-map-academics +++ b/contrib/gitdm/group-map-academics @@ -16,3 +16,6 @@ cota@braap.org uni-paderborn.de edu edu.cn + +# Boston University +bu.edu diff --git a/contrib/gitdm/group-map-individuals b/contrib/gitdm/group-map-individuals index 36bbb77c39a..f816aa87702 100644 --- a/contrib/gitdm/group-map-individuals +++ b/contrib/gitdm/group-map-individuals @@ -29,3 +29,8 @@ mrolnik@gmail.com huth@tuxfamily.org jhogan@kernel.org atar4qemu@gmail.com +minwoo.im.dev@gmail.com +bmeng.cn@gmail.com +liq3ea@gmail.com +chetan4windows@gmail.com +akihiko.odaki@gmail.com diff --git a/contrib/gitdm/group-map-interns b/contrib/gitdm/group-map-interns new file mode 100644 index 00000000000..fe33a3231ec --- /dev/null +++ b/contrib/gitdm/group-map-interns @@ -0,0 +1,13 @@ +# +# Group together everyone working as an intern via one of the various +# outreach programs. +# + +# GSoC 2020 Virtual FIDO/U2F security key +cesar.belley@lse.epita.fr + +# GSoC 2020 TCG performance +ahmedkhaledkaraman@gmail.com + +# GSoC 2021 TCG plugins +ma.mandourr@gmail.com diff --git a/contrib/gitdm/group-map-netflix b/contrib/gitdm/group-map-netflix new file mode 100644 index 00000000000..468f95dcb2e --- /dev/null +++ b/contrib/gitdm/group-map-netflix @@ -0,0 +1,5 @@ +# +# Netflix contributors using their personal emails +# + +imp@bsdimp.com diff --git a/contrib/gitdm/group-map-robots b/contrib/gitdm/group-map-robots new file mode 100644 index 00000000000..ffd956c2eb5 --- /dev/null +++ b/contrib/gitdm/group-map-robots @@ -0,0 +1,7 @@ +# +# There are various automatic robots that occasionally scan and report +# bugs. Let's group them together here. 
+# + +# Euler Robot +euler.robot@huawei.com diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index b9d7935e5ef..54ac5ccd9ff 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -13,18 +13,20 @@ include $(BUILD_DIR)/config-host.mak VPATH += $(SRC_PATH)/contrib/plugins NAMES := +NAMES += execlog NAMES += hotblocks NAMES += hotpages NAMES += howvec NAMES += lockstep NAMES += hwprofile +NAMES += cache SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES))) # The main QEMU uses Glib extensively so it's perfectly fine to use it # in plugins (which many example do). CFLAGS = $(GLIB_CFLAGS) -CFLAGS += -fPIC +CFLAGS += -fPIC -Wall $(filter -W%, $(QEMU_CFLAGS)) CFLAGS += $(if $(findstring no-psabi,$(QEMU_CFLAGS)),-Wpsabi) CFLAGS += -I$(SRC_PATH)/include/qemu diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c new file mode 100644 index 00000000000..b9226e7c40b --- /dev/null +++ b/contrib/plugins/cache.c @@ -0,0 +1,860 @@ +/* + * Copyright (C) 2021, Mahmoud Mandour + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include +#include + +#include + +#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10) + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; + +static GHashTable *miss_ht; + +static GMutex hashtable_lock; +static GRand *rng; + +static int limit; +static bool sys; + +enum EvictionPolicy { + LRU, + FIFO, + RAND, +}; + +enum EvictionPolicy policy; + +/* + * A CacheSet is a set of cache blocks. A memory block that maps to a set can be + * put in any of the blocks inside the set. The number of block per set is + * called the associativity (assoc). + * + * Each block contains the the stored tag and a valid bit. Since this is not + * a functional simulator, the data itself is not stored. We only identify + * whether a block is in the cache or not by searching for its tag. + * + * In order to search for memory data in the cache, the set identifier and tag + * are extracted from the address and the set is probed to see whether a tag + * match occur. + * + * An address is logically divided into three portions: The block offset, + * the set number, and the tag. + * + * The set number is used to identify the set in which the block may exist. + * The tag is compared against all the tags of a set to search for a match. If a + * match is found, then the access is a hit. + * + * The CacheSet also contains bookkeaping information about eviction details. 
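+ *
+ * As a concrete illustration (using the default L1 parameters configured
+ * further below: 64-byte blocks, 8-way associativity, 16 KiB per cache,
+ * i.e. 32 sets), bits [5:0] of an address are the block offset, bits
+ * [10:6] select the set, and the remaining upper bits form the tag.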
+ */ + +typedef struct { + uint64_t tag; + bool valid; +} CacheBlock; + +typedef struct { + CacheBlock *blocks; + uint64_t *lru_priorities; + uint64_t lru_gen_counter; + GQueue *fifo_queue; +} CacheSet; + +typedef struct { + CacheSet *sets; + int num_sets; + int cachesize; + int assoc; + int blksize_shift; + uint64_t set_mask; + uint64_t tag_mask; + uint64_t accesses; + uint64_t misses; +} Cache; + +typedef struct { + char *disas_str; + const char *symbol; + uint64_t addr; + uint64_t l1_dmisses; + uint64_t l1_imisses; + uint64_t l2_misses; +} InsnData; + +void (*update_hit)(Cache *cache, int set, int blk); +void (*update_miss)(Cache *cache, int set, int blk); + +void (*metadata_init)(Cache *cache); +void (*metadata_destroy)(Cache *cache); + +static int cores; +static Cache **l1_dcaches, **l1_icaches; + +static bool use_l2; +static Cache **l2_ucaches; + +static GMutex *l1_dcache_locks; +static GMutex *l1_icache_locks; +static GMutex *l2_ucache_locks; + +static uint64_t l1_dmem_accesses; +static uint64_t l1_imem_accesses; +static uint64_t l1_imisses; +static uint64_t l1_dmisses; + +static uint64_t l2_mem_accesses; +static uint64_t l2_misses; + +static int pow_of_two(int num) +{ + g_assert((num & (num - 1)) == 0); + int ret = 0; + while (num /= 2) { + ret++; + } + return ret; +} + +/* + * LRU evection policy: For each set, a generation counter is maintained + * alongside a priority array. + * + * On each set access, the generation counter is incremented. + * + * On a cache hit: The hit-block is assigned the current generation counter, + * indicating that it is the most recently used block. + * + * On a cache miss: The block with the least priority is searched and replaced + * with the newly-cached block, of which the priority is set to the current + * generation number. + */ + +static void lru_priorities_init(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc); + cache->sets[i].lru_gen_counter = 0; + } +} + +static void lru_update_blk(Cache *cache, int set_idx, int blk_idx) +{ + CacheSet *set = &cache->sets[set_idx]; + set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter; + set->lru_gen_counter++; +} + +static int lru_get_lru_block(Cache *cache, int set_idx) +{ + int i, min_idx, min_priority; + + min_priority = cache->sets[set_idx].lru_priorities[0]; + min_idx = 0; + + for (i = 1; i < cache->assoc; i++) { + if (cache->sets[set_idx].lru_priorities[i] < min_priority) { + min_priority = cache->sets[set_idx].lru_priorities[i]; + min_idx = i; + } + } + return min_idx; +} + +static void lru_priorities_destroy(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + g_free(cache->sets[i].lru_priorities); + } +} + +/* + * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that + * stores accesses to the cache. + * + * On a compulsory miss: The block index is enqueued to the fifo_queue to + * indicate that it's the latest cached block. + * + * On a conflict miss: The first-in block is removed from the cache and the new + * block is put in its place and enqueued to the FIFO queue. 
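+ *
+ * For example, in a 2-way set that already caches blocks A and then B, a
+ * miss on block C evicts A (the first block queued), even if A was
+ * accessed more recently than B.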
+ */ + +static void fifo_init(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].fifo_queue = g_queue_new(); + } +} + +static int fifo_get_first_block(Cache *cache, int set) +{ + GQueue *q = cache->sets[set].fifo_queue; + return GPOINTER_TO_INT(g_queue_pop_tail(q)); +} + +static void fifo_update_on_miss(Cache *cache, int set, int blk_idx) +{ + GQueue *q = cache->sets[set].fifo_queue; + g_queue_push_head(q, GINT_TO_POINTER(blk_idx)); +} + +static void fifo_destroy(Cache *cache) +{ + int i; + + for (i = 0; i < cache->num_sets; i++) { + g_queue_free(cache->sets[i].fifo_queue); + } +} + +static inline uint64_t extract_tag(Cache *cache, uint64_t addr) +{ + return addr & cache->tag_mask; +} + +static inline uint64_t extract_set(Cache *cache, uint64_t addr) +{ + return (addr & cache->set_mask) >> cache->blksize_shift; +} + +static const char *cache_config_error(int blksize, int assoc, int cachesize) +{ + if (cachesize % blksize != 0) { + return "cache size must be divisible by block size"; + } else if (cachesize % (blksize * assoc) != 0) { + return "cache size must be divisible by set size (assoc * block size)"; + } else { + return NULL; + } +} + +static bool bad_cache_params(int blksize, int assoc, int cachesize) +{ + return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0); +} + +static Cache *cache_init(int blksize, int assoc, int cachesize) +{ + Cache *cache; + int i; + uint64_t blk_mask; + + /* + * This function shall not be called directly, and hence expects suitable + * parameters. + */ + g_assert(!bad_cache_params(blksize, assoc, cachesize)); + + cache = g_new(Cache, 1); + cache->assoc = assoc; + cache->cachesize = cachesize; + cache->num_sets = cachesize / (blksize * assoc); + cache->sets = g_new(CacheSet, cache->num_sets); + cache->blksize_shift = pow_of_two(blksize); + cache->accesses = 0; + cache->misses = 0; + + for (i = 0; i < cache->num_sets; i++) { + cache->sets[i].blocks = g_new0(CacheBlock, assoc); + } + + blk_mask = blksize - 1; + cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift); + cache->tag_mask = ~(cache->set_mask | blk_mask); + + if (metadata_init) { + metadata_init(cache); + } + + return cache; +} + +static Cache **caches_init(int blksize, int assoc, int cachesize) +{ + Cache **caches; + int i; + + if (bad_cache_params(blksize, assoc, cachesize)) { + return NULL; + } + + caches = g_new(Cache *, cores); + + for (i = 0; i < cores; i++) { + caches[i] = cache_init(blksize, assoc, cachesize); + } + + return caches; +} + +static int get_invalid_block(Cache *cache, uint64_t set) +{ + int i; + + for (i = 0; i < cache->assoc; i++) { + if (!cache->sets[set].blocks[i].valid) { + return i; + } + } + + return -1; +} + +static int get_replaced_block(Cache *cache, int set) +{ + switch (policy) { + case RAND: + return g_rand_int_range(rng, 0, cache->assoc); + case LRU: + return lru_get_lru_block(cache, set); + case FIFO: + return fifo_get_first_block(cache, set); + default: + g_assert_not_reached(); + } +} + +static int in_cache(Cache *cache, uint64_t addr) +{ + int i; + uint64_t tag, set; + + tag = extract_tag(cache, addr); + set = extract_set(cache, addr); + + for (i = 0; i < cache->assoc; i++) { + if (cache->sets[set].blocks[i].tag == tag && + cache->sets[set].blocks[i].valid) { + return i; + } + } + + return -1; +} + +/** + * access_cache(): Simulate a cache access + * @cache: The cache under simulation + * @addr: The address of the requested memory location + * + * Returns true if the requsted data is hit in the 
cache and false when missed. + * The cache is updated on miss for the next access. + */ +static bool access_cache(Cache *cache, uint64_t addr) +{ + int hit_blk, replaced_blk; + uint64_t tag, set; + + tag = extract_tag(cache, addr); + set = extract_set(cache, addr); + + hit_blk = in_cache(cache, addr); + if (hit_blk != -1) { + if (update_hit) { + update_hit(cache, set, hit_blk); + } + return true; + } + + replaced_blk = get_invalid_block(cache, set); + + if (replaced_blk == -1) { + replaced_blk = get_replaced_block(cache, set); + } + + if (update_miss) { + update_miss(cache, set, replaced_blk); + } + + cache->sets[set].blocks[replaced_blk].tag = tag; + cache->sets[set].blocks[replaced_blk].valid = true; + + return false; +} + +static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info, + uint64_t vaddr, void *userdata) +{ + uint64_t effective_addr; + struct qemu_plugin_hwaddr *hwaddr; + int cache_idx; + InsnData *insn; + bool hit_in_l1; + + hwaddr = qemu_plugin_get_hwaddr(info, vaddr); + if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) { + return; + } + + effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr; + cache_idx = vcpu_index % cores; + + g_mutex_lock(&l1_dcache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr); + if (!hit_in_l1) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST); + l1_dcaches[cache_idx]->misses++; + } + l1_dcaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_dcache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], effective_addr)) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; + } + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); +} + +static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata) +{ + uint64_t insn_addr; + InsnData *insn; + int cache_idx; + bool hit_in_l1; + + insn_addr = ((InsnData *) userdata)->addr; + + cache_idx = vcpu_index % cores; + g_mutex_lock(&l1_icache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr); + if (!hit_in_l1) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST); + l1_icaches[cache_idx]->misses++; + } + l1_icaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_icache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], insn_addr)) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; + } + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + size_t n_insns; + size_t i; + InsnData *data; + + n_insns = qemu_plugin_tb_n_insns(tb); + for (i = 0; i < n_insns; i++) { + struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i); + uint64_t effective_addr; + + if (sys) { + effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn); + } else { + effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn); + } + + /* + * Instructions might get translated multiple times, we do not create + * new entries for those instructions. 
Instead, we fetch the same + * entry from the hash table and register it for the callback again. + */ + g_mutex_lock(&hashtable_lock); + data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr)); + if (data == NULL) { + data = g_new0(InsnData, 1); + data->disas_str = qemu_plugin_insn_disas(insn); + data->symbol = qemu_plugin_insn_symbol(insn); + data->addr = effective_addr; + g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr), + (gpointer) data); + } + g_mutex_unlock(&hashtable_lock); + + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access, + QEMU_PLUGIN_CB_NO_REGS, + rw, data); + + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, data); + } +} + +static void insn_free(gpointer data) +{ + InsnData *insn = (InsnData *) data; + g_free(insn->disas_str); + g_free(insn); +} + +static void cache_free(Cache *cache) +{ + for (int i = 0; i < cache->num_sets; i++) { + g_free(cache->sets[i].blocks); + } + + if (metadata_destroy) { + metadata_destroy(cache); + } + + g_free(cache->sets); + g_free(cache); +} + +static void caches_free(Cache **caches) +{ + int i; + + for (i = 0; i < cores; i++) { + cache_free(caches[i]); + } +} + +static void append_stats_line(GString *line, uint64_t l1_daccess, + uint64_t l1_dmisses, uint64_t l1_iaccess, + uint64_t l1_imisses, uint64_t l2_access, + uint64_t l2_misses) +{ + double l1_dmiss_rate, l1_imiss_rate, l2_miss_rate; + + l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0; + l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0; + + g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu" + " %9.4lf%%", + l1_daccess, + l1_dmisses, + l1_daccess ? l1_dmiss_rate : 0.0, + l1_iaccess, + l1_imisses, + l1_iaccess ? l1_imiss_rate : 0.0); + + if (use_l2) { + l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0; + g_string_append_printf(line, " %-12lu %-11lu %10.4lf%%", + l2_access, + l2_misses, + l2_access ? l2_miss_rate : 0.0); + } + + g_string_append(line, "\n"); +} + +static void sum_stats(void) +{ + int i; + + g_assert(cores > 1); + for (i = 0; i < cores; i++) { + l1_imisses += l1_icaches[i]->misses; + l1_dmisses += l1_dcaches[i]->misses; + l1_imem_accesses += l1_icaches[i]->accesses; + l1_dmem_accesses += l1_dcaches[i]->accesses; + + if (use_l2) { + l2_misses += l2_ucaches[i]->misses; + l2_mem_accesses += l2_ucaches[i]->accesses; + } + } +} + +static int dcmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1; +} + +static int icmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1; +} + +static int l2_cmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1; +} + +static void log_stats(void) +{ + int i; + Cache *icache, *dcache, *l2_cache; + + g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses," + " dmiss rate, insn accesses," + " insn misses, imiss rate"); + + if (use_l2) { + g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate"); + } + + g_string_append(rep, "\n"); + + for (i = 0; i < cores; i++) { + g_string_append_printf(rep, "%-8d", i); + dcache = l1_dcaches[i]; + icache = l1_icaches[i]; + l2_cache = use_l2 ? 
l2_ucaches[i] : NULL; + append_stats_line(rep, dcache->accesses, dcache->misses, + icache->accesses, icache->misses, + l2_cache ? l2_cache->accesses : 0, + l2_cache ? l2_cache->misses : 0); + } + + if (cores > 1) { + sum_stats(); + g_string_append_printf(rep, "%-8s", "sum"); + append_stats_line(rep, l1_dmem_accesses, l1_dmisses, + l1_imem_accesses, l1_imisses, + l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0); + } + + g_string_append(rep, "\n"); + qemu_plugin_outs(rep->str); +} + +static void log_top_insns(void) +{ + int i; + GList *curr, *miss_insns; + InsnData *insn; + + miss_insns = g_hash_table_get_values(miss_ht); + miss_insns = g_list_sort(miss_insns, dcmp); + g_autoptr(GString) rep = g_string_new(""); + g_string_append_printf(rep, "%s", "address, data misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l1_dmisses, + insn->disas_str); + } + + miss_insns = g_list_sort(miss_insns, icmp); + g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l1_imisses, + insn->disas_str); + } + + if (!use_l2) { + goto finish; + } + + miss_insns = g_list_sort(miss_insns, l2_cmp); + g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l2_misses, + insn->disas_str); + } + +finish: + qemu_plugin_outs(rep->str); + g_list_free(miss_insns); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + log_stats(); + log_top_insns(); + + caches_free(l1_dcaches); + caches_free(l1_icaches); + + g_free(l1_dcache_locks); + g_free(l1_icache_locks); + + if (use_l2) { + caches_free(l2_ucaches); + g_free(l2_ucache_locks); + } + + g_hash_table_destroy(miss_ht); +} + +static void policy_init(void) +{ + switch (policy) { + case LRU: + update_hit = lru_update_blk; + update_miss = lru_update_blk; + metadata_init = lru_priorities_init; + metadata_destroy = lru_priorities_destroy; + break; + case FIFO: + update_miss = fifo_update_on_miss; + metadata_init = fifo_init; + metadata_destroy = fifo_destroy; + break; + case RAND: + rng = g_rand_new(); + break; + default: + g_assert_not_reached(); + } +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, + int argc, char **argv) +{ + int i; + int l1_iassoc, l1_iblksize, l1_icachesize; + int l1_dassoc, l1_dblksize, l1_dcachesize; + int l2_assoc, l2_blksize, l2_cachesize; + + limit = 32; + sys = info->system_emulation; + + l1_dassoc = 8; + l1_dblksize = 64; + l1_dcachesize = l1_dblksize * l1_dassoc * 32; + + l1_iassoc = 8; + l1_iblksize = 64; + l1_icachesize = l1_iblksize * l1_iassoc * 32; + + l2_assoc = 16; + l2_blksize = 64; + l2_cachesize = l2_assoc * l2_blksize * 2048; + + policy = LRU; + + cores = sys ? 
qemu_plugin_n_vcpus() : 1; + + for (i = 0; i < argc; i++) { + char *opt = argv[i]; + g_autofree char **tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "iblksize") == 0) { + l1_iblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "iassoc") == 0) { + l1_iassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "icachesize") == 0) { + l1_icachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dblksize") == 0) { + l1_dblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dassoc") == 0) { + l1_dassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dcachesize") == 0) { + l1_dcachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "limit") == 0) { + limit = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "cores") == 0) { + cores = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) { + use_l2 = true; + l2_cachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2blksize") == 0) { + use_l2 = true; + l2_blksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2assoc") == 0) { + use_l2 = true; + l2_assoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "evict") == 0) { + if (g_strcmp0(tokens[1], "rand") == 0) { + policy = RAND; + } else if (g_strcmp0(tokens[1], "lru") == 0) { + policy = LRU; + } else if (g_strcmp0(tokens[1], "fifo") == 0) { + policy = FIFO; + } else { + fprintf(stderr, "invalid eviction policy: %s\n", opt); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } + } + + policy_init(); + + l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize); + if (!l1_dcaches) { + const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize); + fprintf(stderr, "dcache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize); + if (!l1_icaches) { + const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize); + fprintf(stderr, "icache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL; + if (!l2_ucaches && use_l2) { + const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize); + fprintf(stderr, "L2 cache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_dcache_locks = g_new0(GMutex, cores); + l1_icache_locks = g_new0(GMutex, cores); + l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL; + + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free); + + return 0; +} diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c new file mode 100644 index 00000000000..a5275dcc15c --- /dev/null +++ b/contrib/plugins/execlog.c @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2021, Alexandre Iooss + * + * Log instruction execution with memory access. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
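+ *
+ * Example usage (illustrative, not part of this patch): once built as
+ * contrib/plugins/libexeclog.so, the plugin can be loaded with e.g.
+ *   qemu-x86_64 -plugin ./contrib/plugins/libexeclog.so -d plugin ./a.out
+ * so that the per-instruction log is emitted via the plugin log channel.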
+ */
+#include <glib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* Store last executed instruction on each vCPU as a GString */
+GArray *last_exec;
+
+/**
+ * Add memory read or write information to current instruction log
+ */
+static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
+                     uint64_t vaddr, void *udata)
+{
+    GString *s;
+
+    /* Find vCPU in array */
+    g_assert(cpu_index < last_exec->len);
+    s = g_array_index(last_exec, GString *, cpu_index);
+
+    /* Indicate type of memory access */
+    if (qemu_plugin_mem_is_store(info)) {
+        g_string_append(s, ", store");
+    } else {
+        g_string_append(s, ", load");
+    }
+
+    /* If running full system emulation, log physical address and device name */
+    struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+    if (hwaddr) {
+        uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
+        const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+        g_string_append_printf(s, ", 0x%08"PRIx64", %s", addr, name);
+    } else {
+        g_string_append_printf(s, ", 0x%08"PRIx64, vaddr);
+    }
+}
+
+/**
+ * Log instruction execution
+ */
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+    GString *s;
+
+    /* Find or create vCPU in array */
+    while (cpu_index >= last_exec->len) {
+        s = g_string_new(NULL);
+        g_array_append_val(last_exec, s);
+    }
+    s = g_array_index(last_exec, GString *, cpu_index);
+
+    /* Print previous instruction in cache */
+    if (s->len) {
+        qemu_plugin_outs(s->str);
+        qemu_plugin_outs("\n");
+    }
+
+    /* Store new instruction in cache */
+    /* vcpu_mem will add memory access information to last_exec */
+    g_string_printf(s, "%u, ", cpu_index);
+    g_string_append(s, (char *)udata);
+}
+
+/**
+ * On translation of a new translation block
+ *
+ * QEMU translates code one translation block (TB) at a time. By hooking here
+ * we can register a callback on each instruction and memory access.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    struct qemu_plugin_insn *insn;
+    uint64_t insn_vaddr;
+    uint32_t insn_opcode;
+    char *insn_disas;
+
+    size_t n = qemu_plugin_tb_n_insns(tb);
+    for (size_t i = 0; i < n; i++) {
+        /*
+         * `insn` is shared between translations in QEMU, copy needed data here.
+         * `output` is never freed as it might be used multiple times during
+         * the emulation lifetime.
+         * We only consider the first 32 bits of the instruction, this may be
+         * a limitation for CISC architectures.
+ */ + insn = qemu_plugin_tb_get_insn(tb, i); + insn_vaddr = qemu_plugin_insn_vaddr(insn); + insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); + insn_disas = qemu_plugin_insn_disas(insn); + char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"", + insn_vaddr, insn_opcode, insn_disas); + + /* Register callback on memory read or write */ + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, + QEMU_PLUGIN_CB_NO_REGS, + QEMU_PLUGIN_MEM_RW, NULL); + + /* Register callback on instruction */ + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, output); + } +} + +/** + * On plugin exit, print last instruction in cache + */ +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ + guint i; + GString *s; + for (i = 0; i < last_exec->len; i++) { + s = g_array_index(last_exec, GString *, i); + if (s->str) { + qemu_plugin_outs(s->str); + qemu_plugin_outs("\n"); + } + } +} + +/** + * Install the plugin + */ +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, int argc, + char **argv) +{ + /* + * Initialize dynamic array to cache vCPU instruction. In user mode + * we don't know the size before emulation. + */ + last_exec = g_array_new(FALSE, FALSE, sizeof(GString *)); + + /* Register translation block and exit callbacks */ + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); + + return 0; +} diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c index 4b083401432..062200a7a42 100644 --- a/contrib/plugins/hotblocks.c +++ b/contrib/plugins/hotblocks.c @@ -133,8 +133,18 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { - if (argc && strcmp(argv[0], "inline") == 0) { - do_inline = true; + for (int i = 0; i < argc; i++) { + char *opt = argv[i]; + g_autofree char **tokens = g_strsplit(opt, "=", 2); + if (g_strcmp0(tokens[0], "inline") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else { + fprintf(stderr, "option parsing failed: %s\n", opt); + return -1; + } } plugin_init(); diff --git a/contrib/plugins/hotpages.c b/contrib/plugins/hotpages.c index bf532675328..0d12910af69 100644 --- a/contrib/plugins/hotpages.c +++ b/contrib/plugins/hotpages.c @@ -169,16 +169,26 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, for (i = 0; i < argc; i++) { char *opt = argv[i]; - if (g_strcmp0(opt, "reads") == 0) { - sort_by = SORT_R; - } else if (g_strcmp0(opt, "writes") == 0) { - sort_by = SORT_W; - } else if (g_strcmp0(opt, "address") == 0) { - sort_by = SORT_A; - } else if (g_strcmp0(opt, "io") == 0) { - track_io = true; - } else if (g_str_has_prefix(opt, "pagesize=")) { - page_size = g_ascii_strtoull(opt + 9, NULL, 10); + g_autofree char **tokens = g_strsplit(opt, "=", -1); + + if (g_strcmp0(tokens[0], "sortby") == 0) { + if (g_strcmp0(tokens[1], "reads") == 0) { + sort_by = SORT_R; + } else if (g_strcmp0(tokens[1], "writes") == 0) { + sort_by = SORT_W; + } else if (g_strcmp0(tokens[1], "address") == 0) { + sort_by = SORT_A; + } else { + fprintf(stderr, "invalid value to sortby: %s\n", tokens[1]); + return -1; + } + } else if (g_strcmp0(tokens[0], "io") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &track_io)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "pagesize") 
== 0) { + page_size = g_ascii_strtoull(tokens[1], NULL, 10); } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c index 600f7facc1e..4a5ec3d936a 100644 --- a/contrib/plugins/howvec.c +++ b/contrib/plugins/howvec.c @@ -333,23 +333,34 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, for (i = 0; i < argc; i++) { char *p = argv[i]; - if (strcmp(p, "inline") == 0) { - do_inline = true; - } else if (strcmp(p, "verbose") == 0) { - verbose = true; - } else { + g_autofree char **tokens = g_strsplit(p, "=", -1); + if (g_strcmp0(tokens[0], "inline") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "verbose") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "count") == 0) { + char *value = tokens[1]; int j; CountType type = COUNT_INDIVIDUAL; - if (*p == '!') { + if (*value == '!') { type = COUNT_NONE; - p++; + value++; } for (j = 0; j < class_table_sz; j++) { - if (strcmp(p, class_table[j].opt) == 0) { + if (strcmp(value, class_table[j].opt) == 0) { class_table[j].what = type; break; } } + } else { + fprintf(stderr, "option parsing failed: %s\n", p); + return -1; } } diff --git a/contrib/plugins/hwprofile.c b/contrib/plugins/hwprofile.c index faf216ac002..691d4edb0c6 100644 --- a/contrib/plugins/hwprofile.c +++ b/contrib/plugins/hwprofile.c @@ -259,27 +259,42 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { int i; + g_autoptr(GString) matches_raw = g_string_new(""); for (i = 0; i < argc; i++) { char *opt = argv[i]; - if (g_strcmp0(opt, "read") == 0) { - rw = QEMU_PLUGIN_MEM_R; - } else if (g_strcmp0(opt, "write") == 0) { - rw = QEMU_PLUGIN_MEM_W; - } else if (g_strcmp0(opt, "pattern") == 0) { - pattern = true; - } else if (g_strcmp0(opt, "source") == 0) { - source = true; - } else if (g_str_has_prefix(opt, "match")) { - gchar **parts = g_strsplit(opt, "=", 2); + g_autofree char **tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "track") == 0) { + if (g_strcmp0(tokens[1], "read") == 0) { + rw = QEMU_PLUGIN_MEM_R; + } else if (g_strcmp0(tokens[1], "write") == 0) { + rw = QEMU_PLUGIN_MEM_W; + } else { + fprintf(stderr, "invalid value for track: %s\n", tokens[1]); + return -1; + } + } else if (g_strcmp0(tokens[0], "pattern") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &pattern)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "source") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &source)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "match") == 0) { check_match = true; - matches = g_strsplit(parts[1], ",", -1); - g_strfreev(parts); + g_string_append_printf(matches_raw, "%s,", tokens[1]); } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; } } + if (check_match) { + matches = g_strsplit(matches_raw->str, ",", -1); + } if (source && pattern) { fprintf(stderr, "can only currently track either source or pattern.\n"); diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c index 7fd35eb6692..a41ffe83fa6 100644 --- a/contrib/plugins/lockstep.c +++ 
b/contrib/plugins/lockstep.c @@ -319,22 +319,35 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, int argc, char **argv) { int i; - - if (!argc || !argv[0]) { - qemu_plugin_outs("Need a socket path to talk to other instance."); - return -1; - } + g_autofree char *sock_path = NULL; for (i = 0; i < argc; i++) { char *p = argv[i]; - if (strcmp(p, "verbose") == 0) { - verbose = true; - } else if (!setup_unix_socket(argv[0])) { - qemu_plugin_outs("Failed to setup socket for communications."); + g_autofree char **tokens = g_strsplit(p, "=", 2); + + if (g_strcmp0(tokens[0], "verbose") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", p); + return -1; + } + } else if (g_strcmp0(tokens[0], "sockpath") == 0) { + sock_path = tokens[1]; + } else { + fprintf(stderr, "option parsing failed: %s\n", p); return -1; } } + if (sock_path == NULL) { + fprintf(stderr, "Need a socket path to talk to other instance.\n"); + return -1; + } + + if (!setup_unix_socket(sock_path)) { + fprintf(stderr, "Failed to setup socket for communications.\n"); + return -1; + } + our_id = id; qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); diff --git a/contrib/vhost-user-gpu/meson.build b/contrib/vhost-user-gpu/meson.build index 0ce1515a10e..92c8f3a86a7 100644 --- a/contrib/vhost-user-gpu/meson.build +++ b/contrib/vhost-user-gpu/meson.build @@ -1,6 +1,5 @@ -if 'CONFIG_TOOLS' in config_host and 'CONFIG_VIRGL' in config_host \ - and 'CONFIG_GBM' in config_host and 'CONFIG_LINUX' in config_host \ - and pixman.found() +if 'CONFIG_TOOLS' in config_host and virgl.found() and gbm.found() \ + and 'CONFIG_LINUX' in config_host and pixman.found() executable('vhost-user-gpu', files('vhost-user-gpu.c', 'virgl.c', 'vugbm.c'), dependencies: [qemuutil, pixman, gbm, virgl, vhost_user, opengl], install: true, diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c index f73f292c9f7..611360e6b47 100644 --- a/contrib/vhost-user-gpu/vhost-user-gpu.c +++ b/contrib/vhost-user-gpu/vhost-user-gpu.c @@ -49,6 +49,8 @@ static char *opt_render_node; static gboolean opt_virgl; static void vg_handle_ctrl(VuDev *dev, int qidx); +static void vg_cleanup_mapping(VuGpu *g, + struct virtio_gpu_simple_resource *res); static const char * vg_cmd_to_string(int cmd) @@ -348,6 +350,7 @@ vg_resource_create_2d(VuGpu *g, if (!res->image) { g_critical("%s: resource creation failed %d %d %d", __func__, c2d.resource_id, c2d.width, c2d.height); + vugbm_buffer_destroy(&res->buffer); g_free(res); cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; return; @@ -399,6 +402,7 @@ vg_resource_destroy(VuGpu *g, } vugbm_buffer_destroy(&res->buffer); + vg_cleanup_mapping(g, res); pixman_image_unref(res->image); QTAILQ_REMOVE(&g->reslist, res, next); g_free(res); @@ -488,6 +492,11 @@ vg_resource_attach_backing(VuGpu *g, return; } + if (res->iov) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); if (ret != 0) { cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; @@ -497,6 +506,22 @@ vg_resource_attach_backing(VuGpu *g, res->iov_cnt = ab.nr_entries; } +/* Though currently only free iov, maybe later will do more work. 
*/ +void vg_cleanup_mapping_iov(VuGpu *g, + struct iovec *iov, uint32_t count) +{ + g_free(iov); +} + +static void +vg_cleanup_mapping(VuGpu *g, + struct virtio_gpu_simple_resource *res) +{ + vg_cleanup_mapping_iov(g, res->iov, res->iov_cnt); + res->iov = NULL; + res->iov_cnt = 0; +} + static void vg_resource_detach_backing(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) @@ -515,9 +540,7 @@ vg_resource_detach_backing(VuGpu *g, return; } - g_free(res->iov); - res->iov = NULL; - res->iov_cnt = 0; + vg_cleanup_mapping(g, res); } static void diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c index 9e6660c7ab8..3e45e1bd336 100644 --- a/contrib/vhost-user-gpu/virgl.c +++ b/contrib/vhost-user-gpu/virgl.c @@ -108,9 +108,17 @@ virgl_cmd_resource_unref(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_resource_unref unref; + struct iovec *res_iovs = NULL; + int num_iovs = 0; VUGPU_FILL_CMD(unref); + virgl_renderer_resource_detach_iov(unref.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs != NULL && num_iovs != 0) { + vg_cleanup_mapping_iov(g, res_iovs, num_iovs); + } virgl_renderer_resource_unref(unref.resource_id); } @@ -128,6 +136,7 @@ virgl_cmd_get_capset_info(VuGpu *g, VUGPU_FILL_CMD(info); + memset(&resp, 0, sizeof(resp)); if (info.capset_index == 0) { resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; virgl_renderer_get_cap_set(resp.capset_id, @@ -169,6 +178,10 @@ virgl_cmd_get_capset(VuGpu *g, virgl_renderer_get_cap_set(gc.capset_id, &max_ver, &max_size); + if (!max_size) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } resp = g_malloc0(sizeof(*resp) + max_size); resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; @@ -279,8 +292,11 @@ virgl_resource_attach_backing(VuGpu *g, return; } - virgl_renderer_resource_attach_iov(att_rb.resource_id, + ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, res_iovs, att_rb.nr_entries); + if (ret != 0) { + vg_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); + } } static void @@ -299,7 +315,7 @@ virgl_resource_detach_backing(VuGpu *g, if (res_iovs == NULL || num_iovs == 0) { return; } - g_free(res_iovs); + vg_cleanup_mapping_iov(g, res_iovs, num_iovs); } static void diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h index 04d56158123..e2864bba68e 100644 --- a/contrib/vhost-user-gpu/vugpu.h +++ b/contrib/vhost-user-gpu/vugpu.h @@ -169,7 +169,7 @@ int vg_create_mapping_iov(VuGpu *g, struct virtio_gpu_resource_attach_backing *ab, struct virtio_gpu_ctrl_command *cmd, struct iovec **iov); - +void vg_cleanup_mapping_iov(VuGpu *g, struct iovec *iov, uint32_t count); void vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); void vg_wait_ok(VuGpu *g); diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c index c15d18c33f0..081230da548 100644 --- a/contrib/vhost-user-input/main.c +++ b/contrib/vhost-user-input/main.c @@ -6,12 +6,13 @@ #include "qemu/osdep.h" -#include +#include #include "qemu/iov.h" #include "qemu/bswap.h" #include "qemu/sockets.h" #include "libvhost-user-glib.h" +#include "standard-headers/linux/input.h" #include "standard-headers/linux/virtio_input.h" #include "qapi/error.h" @@ -113,13 +114,16 @@ vi_evdev_watch(VuDev *dev, int condition, void *data) static void vi_handle_status(VuInput *vi, virtio_input_event *event) { struct input_event evdev; + struct timeval tval; int rc; - if (gettimeofday(&evdev.time, NULL)) { + if (gettimeofday(&tval, NULL)) { perror("vi_handle_status: gettimeofday"); return; } + 
evdev.input_event_sec = tval.tv_sec; + evdev.input_event_usec = tval.tv_usec; evdev.type = le16toh(event->type); evdev.code = le16toh(event->code); evdev.value = le32toh(event->value); diff --git a/cpu.c b/cpu.c index bfbe5a66f95..9bce67ef556 100644 --- a/cpu.c +++ b/cpu.c @@ -29,6 +29,7 @@ #ifdef CONFIG_USER_ONLY #include "qemu.h" #else +#include "hw/core/sysemu-cpu-ops.h" #include "exec/address-spaces.h" #endif #include "sysemu/tcg.h" @@ -36,6 +37,8 @@ #include "sysemu/replay.h" #include "exec/translate-all.h" #include "exec/log.h" +#include "hw/core/accel-cpu.h" +#include "trace/trace-root.h" uintptr_t qemu_host_page_size; intptr_t qemu_host_page_mask; @@ -126,10 +129,14 @@ const VMStateDescription vmstate_cpu_common = { void cpu_exec_realizefn(CPUState *cpu, Error **errp) { +#ifndef CONFIG_USER_ONLY CPUClass *cc = CPU_GET_CLASS(cpu); +#endif cpu_list_add(cpu); - + if (!accel_cpu_realizefn(cpu, errp)) { + return; + } #ifdef CONFIG_TCG /* NB: errp parameter is unused currently */ if (tcg_enabled()) { @@ -138,26 +145,25 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) #endif /* CONFIG_TCG */ #ifdef CONFIG_USER_ONLY - assert(cc->vmsd == NULL); + assert(qdev_get_vmsd(DEVICE(cpu)) == NULL || + qdev_get_vmsd(DEVICE(cpu))->unmigratable); #else if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu); } - if (cc->vmsd != NULL) { - vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu); + if (cc->sysemu_ops->legacy_vmsd != NULL) { + vmstate_register(NULL, cpu->cpu_index, cc->sysemu_ops->legacy_vmsd, cpu); } #endif /* CONFIG_USER_ONLY */ } void cpu_exec_unrealizefn(CPUState *cpu) { +#ifndef CONFIG_USER_ONLY CPUClass *cc = CPU_GET_CLASS(cpu); -#ifdef CONFIG_USER_ONLY - assert(cc->vmsd == NULL); -#else - if (cc->vmsd != NULL) { - vmstate_unregister(NULL, cc->vmsd, cpu); + if (cc->sysemu_ops->legacy_vmsd != NULL) { + vmstate_unregister(NULL, cc->sysemu_ops->legacy_vmsd, cpu); } if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { vmstate_unregister(NULL, &vmstate_cpu_common, cpu); @@ -173,6 +179,27 @@ void cpu_exec_unrealizefn(CPUState *cpu) cpu_list_remove(cpu); } +static Property cpu_common_props[] = { +#ifndef CONFIG_USER_ONLY + /* + * Create a memory property for softmmu CPU object, + * so users can wire up its memory. (This can't go in hw/core/cpu.c + * because that file is compiled only once for both user-mode + * and system builds.) The default if no link is set up is to use + * the system address space. 
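+     *
+     * For example (illustrative, not part of this change), a board model
+     * could wire a CPU to its own container region with something like:
+     *   object_property_set_link(OBJECT(cpu), "memory", OBJECT(mr),
+     *                            &error_abort);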
+ */ + DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION, + MemoryRegion *), +#endif + DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false), + DEFINE_PROP_END_OF_LIST(), +}; + +void cpu_class_init_props(DeviceClass *dc) +{ + device_class_set_props(dc, cpu_common_props); +} + void cpu_exec_initfn(CPUState *cpu) { cpu->as = NULL; @@ -219,11 +246,6 @@ void tb_invalidate_phys_addr(target_ulong addr) tb_invalidate_phys_page_range(addr, addr + 1); mmap_unlock(); } - -static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) -{ - tb_invalidate_phys_addr(pc); -} #else void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) { @@ -244,25 +266,19 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) ram_addr = memory_region_get_ram_addr(mr) + addr; tb_invalidate_phys_page_range(ram_addr, ram_addr + 1); } - -static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) -{ - /* - * There may not be a virtual to physical translation for the pc - * right now, but there may exist cached TB for this pc. - * Flush the whole TB cache to force re-translation of such TBs. - * This is heavyweight, but we're debugging anyway. - */ - tb_flush(cpu); -} #endif /* Add a breakpoint. */ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, CPUBreakpoint **breakpoint) { + CPUClass *cc = CPU_GET_CLASS(cpu); CPUBreakpoint *bp; + if (cc->gdb_adjust_breakpoint) { + pc = cc->gdb_adjust_breakpoint(cpu, pc); + } + bp = g_malloc(sizeof(*bp)); bp->pc = pc; @@ -275,19 +291,24 @@ int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags, QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry); } - breakpoint_invalidate(cpu, pc); - if (breakpoint) { *breakpoint = bp; } + + trace_breakpoint_insert(cpu->cpu_index, pc, flags); return 0; } /* Remove a specific breakpoint. */ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags) { + CPUClass *cc = CPU_GET_CLASS(cpu); CPUBreakpoint *bp; + if (cc->gdb_adjust_breakpoint) { + pc = cc->gdb_adjust_breakpoint(cpu, pc); + } + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { if (bp->pc == pc && bp->flags == flags) { cpu_breakpoint_remove_by_ref(cpu, bp); @@ -298,13 +319,12 @@ int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags) } /* Remove a specific breakpoint by reference. */ -void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint) +void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *bp) { - QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry); + QTAILQ_REMOVE(&cpu->breakpoints, bp, entry); - breakpoint_invalidate(cpu, breakpoint->pc); - - g_free(breakpoint); + trace_breakpoint_remove(cpu->cpu_index, bp->pc, bp->flags); + g_free(bp); } /* Remove all matching breakpoints. 
*/ @@ -327,11 +347,8 @@ void cpu_single_step(CPUState *cpu, int enabled) cpu->singlestep_enabled = enabled; if (kvm_enabled()) { kvm_update_guest_debug(cpu, 0); - } else { - /* must flush all the translated code to avoid inconsistencies */ - /* XXX: only flush what is necessary */ - tb_flush(cpu); } + trace_breakpoint_singlestep(cpu->cpu_index, enabled); } } diff --git a/crypto/cipher-builtin.c.inc b/crypto/cipher-builtin.c.inc index 7597cf4a10f..b409089095c 100644 --- a/crypto/cipher-builtin.c.inc +++ b/crypto/cipher-builtin.c.inc @@ -19,8 +19,6 @@ */ #include "crypto/aes.h" -#include "crypto/desrfb.h" -#include "crypto/xts.h" typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext; struct QCryptoCipherBuiltinAESContext { @@ -32,7 +30,6 @@ typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES; struct QCryptoCipherBuiltinAES { QCryptoCipher base; QCryptoCipherBuiltinAESContext key; - QCryptoCipherBuiltinAESContext key_tweak; uint8_t iv[AES_BLOCK_SIZE]; }; @@ -194,39 +191,6 @@ static int qcrypto_cipher_aes_decrypt_cbc(QCryptoCipher *cipher, return 0; } -static int qcrypto_cipher_aes_encrypt_xts(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinAES *ctx - = container_of(cipher, QCryptoCipherBuiltinAES, base); - - if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { - return -1; - } - xts_encrypt(&ctx->key, &ctx->key_tweak, - do_aes_encrypt_ecb, do_aes_decrypt_ecb, - ctx->iv, len, out, in); - return 0; -} - -static int qcrypto_cipher_aes_decrypt_xts(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinAES *ctx - = container_of(cipher, QCryptoCipherBuiltinAES, base); - - if (!qcrypto_length_check(len, AES_BLOCK_SIZE, errp)) { - return -1; - } - xts_decrypt(&ctx->key, &ctx->key_tweak, - do_aes_encrypt_ecb, do_aes_decrypt_ecb, - ctx->iv, len, out, in); - return 0; -} - - static int qcrypto_cipher_aes_setiv(QCryptoCipher *cipher, const uint8_t *iv, size_t niv, Error **errp) { @@ -257,84 +221,16 @@ static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_cbc = { .cipher_free = qcrypto_cipher_ctx_free, }; -static const struct QCryptoCipherDriver qcrypto_cipher_aes_driver_xts = { - .cipher_encrypt = qcrypto_cipher_aes_encrypt_xts, - .cipher_decrypt = qcrypto_cipher_aes_decrypt_xts, - .cipher_setiv = qcrypto_cipher_aes_setiv, - .cipher_free = qcrypto_cipher_ctx_free, -}; - - -typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB; -struct QCryptoCipherBuiltinDESRFB { - QCryptoCipher base; - - /* C.f. 
alg_key_len[QCRYPTO_CIPHER_ALG_DES_RFB] */ - uint8_t key[8]; -}; - -static int qcrypto_cipher_encrypt_des_rfb(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinDESRFB *ctx - = container_of(cipher, QCryptoCipherBuiltinDESRFB, base); - size_t i; - - if (!qcrypto_length_check(len, 8, errp)) { - return -1; - } - - deskey(ctx->key, EN0); - - for (i = 0; i < len; i += 8) { - des((void *)in + i, out + i); - } - - return 0; -} - -static int qcrypto_cipher_decrypt_des_rfb(QCryptoCipher *cipher, - const void *in, void *out, - size_t len, Error **errp) -{ - QCryptoCipherBuiltinDESRFB *ctx - = container_of(cipher, QCryptoCipherBuiltinDESRFB, base); - size_t i; - - if (!qcrypto_length_check(len, 8, errp)) { - return -1; - } - - deskey(ctx->key, DE1); - - for (i = 0; i < len; i += 8) { - des((void *)in + i, out + i); - } - - return 0; -} - -static const struct QCryptoCipherDriver qcrypto_cipher_des_rfb_driver = { - .cipher_encrypt = qcrypto_cipher_encrypt_des_rfb, - .cipher_decrypt = qcrypto_cipher_decrypt_des_rfb, - .cipher_setiv = qcrypto_cipher_no_setiv, - .cipher_free = qcrypto_cipher_ctx_free, -}; - bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode) { switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: - return mode == QCRYPTO_CIPHER_MODE_ECB; case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: case QCRYPTO_CIPHER_ALG_AES_256: switch (mode) { case QCRYPTO_CIPHER_MODE_ECB: case QCRYPTO_CIPHER_MODE_CBC: - case QCRYPTO_CIPHER_MODE_XTS: return true; default: return false; @@ -356,18 +252,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, } switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: - if (mode == QCRYPTO_CIPHER_MODE_ECB) { - QCryptoCipherBuiltinDESRFB *ctx; - - ctx = g_new0(QCryptoCipherBuiltinDESRFB, 1); - ctx->base.driver = &qcrypto_cipher_des_rfb_driver; - memcpy(ctx->key, key, sizeof(ctx->key)); - - return &ctx->base; - } - goto bad_mode; - case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: case QCRYPTO_CIPHER_ALG_AES_256: @@ -382,9 +266,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, case QCRYPTO_CIPHER_MODE_CBC: drv = &qcrypto_cipher_aes_driver_cbc; break; - case QCRYPTO_CIPHER_MODE_XTS: - drv = &qcrypto_cipher_aes_driver_xts; - break; default: goto bad_mode; } @@ -392,19 +273,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, ctx = g_new0(QCryptoCipherBuiltinAES, 1); ctx->base.driver = drv; - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - nkey /= 2; - if (AES_set_encrypt_key(key + nkey, nkey * 8, - &ctx->key_tweak.enc)) { - error_setg(errp, "Failed to set encryption key"); - goto error; - } - if (AES_set_decrypt_key(key + nkey, nkey * 8, - &ctx->key_tweak.dec)) { - error_setg(errp, "Failed to set decryption key"); - goto error; - } - } if (AES_set_encrypt_key(key, nkey * 8, &ctx->key.enc)) { error_setg(errp, "Failed to set encryption key"); goto error; diff --git a/crypto/cipher-gcrypt.c.inc b/crypto/cipher-gcrypt.c.inc index 42d4137534f..a6a0117717f 100644 --- a/crypto/cipher-gcrypt.c.inc +++ b/crypto/cipher-gcrypt.c.inc @@ -18,17 +18,13 @@ * */ -#ifdef CONFIG_QEMU_PRIVATE_XTS -#include "crypto/xts.h" -#endif - #include bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode) { switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: case QCRYPTO_CIPHER_ALG_3DES: case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: @@ -59,10 +55,6 @@ typedef 
struct QCryptoCipherGcrypt { QCryptoCipher base; gcry_cipher_hd_t handle; size_t blocksize; -#ifdef CONFIG_QEMU_PRIVATE_XTS - gcry_cipher_hd_t tweakhandle; - uint8_t iv[XTS_BLOCK_SIZE]; -#endif } QCryptoCipherGcrypt; @@ -178,90 +170,6 @@ static const struct QCryptoCipherDriver qcrypto_gcrypt_ctr_driver = { .cipher_free = qcrypto_gcrypt_ctx_free, }; -#ifdef CONFIG_QEMU_PRIVATE_XTS -static void qcrypto_gcrypt_xts_ctx_free(QCryptoCipher *cipher) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - gcry_cipher_close(ctx->tweakhandle); - qcrypto_gcrypt_ctx_free(cipher); -} - -static void qcrypto_gcrypt_xts_wrape(const void *ctx, size_t length, - uint8_t *dst, const uint8_t *src) -{ - gcry_error_t err; - err = gcry_cipher_encrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); - g_assert(err == 0); -} - -static void qcrypto_gcrypt_xts_wrapd(const void *ctx, size_t length, - uint8_t *dst, const uint8_t *src) -{ - gcry_error_t err; - err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); - g_assert(err == 0); -} - -static int qcrypto_gcrypt_xts_encrypt(QCryptoCipher *cipher, const void *in, - void *out, size_t len, Error **errp) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - if (len & (ctx->blocksize - 1)) { - error_setg(errp, "Length %zu must be a multiple of block size %zu", - len, ctx->blocksize); - return -1; - } - - xts_encrypt(ctx->handle, ctx->tweakhandle, - qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd, - ctx->iv, len, out, in); - return 0; -} - -static int qcrypto_gcrypt_xts_decrypt(QCryptoCipher *cipher, const void *in, - void *out, size_t len, Error **errp) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - if (len & (ctx->blocksize - 1)) { - error_setg(errp, "Length %zu must be a multiple of block size %zu", - len, ctx->blocksize); - return -1; - } - - xts_decrypt(ctx->handle, ctx->tweakhandle, - qcrypto_gcrypt_xts_wrape, qcrypto_gcrypt_xts_wrapd, - ctx->iv, len, out, in); - return 0; -} - -static int qcrypto_gcrypt_xts_setiv(QCryptoCipher *cipher, - const uint8_t *iv, size_t niv, - Error **errp) -{ - QCryptoCipherGcrypt *ctx = container_of(cipher, QCryptoCipherGcrypt, base); - - if (niv != ctx->blocksize) { - error_setg(errp, "Expected IV size %zu not %zu", - ctx->blocksize, niv); - return -1; - } - - memcpy(ctx->iv, iv, niv); - return 0; -} - -static const struct QCryptoCipherDriver qcrypto_gcrypt_xts_driver = { - .cipher_encrypt = qcrypto_gcrypt_xts_encrypt, - .cipher_decrypt = qcrypto_gcrypt_xts_decrypt, - .cipher_setiv = qcrypto_gcrypt_xts_setiv, - .cipher_free = qcrypto_gcrypt_xts_ctx_free, -}; -#endif /* CONFIG_QEMU_PRIVATE_XTS */ - - static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode, const uint8_t *key, @@ -278,7 +186,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, } switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: gcryalg = GCRY_CIPHER_DES; break; case QCRYPTO_CIPHER_ALG_3DES: @@ -323,12 +231,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, gcrymode = GCRY_CIPHER_MODE_ECB; break; case QCRYPTO_CIPHER_MODE_XTS: -#ifdef CONFIG_QEMU_PRIVATE_XTS - drv = &qcrypto_gcrypt_xts_driver; - gcrymode = GCRY_CIPHER_MODE_ECB; -#else gcrymode = GCRY_CIPHER_MODE_XTS; -#endif break; case QCRYPTO_CIPHER_MODE_CBC: gcrymode = GCRY_CIPHER_MODE_CBC; @@ -354,44 +257,7 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm 
alg, } ctx->blocksize = gcry_cipher_get_algo_blklen(gcryalg); -#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - if (ctx->blocksize != XTS_BLOCK_SIZE) { - error_setg(errp, - "Cipher block size %zu must equal XTS block size %d", - ctx->blocksize, XTS_BLOCK_SIZE); - goto error; - } - err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0); - if (err != 0) { - error_setg(errp, "Cannot initialize cipher: %s", - gcry_strerror(err)); - goto error; - } - } -#endif - - if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) { - /* We're using standard DES cipher from gcrypt, so we need - * to munge the key so that the results are the same as the - * bizarre RFB variant of DES :-) - */ - uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey); - err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey); - g_free(rfbkey); - } else { -#ifdef CONFIG_QEMU_PRIVATE_XTS - if (mode == QCRYPTO_CIPHER_MODE_XTS) { - nkey /= 2; - err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey); - if (err != 0) { - error_setg(errp, "Cannot set key: %s", gcry_strerror(err)); - goto error; - } - } -#endif - err = gcry_cipher_setkey(ctx->handle, key, nkey); - } + err = gcry_cipher_setkey(ctx->handle, key, nkey); if (err != 0) { error_setg(errp, "Cannot set key: %s", gcry_strerror(err)); goto error; @@ -400,9 +266,6 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, return &ctx->base; error: -#ifdef CONFIG_QEMU_PRIVATE_XTS - gcry_cipher_close(ctx->tweakhandle); -#endif gcry_cipher_close(ctx->handle); g_free(ctx); return NULL; diff --git a/crypto/cipher-gnutls.c.inc b/crypto/cipher-gnutls.c.inc new file mode 100644 index 00000000000..501e4e07a5b --- /dev/null +++ b/crypto/cipher-gnutls.c.inc @@ -0,0 +1,335 @@ +/* + * QEMU Crypto cipher gnutls algorithms + * + * Copyright (c) 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ * + */ + +#include "qemu/osdep.h" +#include "cipherpriv.h" + +#include + +#if GNUTLS_VERSION_NUMBER >= 0x030608 +#define QEMU_GNUTLS_XTS +#endif + +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) +{ + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_192: + case QCRYPTO_CIPHER_ALG_AES_256: + case QCRYPTO_CIPHER_ALG_DES: + case QCRYPTO_CIPHER_ALG_3DES: + return true; + default: + return false; + } +#ifdef QEMU_GNUTLS_XTS + case QCRYPTO_CIPHER_MODE_XTS: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + case QCRYPTO_CIPHER_ALG_AES_256: + return true; + default: + return false; + } +#endif + default: + return false; + } +} + +typedef struct QCryptoCipherGnutls QCryptoCipherGnutls; +struct QCryptoCipherGnutls { + QCryptoCipher base; + gnutls_cipher_hd_t handle; /* XTS & CBC mode */ + gnutls_cipher_algorithm_t galg; /* ECB mode */ + guint8 *key; /* ECB mode */ + size_t nkey; /* ECB mode */ + size_t blocksize; +}; + + +static void +qcrypto_gnutls_cipher_free(QCryptoCipher *cipher) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + + g_free(ctx->key); + if (ctx->handle) { + gnutls_cipher_deinit(ctx->handle); + } + g_free(ctx); +} + + +static int +qcrypto_gnutls_cipher_encrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + int err; + + if (len % ctx->blocksize) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + if (ctx->handle) { /* CBC / XTS mode */ + err = gnutls_cipher_encrypt2(ctx->handle, + in, len, + out, len); + if (err != 0) { + error_setg(errp, "Cannot encrypt data: %s", + gnutls_strerror(err)); + return -1; + } + } else { /* ECB mode very inefficiently faked with CBC */ + g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize); + while (len) { + gnutls_cipher_hd_t handle; + gnutls_datum_t gkey = { (unsigned char *)ctx->key, ctx->nkey }; + int err = gnutls_cipher_init(&handle, ctx->galg, &gkey, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gnutls_strerror(err)); + return -1; + } + + gnutls_cipher_set_iv(handle, iv, ctx->blocksize); + + err = gnutls_cipher_encrypt2(handle, + in, ctx->blocksize, + out, ctx->blocksize); + if (err != 0) { + gnutls_cipher_deinit(handle); + error_setg(errp, "Cannot encrypt data: %s", + gnutls_strerror(err)); + return -1; + } + gnutls_cipher_deinit(handle); + + len -= ctx->blocksize; + in += ctx->blocksize; + out += ctx->blocksize; + } + } + + return 0; +} + + +static int +qcrypto_gnutls_cipher_decrypt(QCryptoCipher *cipher, + const void *in, + void *out, + size_t len, + Error **errp) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + int err; + + if (len % ctx->blocksize) { + error_setg(errp, "Length %zu must be a multiple of block size %zu", + len, ctx->blocksize); + return -1; + } + + if (ctx->handle) { /* CBC / XTS mode */ + err = gnutls_cipher_decrypt2(ctx->handle, + in, len, + out, len); + + if (err != 0) { + error_setg(errp, "Cannot decrypt data: %s", + gnutls_strerror(err)); + return -1; + } + } else { /* ECB mode very inefficiently faked with CBC */ + g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize); + while (len) { + gnutls_cipher_hd_t handle; + gnutls_datum_t gkey = { (unsigned char 
*)ctx->key, ctx->nkey }; + int err = gnutls_cipher_init(&handle, ctx->galg, &gkey, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gnutls_strerror(err)); + return -1; + } + + gnutls_cipher_set_iv(handle, iv, ctx->blocksize); + + err = gnutls_cipher_decrypt2(handle, + in, ctx->blocksize, + out, ctx->blocksize); + if (err != 0) { + gnutls_cipher_deinit(handle); + error_setg(errp, "Cannot encrypt data: %s", + gnutls_strerror(err)); + return -1; + } + gnutls_cipher_deinit(handle); + + len -= ctx->blocksize; + in += ctx->blocksize; + out += ctx->blocksize; + } + } + + return 0; +} + +static int +qcrypto_gnutls_cipher_setiv(QCryptoCipher *cipher, + const uint8_t *iv, size_t niv, + Error **errp) +{ + QCryptoCipherGnutls *ctx = container_of(cipher, QCryptoCipherGnutls, base); + + if (niv != ctx->blocksize) { + error_setg(errp, "Expected IV size %zu not %zu", + ctx->blocksize, niv); + return -1; + } + + gnutls_cipher_set_iv(ctx->handle, (unsigned char *)iv, niv); + + return 0; +} + + +static struct QCryptoCipherDriver gnutls_driver = { + .cipher_encrypt = qcrypto_gnutls_cipher_encrypt, + .cipher_decrypt = qcrypto_gnutls_cipher_decrypt, + .cipher_setiv = qcrypto_gnutls_cipher_setiv, + .cipher_free = qcrypto_gnutls_cipher_free, +}; + +static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode, + const uint8_t *key, + size_t nkey, + Error **errp) +{ + QCryptoCipherGnutls *ctx; + gnutls_datum_t gkey = { (unsigned char *)key, nkey }; + gnutls_cipher_algorithm_t galg = GNUTLS_CIPHER_UNKNOWN; + int err; + + switch (mode) { +#ifdef QEMU_GNUTLS_XTS + case QCRYPTO_CIPHER_MODE_XTS: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + galg = GNUTLS_CIPHER_AES_128_XTS; + break; + case QCRYPTO_CIPHER_ALG_AES_256: + galg = GNUTLS_CIPHER_AES_256_XTS; + break; + default: + break; + } + break; +#endif + + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + switch (alg) { + case QCRYPTO_CIPHER_ALG_AES_128: + galg = GNUTLS_CIPHER_AES_128_CBC; + break; + case QCRYPTO_CIPHER_ALG_AES_192: + galg = GNUTLS_CIPHER_AES_192_CBC; + break; + case QCRYPTO_CIPHER_ALG_AES_256: + galg = GNUTLS_CIPHER_AES_256_CBC; + break; + case QCRYPTO_CIPHER_ALG_DES: + galg = GNUTLS_CIPHER_DES_CBC; + break; + case QCRYPTO_CIPHER_ALG_3DES: + galg = GNUTLS_CIPHER_3DES_CBC; + break; + default: + break; + } + break; + default: + break; + } + + if (galg == GNUTLS_CIPHER_UNKNOWN) { + error_setg(errp, "Unsupported cipher algorithm %s with %s mode", + QCryptoCipherAlgorithm_str(alg), + QCryptoCipherMode_str(mode)); + return NULL; + } + + if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) { + return NULL; + } + + ctx = g_new0(QCryptoCipherGnutls, 1); + ctx->base.driver = &gnutls_driver; + + if (mode == QCRYPTO_CIPHER_MODE_ECB) { + ctx->key = g_new0(guint8, nkey); + memcpy(ctx->key, key, nkey); + ctx->nkey = nkey; + ctx->galg = galg; + } else { + err = gnutls_cipher_init(&ctx->handle, galg, &gkey, NULL); + if (err != 0) { + error_setg(errp, "Cannot initialize cipher: %s", + gnutls_strerror(err)); + goto error; + } + } + + if (alg == QCRYPTO_CIPHER_ALG_DES || + alg == QCRYPTO_CIPHER_ALG_3DES) + ctx->blocksize = 8; + else + ctx->blocksize = 16; + + /* + * Our API contract for requires iv to be optional + * but nettle gets unhappy when called by gnutls + * in this case, so we just force set a default + * all-zeros IV, to match behaviour of other backends. 
+ */ + if (mode != QCRYPTO_CIPHER_MODE_ECB) { + g_autofree unsigned char *iv = g_new0(unsigned char, ctx->blocksize); + gnutls_cipher_set_iv(ctx->handle, iv, ctx->blocksize); + } + + return &ctx->base; + + error: + qcrypto_gnutls_cipher_free(&ctx->base); + return NULL; +} diff --git a/crypto/cipher-nettle.c.inc b/crypto/cipher-nettle.c.inc index cac771e4ff7..24cc61f87bf 100644 --- a/crypto/cipher-nettle.c.inc +++ b/crypto/cipher-nettle.c.inc @@ -34,47 +34,6 @@ #include #endif -typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx, - size_t length, - uint8_t *dst, - const uint8_t *src); - -#if CONFIG_NETTLE_VERSION_MAJOR < 3 -typedef nettle_crypt_func * QCryptoCipherNettleFuncNative; -typedef void * cipher_ctx_t; -typedef unsigned cipher_length_t; -#define CONST_CTX - -#define cast5_set_key cast128_set_key - -#define aes128_ctx aes_ctx -#define aes192_ctx aes_ctx -#define aes256_ctx aes_ctx -#define aes128_set_encrypt_key(c, k) \ - aes_set_encrypt_key(c, 16, k) -#define aes192_set_encrypt_key(c, k) \ - aes_set_encrypt_key(c, 24, k) -#define aes256_set_encrypt_key(c, k) \ - aes_set_encrypt_key(c, 32, k) -#define aes128_set_decrypt_key(c, k) \ - aes_set_decrypt_key(c, 16, k) -#define aes192_set_decrypt_key(c, k) \ - aes_set_decrypt_key(c, 24, k) -#define aes256_set_decrypt_key(c, k) \ - aes_set_decrypt_key(c, 32, k) -#define aes128_encrypt aes_encrypt -#define aes192_encrypt aes_encrypt -#define aes256_encrypt aes_encrypt -#define aes128_decrypt aes_decrypt -#define aes192_decrypt aes_decrypt -#define aes256_decrypt aes_decrypt -#else -typedef nettle_cipher_func * QCryptoCipherNettleFuncNative; -typedef const void * cipher_ctx_t; -typedef size_t cipher_length_t; -#define CONST_CTX const -#endif - static inline bool qcrypto_length_check(size_t len, size_t blocksize, Error **errp) { @@ -197,12 +156,12 @@ static const struct QCryptoCipherDriver NAME##_driver_ctr = { \ static void NAME##_xts_wrape(const void *ctx, size_t length, \ uint8_t *dst, const uint8_t *src) \ { \ - ENCRYPT((cipher_ctx_t)ctx, length, dst, src); \ + ENCRYPT((const void *)ctx, length, dst, src); \ } \ static void NAME##_xts_wrapd(const void *ctx, size_t length, \ uint8_t *dst, const uint8_t *src) \ { \ - DECRYPT((cipher_ctx_t)ctx, length, dst, src); \ + DECRYPT((const void *)ctx, length, dst, src); \ } \ static int NAME##_encrypt_xts(QCryptoCipher *cipher, const void *in, \ void *out, size_t len, Error **errp) \ @@ -276,25 +235,25 @@ static const struct QCryptoCipherDriver NAME##_driver_xts = { \ DEFINE_XTS(NAME, TYPE, BLEN, ENCRYPT, DECRYPT) -typedef struct QCryptoNettleDESRFB { +typedef struct QCryptoNettleDES { QCryptoCipher base; struct des_ctx key; uint8_t iv[DES_BLOCK_SIZE]; -} QCryptoNettleDESRFB; +} QCryptoNettleDES; -static void des_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void des_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des_encrypt(ctx, length, dst, src); } -static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void des_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des_decrypt(ctx, length, dst, src); } -DEFINE_ECB_CBC_CTR(qcrypto_nettle_des_rfb, QCryptoNettleDESRFB, +DEFINE_ECB_CBC_CTR(qcrypto_nettle_des, QCryptoNettleDES, DES_BLOCK_SIZE, des_encrypt_native, des_decrypt_native) @@ -304,13 +263,13 @@ typedef struct QCryptoNettleDES3 { uint8_t iv[DES3_BLOCK_SIZE]; } QCryptoNettleDES3; -static void des3_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void 
des3_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des3_encrypt(ctx, length, dst, src); } -static void des3_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void des3_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { des3_decrypt(ctx, length, dst, src); @@ -327,17 +286,17 @@ typedef struct QCryptoNettleAES128 { struct aes128_ctx key[2], key_xts[2]; } QCryptoNettleAES128; -static void aes128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes128_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes128_ctx *keys = ctx; + const struct aes128_ctx *keys = ctx; aes128_encrypt(&keys[0], length, dst, src); } -static void aes128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes128_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes128_ctx *keys = ctx; + const struct aes128_ctx *keys = ctx; aes128_decrypt(&keys[1], length, dst, src); } @@ -353,17 +312,17 @@ typedef struct QCryptoNettleAES192 { struct aes192_ctx key[2], key_xts[2]; } QCryptoNettleAES192; -static void aes192_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes192_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes192_ctx *keys = ctx; + const struct aes192_ctx *keys = ctx; aes192_encrypt(&keys[0], length, dst, src); } -static void aes192_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes192_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes192_ctx *keys = ctx; + const struct aes192_ctx *keys = ctx; aes192_decrypt(&keys[1], length, dst, src); } @@ -379,17 +338,17 @@ typedef struct QCryptoNettleAES256 { struct aes256_ctx key[2], key_xts[2]; } QCryptoNettleAES256; -static void aes256_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void aes256_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes256_ctx *keys = ctx; + const struct aes256_ctx *keys = ctx; aes256_encrypt(&keys[0], length, dst, src); } -static void aes256_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, - uint8_t *dst, const uint8_t *src) +static void aes256_decrypt_native(const void *ctx, size_t length, + uint8_t *dst, const uint8_t *src) { - CONST_CTX struct aes256_ctx *keys = ctx; + const struct aes256_ctx *keys = ctx; aes256_decrypt(&keys[1], length, dst, src); } @@ -404,13 +363,13 @@ typedef struct QCryptoNettleCAST128 { struct cast128_ctx key, key_xts; } QCryptoNettleCAST128; -static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void cast128_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { cast128_encrypt(ctx, length, dst, src); } -static void cast128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void cast128_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { cast128_decrypt(ctx, length, dst, src); @@ -428,13 +387,13 @@ typedef struct QCryptoNettleSerpent { } QCryptoNettleSerpent; -static void serpent_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void serpent_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { serpent_encrypt(ctx, length, dst, src); } -static void serpent_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void 
serpent_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { serpent_decrypt(ctx, length, dst, src); @@ -451,13 +410,13 @@ typedef struct QCryptoNettleTwofish { struct twofish_ctx key, key_xts; } QCryptoNettleTwofish; -static void twofish_encrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void twofish_encrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { twofish_encrypt(ctx, length, dst, src); } -static void twofish_decrypt_native(cipher_ctx_t ctx, cipher_length_t length, +static void twofish_decrypt_native(const void *ctx, size_t length, uint8_t *dst, const uint8_t *src) { twofish_decrypt(ctx, length, dst, src); @@ -472,7 +431,7 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, QCryptoCipherMode mode) { switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: case QCRYPTO_CIPHER_ALG_3DES: case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: @@ -521,32 +480,28 @@ static QCryptoCipher *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, } switch (alg) { - case QCRYPTO_CIPHER_ALG_DES_RFB: + case QCRYPTO_CIPHER_ALG_DES: { - QCryptoNettleDESRFB *ctx; + QCryptoNettleDES *ctx; const QCryptoCipherDriver *drv; - uint8_t *rfbkey; switch (mode) { case QCRYPTO_CIPHER_MODE_ECB: - drv = &qcrypto_nettle_des_rfb_driver_ecb; + drv = &qcrypto_nettle_des_driver_ecb; break; case QCRYPTO_CIPHER_MODE_CBC: - drv = &qcrypto_nettle_des_rfb_driver_cbc; + drv = &qcrypto_nettle_des_driver_cbc; break; case QCRYPTO_CIPHER_MODE_CTR: - drv = &qcrypto_nettle_des_rfb_driver_ctr; + drv = &qcrypto_nettle_des_driver_ctr; break; default: goto bad_cipher_mode; } - ctx = g_new0(QCryptoNettleDESRFB, 1); + ctx = g_new0(QCryptoNettleDES, 1); ctx->base.driver = drv; - - rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey); - des_set_key(&ctx->key, rfbkey); - g_free(rfbkey); + des_set_key(&ctx->key, key); return &ctx->base; } diff --git a/crypto/cipher.c b/crypto/cipher.c index 068b2fb867c..74b09a5b261 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -29,7 +29,7 @@ static const size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = { [QCRYPTO_CIPHER_ALG_AES_128] = 16, [QCRYPTO_CIPHER_ALG_AES_192] = 24, [QCRYPTO_CIPHER_ALG_AES_256] = 32, - [QCRYPTO_CIPHER_ALG_DES_RFB] = 8, + [QCRYPTO_CIPHER_ALG_DES] = 8, [QCRYPTO_CIPHER_ALG_3DES] = 24, [QCRYPTO_CIPHER_ALG_CAST5_128] = 16, [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16, @@ -44,7 +44,7 @@ static const size_t alg_block_len[QCRYPTO_CIPHER_ALG__MAX] = { [QCRYPTO_CIPHER_ALG_AES_128] = 16, [QCRYPTO_CIPHER_ALG_AES_192] = 16, [QCRYPTO_CIPHER_ALG_AES_256] = 16, - [QCRYPTO_CIPHER_ALG_DES_RFB] = 8, + [QCRYPTO_CIPHER_ALG_DES] = 8, [QCRYPTO_CIPHER_ALG_3DES] = 8, [QCRYPTO_CIPHER_ALG_CAST5_128] = 8, [QCRYPTO_CIPHER_ALG_SERPENT_128] = 16, @@ -107,9 +107,9 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg, } if (mode == QCRYPTO_CIPHER_MODE_XTS) { - if (alg == QCRYPTO_CIPHER_ALG_DES_RFB - || alg == QCRYPTO_CIPHER_ALG_3DES) { - error_setg(errp, "XTS mode not compatible with DES-RFB/3DES"); + if (alg == QCRYPTO_CIPHER_ALG_DES || + alg == QCRYPTO_CIPHER_ALG_3DES) { + error_setg(errp, "XTS mode not compatible with DES/3DES"); return false; } if (nkey % 2) { @@ -132,28 +132,12 @@ qcrypto_cipher_validate_key_length(QCryptoCipherAlgorithm alg, return true; } -#if defined(CONFIG_GCRYPT) || defined(CONFIG_NETTLE) -static uint8_t * -qcrypto_cipher_munge_des_rfb_key(const uint8_t *key, - size_t nkey) -{ - uint8_t *ret = g_new0(uint8_t, nkey); - size_t i; - for (i = 0; i < nkey; i++) { - uint8_t 
r = key[i]; - r = (r & 0xf0) >> 4 | (r & 0x0f) << 4; - r = (r & 0xcc) >> 2 | (r & 0x33) << 2; - r = (r & 0xaa) >> 1 | (r & 0x55) << 1; - ret[i] = r; - } - return ret; -} -#endif /* CONFIG_GCRYPT || CONFIG_NETTLE */ - #ifdef CONFIG_GCRYPT #include "cipher-gcrypt.c.inc" #elif defined CONFIG_NETTLE #include "cipher-nettle.c.inc" +#elif defined CONFIG_GNUTLS_CRYPTO +#include "cipher-gnutls.c.inc" #else #include "cipher-builtin.c.inc" #endif diff --git a/crypto/desrfb.c b/crypto/desrfb.c deleted file mode 100644 index b2a105ebbcb..00000000000 --- a/crypto/desrfb.c +++ /dev/null @@ -1,416 +0,0 @@ -/* - * This is D3DES (V5.09) by Richard Outerbridge with the double and - * triple-length support removed for use in VNC. Also the bytebit[] array - * has been reversed so that the most significant bit in each byte of the - * key is ignored, not the least significant. - * - * These changes are: - * Copyright (C) 1999 AT&T Laboratories Cambridge. All Rights Reserved. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ - -/* D3DES (V5.09) - - * - * A portable, public domain, version of the Data Encryption Standard. - * - * Written with Symantec's THINK (Lightspeed) C by Richard Outerbridge. - * Thanks to: Dan Hoey for his excellent Initial and Inverse permutation - * code; Jim Gillogly & Phil Karn for the DES key schedule code; Dennis - * Ferguson, Eric Young and Dana How for comparing notes; and Ray Lau, - * for humouring me on. - * - * Copyright (c) 1988,1989,1990,1991,1992 by Richard Outerbridge. - * (GEnie : OUTER; CIS : [71755,204]) Graven Imagery, 1992. - */ - -#include "qemu/osdep.h" -#include "crypto/desrfb.h" - -static void scrunch(unsigned char *, unsigned long *); -static void unscrun(unsigned long *, unsigned char *); -static void desfunc(unsigned long *, unsigned long *); -static void cookey(unsigned long *); - -static unsigned long KnL[32] = { 0L }; - -static const unsigned short bytebit[8] = { - 01, 02, 04, 010, 020, 040, 0100, 0200 }; - -static const unsigned long bigbyte[24] = { - 0x800000L, 0x400000L, 0x200000L, 0x100000L, - 0x80000L, 0x40000L, 0x20000L, 0x10000L, - 0x8000L, 0x4000L, 0x2000L, 0x1000L, - 0x800L, 0x400L, 0x200L, 0x100L, - 0x80L, 0x40L, 0x20L, 0x10L, - 0x8L, 0x4L, 0x2L, 0x1L }; - -/* Use the key schedule specified in the Standard (ANSI X3.92-1981). */ - -static const unsigned char pc1[56] = { - 56, 48, 40, 32, 24, 16, 8, 0, 57, 49, 41, 33, 25, 17, - 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35, - 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21, - 13, 5, 60, 52, 44, 36, 28, 20, 12, 4, 27, 19, 11, 3 }; - -static const unsigned char totrot[16] = { - 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28 }; - -static const unsigned char pc2[48] = { - 13, 16, 10, 23, 0, 4, 2, 27, 14, 5, 20, 9, - 22, 18, 11, 3, 25, 7, 15, 6, 26, 19, 12, 1, - 40, 51, 30, 36, 46, 54, 29, 39, 50, 44, 32, 47, - 43, 48, 38, 55, 33, 52, 45, 41, 49, 35, 28, 31 }; - -/* Thanks to James Gillogly & Phil Karn! */ -void deskey(unsigned char *key, int edf) -{ - register int i, j, l, m, n; - unsigned char pc1m[56], pcr[56]; - unsigned long kn[32]; - - for ( j = 0; j < 56; j++ ) { - l = pc1[j]; - m = l & 07; - pc1m[j] = (key[l >> 3] & bytebit[m]) ? 
1 : 0; - } - for( i = 0; i < 16; i++ ) { - if( edf == DE1 ) m = (15 - i) << 1; - else m = i << 1; - n = m + 1; - kn[m] = kn[n] = 0L; - for( j = 0; j < 28; j++ ) { - l = j + totrot[i]; - if( l < 28 ) pcr[j] = pc1m[l]; - else pcr[j] = pc1m[l - 28]; - } - for( j = 28; j < 56; j++ ) { - l = j + totrot[i]; - if( l < 56 ) pcr[j] = pc1m[l]; - else pcr[j] = pc1m[l - 28]; - } - for( j = 0; j < 24; j++ ) { - if( pcr[pc2[j]] ) kn[m] |= bigbyte[j]; - if( pcr[pc2[j + 24]] ) kn[n] |= bigbyte[j]; - } - } - cookey(kn); - return; - } - -static void cookey(register unsigned long *raw1) -{ - register unsigned long *cook, *raw0; - unsigned long dough[32]; - register int i; - - cook = dough; - for( i = 0; i < 16; i++, raw1++ ) { - raw0 = raw1++; - *cook = (*raw0 & 0x00fc0000L) << 6; - *cook |= (*raw0 & 0x00000fc0L) << 10; - *cook |= (*raw1 & 0x00fc0000L) >> 10; - *cook++ |= (*raw1 & 0x00000fc0L) >> 6; - *cook = (*raw0 & 0x0003f000L) << 12; - *cook |= (*raw0 & 0x0000003fL) << 16; - *cook |= (*raw1 & 0x0003f000L) >> 4; - *cook++ |= (*raw1 & 0x0000003fL); - } - usekey(dough); - return; - } - -void usekey(register unsigned long *from) -{ - register unsigned long *to, *endp; - - to = KnL, endp = &KnL[32]; - while( to < endp ) *to++ = *from++; - return; - } - -void des(unsigned char *inblock, unsigned char *outblock) -{ - unsigned long work[2]; - - scrunch(inblock, work); - desfunc(work, KnL); - unscrun(work, outblock); - return; - } - -static void scrunch(register unsigned char *outof, register unsigned long *into) -{ - *into = (*outof++ & 0xffL) << 24; - *into |= (*outof++ & 0xffL) << 16; - *into |= (*outof++ & 0xffL) << 8; - *into++ |= (*outof++ & 0xffL); - *into = (*outof++ & 0xffL) << 24; - *into |= (*outof++ & 0xffL) << 16; - *into |= (*outof++ & 0xffL) << 8; - *into |= (*outof & 0xffL); - return; - } - -static void unscrun(register unsigned long *outof, register unsigned char *into) -{ - *into++ = (unsigned char)((*outof >> 24) & 0xffL); - *into++ = (unsigned char)((*outof >> 16) & 0xffL); - *into++ = (unsigned char)((*outof >> 8) & 0xffL); - *into++ = (unsigned char)(*outof++ & 0xffL); - *into++ = (unsigned char)((*outof >> 24) & 0xffL); - *into++ = (unsigned char)((*outof >> 16) & 0xffL); - *into++ = (unsigned char)((*outof >> 8) & 0xffL); - *into = (unsigned char)(*outof & 0xffL); - return; - } - -static const unsigned long SP1[64] = { - 0x01010400L, 0x00000000L, 0x00010000L, 0x01010404L, - 0x01010004L, 0x00010404L, 0x00000004L, 0x00010000L, - 0x00000400L, 0x01010400L, 0x01010404L, 0x00000400L, - 0x01000404L, 0x01010004L, 0x01000000L, 0x00000004L, - 0x00000404L, 0x01000400L, 0x01000400L, 0x00010400L, - 0x00010400L, 0x01010000L, 0x01010000L, 0x01000404L, - 0x00010004L, 0x01000004L, 0x01000004L, 0x00010004L, - 0x00000000L, 0x00000404L, 0x00010404L, 0x01000000L, - 0x00010000L, 0x01010404L, 0x00000004L, 0x01010000L, - 0x01010400L, 0x01000000L, 0x01000000L, 0x00000400L, - 0x01010004L, 0x00010000L, 0x00010400L, 0x01000004L, - 0x00000400L, 0x00000004L, 0x01000404L, 0x00010404L, - 0x01010404L, 0x00010004L, 0x01010000L, 0x01000404L, - 0x01000004L, 0x00000404L, 0x00010404L, 0x01010400L, - 0x00000404L, 0x01000400L, 0x01000400L, 0x00000000L, - 0x00010004L, 0x00010400L, 0x00000000L, 0x01010004L }; - -static const unsigned long SP2[64] = { - 0x80108020L, 0x80008000L, 0x00008000L, 0x00108020L, - 0x00100000L, 0x00000020L, 0x80100020L, 0x80008020L, - 0x80000020L, 0x80108020L, 0x80108000L, 0x80000000L, - 0x80008000L, 0x00100000L, 0x00000020L, 0x80100020L, - 0x00108000L, 0x00100020L, 0x80008020L, 0x00000000L, - 0x80000000L, 
0x00008000L, 0x00108020L, 0x80100000L, - 0x00100020L, 0x80000020L, 0x00000000L, 0x00108000L, - 0x00008020L, 0x80108000L, 0x80100000L, 0x00008020L, - 0x00000000L, 0x00108020L, 0x80100020L, 0x00100000L, - 0x80008020L, 0x80100000L, 0x80108000L, 0x00008000L, - 0x80100000L, 0x80008000L, 0x00000020L, 0x80108020L, - 0x00108020L, 0x00000020L, 0x00008000L, 0x80000000L, - 0x00008020L, 0x80108000L, 0x00100000L, 0x80000020L, - 0x00100020L, 0x80008020L, 0x80000020L, 0x00100020L, - 0x00108000L, 0x00000000L, 0x80008000L, 0x00008020L, - 0x80000000L, 0x80100020L, 0x80108020L, 0x00108000L }; - -static const unsigned long SP3[64] = { - 0x00000208L, 0x08020200L, 0x00000000L, 0x08020008L, - 0x08000200L, 0x00000000L, 0x00020208L, 0x08000200L, - 0x00020008L, 0x08000008L, 0x08000008L, 0x00020000L, - 0x08020208L, 0x00020008L, 0x08020000L, 0x00000208L, - 0x08000000L, 0x00000008L, 0x08020200L, 0x00000200L, - 0x00020200L, 0x08020000L, 0x08020008L, 0x00020208L, - 0x08000208L, 0x00020200L, 0x00020000L, 0x08000208L, - 0x00000008L, 0x08020208L, 0x00000200L, 0x08000000L, - 0x08020200L, 0x08000000L, 0x00020008L, 0x00000208L, - 0x00020000L, 0x08020200L, 0x08000200L, 0x00000000L, - 0x00000200L, 0x00020008L, 0x08020208L, 0x08000200L, - 0x08000008L, 0x00000200L, 0x00000000L, 0x08020008L, - 0x08000208L, 0x00020000L, 0x08000000L, 0x08020208L, - 0x00000008L, 0x00020208L, 0x00020200L, 0x08000008L, - 0x08020000L, 0x08000208L, 0x00000208L, 0x08020000L, - 0x00020208L, 0x00000008L, 0x08020008L, 0x00020200L }; - -static const unsigned long SP4[64] = { - 0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L, - 0x00802080L, 0x00800081L, 0x00800001L, 0x00002001L, - 0x00000000L, 0x00802000L, 0x00802000L, 0x00802081L, - 0x00000081L, 0x00000000L, 0x00800080L, 0x00800001L, - 0x00000001L, 0x00002000L, 0x00800000L, 0x00802001L, - 0x00000080L, 0x00800000L, 0x00002001L, 0x00002080L, - 0x00800081L, 0x00000001L, 0x00002080L, 0x00800080L, - 0x00002000L, 0x00802080L, 0x00802081L, 0x00000081L, - 0x00800080L, 0x00800001L, 0x00802000L, 0x00802081L, - 0x00000081L, 0x00000000L, 0x00000000L, 0x00802000L, - 0x00002080L, 0x00800080L, 0x00800081L, 0x00000001L, - 0x00802001L, 0x00002081L, 0x00002081L, 0x00000080L, - 0x00802081L, 0x00000081L, 0x00000001L, 0x00002000L, - 0x00800001L, 0x00002001L, 0x00802080L, 0x00800081L, - 0x00002001L, 0x00002080L, 0x00800000L, 0x00802001L, - 0x00000080L, 0x00800000L, 0x00002000L, 0x00802080L }; - -static const unsigned long SP5[64] = { - 0x00000100L, 0x02080100L, 0x02080000L, 0x42000100L, - 0x00080000L, 0x00000100L, 0x40000000L, 0x02080000L, - 0x40080100L, 0x00080000L, 0x02000100L, 0x40080100L, - 0x42000100L, 0x42080000L, 0x00080100L, 0x40000000L, - 0x02000000L, 0x40080000L, 0x40080000L, 0x00000000L, - 0x40000100L, 0x42080100L, 0x42080100L, 0x02000100L, - 0x42080000L, 0x40000100L, 0x00000000L, 0x42000000L, - 0x02080100L, 0x02000000L, 0x42000000L, 0x00080100L, - 0x00080000L, 0x42000100L, 0x00000100L, 0x02000000L, - 0x40000000L, 0x02080000L, 0x42000100L, 0x40080100L, - 0x02000100L, 0x40000000L, 0x42080000L, 0x02080100L, - 0x40080100L, 0x00000100L, 0x02000000L, 0x42080000L, - 0x42080100L, 0x00080100L, 0x42000000L, 0x42080100L, - 0x02080000L, 0x00000000L, 0x40080000L, 0x42000000L, - 0x00080100L, 0x02000100L, 0x40000100L, 0x00080000L, - 0x00000000L, 0x40080000L, 0x02080100L, 0x40000100L }; - -static const unsigned long SP6[64] = { - 0x20000010L, 0x20400000L, 0x00004000L, 0x20404010L, - 0x20400000L, 0x00000010L, 0x20404010L, 0x00400000L, - 0x20004000L, 0x00404010L, 0x00400000L, 0x20000010L, - 0x00400010L, 0x20004000L, 0x20000000L, 
0x00004010L, - 0x00000000L, 0x00400010L, 0x20004010L, 0x00004000L, - 0x00404000L, 0x20004010L, 0x00000010L, 0x20400010L, - 0x20400010L, 0x00000000L, 0x00404010L, 0x20404000L, - 0x00004010L, 0x00404000L, 0x20404000L, 0x20000000L, - 0x20004000L, 0x00000010L, 0x20400010L, 0x00404000L, - 0x20404010L, 0x00400000L, 0x00004010L, 0x20000010L, - 0x00400000L, 0x20004000L, 0x20000000L, 0x00004010L, - 0x20000010L, 0x20404010L, 0x00404000L, 0x20400000L, - 0x00404010L, 0x20404000L, 0x00000000L, 0x20400010L, - 0x00000010L, 0x00004000L, 0x20400000L, 0x00404010L, - 0x00004000L, 0x00400010L, 0x20004010L, 0x00000000L, - 0x20404000L, 0x20000000L, 0x00400010L, 0x20004010L }; - -static const unsigned long SP7[64] = { - 0x00200000L, 0x04200002L, 0x04000802L, 0x00000000L, - 0x00000800L, 0x04000802L, 0x00200802L, 0x04200800L, - 0x04200802L, 0x00200000L, 0x00000000L, 0x04000002L, - 0x00000002L, 0x04000000L, 0x04200002L, 0x00000802L, - 0x04000800L, 0x00200802L, 0x00200002L, 0x04000800L, - 0x04000002L, 0x04200000L, 0x04200800L, 0x00200002L, - 0x04200000L, 0x00000800L, 0x00000802L, 0x04200802L, - 0x00200800L, 0x00000002L, 0x04000000L, 0x00200800L, - 0x04000000L, 0x00200800L, 0x00200000L, 0x04000802L, - 0x04000802L, 0x04200002L, 0x04200002L, 0x00000002L, - 0x00200002L, 0x04000000L, 0x04000800L, 0x00200000L, - 0x04200800L, 0x00000802L, 0x00200802L, 0x04200800L, - 0x00000802L, 0x04000002L, 0x04200802L, 0x04200000L, - 0x00200800L, 0x00000000L, 0x00000002L, 0x04200802L, - 0x00000000L, 0x00200802L, 0x04200000L, 0x00000800L, - 0x04000002L, 0x04000800L, 0x00000800L, 0x00200002L }; - -static const unsigned long SP8[64] = { - 0x10001040L, 0x00001000L, 0x00040000L, 0x10041040L, - 0x10000000L, 0x10001040L, 0x00000040L, 0x10000000L, - 0x00040040L, 0x10040000L, 0x10041040L, 0x00041000L, - 0x10041000L, 0x00041040L, 0x00001000L, 0x00000040L, - 0x10040000L, 0x10000040L, 0x10001000L, 0x00001040L, - 0x00041000L, 0x00040040L, 0x10040040L, 0x10041000L, - 0x00001040L, 0x00000000L, 0x00000000L, 0x10040040L, - 0x10000040L, 0x10001000L, 0x00041040L, 0x00040000L, - 0x00041040L, 0x00040000L, 0x10041000L, 0x00001000L, - 0x00000040L, 0x10040040L, 0x00001000L, 0x00041040L, - 0x10001000L, 0x00000040L, 0x10000040L, 0x10040000L, - 0x10040040L, 0x10000000L, 0x00040000L, 0x10001040L, - 0x00000000L, 0x10041040L, 0x00040040L, 0x10000040L, - 0x10040000L, 0x10001000L, 0x10001040L, 0x00000000L, - 0x10041040L, 0x00041000L, 0x00041000L, 0x00001040L, - 0x00001040L, 0x00040040L, 0x10000000L, 0x10041000L }; - -static void desfunc(register unsigned long *block, register unsigned long *keys) -{ - register unsigned long fval, work, right, leftt; - register int round; - - leftt = block[0]; - right = block[1]; - work = ((leftt >> 4) ^ right) & 0x0f0f0f0fL; - right ^= work; - leftt ^= (work << 4); - work = ((leftt >> 16) ^ right) & 0x0000ffffL; - right ^= work; - leftt ^= (work << 16); - work = ((right >> 2) ^ leftt) & 0x33333333L; - leftt ^= work; - right ^= (work << 2); - work = ((right >> 8) ^ leftt) & 0x00ff00ffL; - leftt ^= work; - right ^= (work << 8); - right = ((right << 1) | ((right >> 31) & 1L)) & 0xffffffffL; - work = (leftt ^ right) & 0xaaaaaaaaL; - leftt ^= work; - right ^= work; - leftt = ((leftt << 1) | ((leftt >> 31) & 1L)) & 0xffffffffL; - - for( round = 0; round < 8; round++ ) { - work = (right << 28) | (right >> 4); - work ^= *keys++; - fval = SP7[ work & 0x3fL]; - fval |= SP5[(work >> 8) & 0x3fL]; - fval |= SP3[(work >> 16) & 0x3fL]; - fval |= SP1[(work >> 24) & 0x3fL]; - work = right ^ *keys++; - fval |= SP8[ work & 0x3fL]; - fval |= SP6[(work >> 
8) & 0x3fL]; - fval |= SP4[(work >> 16) & 0x3fL]; - fval |= SP2[(work >> 24) & 0x3fL]; - leftt ^= fval; - work = (leftt << 28) | (leftt >> 4); - work ^= *keys++; - fval = SP7[ work & 0x3fL]; - fval |= SP5[(work >> 8) & 0x3fL]; - fval |= SP3[(work >> 16) & 0x3fL]; - fval |= SP1[(work >> 24) & 0x3fL]; - work = leftt ^ *keys++; - fval |= SP8[ work & 0x3fL]; - fval |= SP6[(work >> 8) & 0x3fL]; - fval |= SP4[(work >> 16) & 0x3fL]; - fval |= SP2[(work >> 24) & 0x3fL]; - right ^= fval; - } - - right = (right << 31) | (right >> 1); - work = (leftt ^ right) & 0xaaaaaaaaL; - leftt ^= work; - right ^= work; - leftt = (leftt << 31) | (leftt >> 1); - work = ((leftt >> 8) ^ right) & 0x00ff00ffL; - right ^= work; - leftt ^= (work << 8); - work = ((leftt >> 2) ^ right) & 0x33333333L; - right ^= work; - leftt ^= (work << 2); - work = ((right >> 16) ^ leftt) & 0x0000ffffL; - leftt ^= work; - right ^= (work << 16); - work = ((right >> 4) ^ leftt) & 0x0f0f0f0fL; - leftt ^= work; - right ^= (work << 4); - *block++ = right; - *block = leftt; - return; - } - -/* Validation sets: - * - * Single-length key, single-length plaintext - - * Key : 0123 4567 89ab cdef - * Plain : 0123 4567 89ab cde7 - * Cipher : c957 4425 6a5e d31d - * - * Double-length key, single-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 - * Plain : 0123 4567 89ab cde7 - * Cipher : 7f1d 0a77 826b 8aff - * - * Double-length key, double-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 - * Plain : 0123 4567 89ab cdef 0123 4567 89ab cdff - * Cipher : 27a0 8440 406a df60 278f 47cf 42d6 15d7 - * - * Triple-length key, single-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567 - * Plain : 0123 4567 89ab cde7 - * Cipher : de0b 7c06 ae5e 0ed5 - * - * Triple-length key, double-length plaintext - - * Key : 0123 4567 89ab cdef fedc ba98 7654 3210 89ab cdef 0123 4567 - * Plain : 0123 4567 89ab cdef 0123 4567 89ab cdff - * Cipher : ad0d 1b30 ac17 cf07 0ed1 1c63 81e4 4de5 - * - * d3des V5.0a rwo 9208.07 18:44 Graven Imagery - **********************************************************************/ diff --git a/crypto/hash-gnutls.c b/crypto/hash-gnutls.c new file mode 100644 index 00000000000..17911ac5d1c --- /dev/null +++ b/crypto/hash-gnutls.c @@ -0,0 +1,104 @@ +/* + * QEMU Crypto hash algorithms + * + * Copyright (c) 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ * + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "crypto/hash.h" +#include "hashpriv.h" + + +static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_DIG_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_DIG_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_DIG_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_DIG_RMD160, +}; + +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg) +{ + size_t i; + const gnutls_digest_algorithm_t *algs; + if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) || + qcrypto_hash_alg_map[alg] == GNUTLS_DIG_UNKNOWN) { + return false; + } + algs = gnutls_digest_list(); + for (i = 0; algs[i] != GNUTLS_DIG_UNKNOWN; i++) { + if (algs[i] == qcrypto_hash_alg_map[alg]) { + return true; + } + } + return false; +} + + +static int +qcrypto_gnutls_hash_bytesv(QCryptoHashAlgorithm alg, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + int i, ret; + gnutls_hash_hd_t hash; + + if (!qcrypto_hash_supports(alg)) { + error_setg(errp, + "Unknown hash algorithm %d", + alg); + return -1; + } + + ret = gnutls_hash_get_len(qcrypto_hash_alg_map[alg]); + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, + "Result buffer size %zu is smaller than hash %d", + *resultlen, ret); + return -1; + } + + ret = gnutls_hash_init(&hash, qcrypto_hash_alg_map[alg]); + if (ret < 0) { + error_setg(errp, + "Unable to initialize hash algorithm: %s", + gnutls_strerror(ret)); + return -1; + } + + for (i = 0; i < niov; i++) { + gnutls_hash(hash, iov[i].iov_base, iov[i].iov_len); + } + + gnutls_hash_deinit(hash, *result); + return 0; +} + + +QCryptoHashDriver qcrypto_hash_lib_driver = { + .hash_bytesv = qcrypto_gnutls_hash_bytesv, +}; diff --git a/crypto/hash-nettle.c b/crypto/hash-nettle.c index 2a6ee7c7d53..1ca1a410628 100644 --- a/crypto/hash-nettle.c +++ b/crypto/hash-nettle.c @@ -26,18 +26,12 @@ #include #include -#if CONFIG_NETTLE_VERSION_MAJOR < 3 -typedef unsigned int hash_length_t; -#else -typedef size_t hash_length_t; -#endif - typedef void (*qcrypto_nettle_init)(void *ctx); typedef void (*qcrypto_nettle_write)(void *ctx, - hash_length_t len, + size_t len, const uint8_t *buf); typedef void (*qcrypto_nettle_result)(void *ctx, - hash_length_t len, + size_t len, uint8_t *buf); union qcrypto_hash_ctx { diff --git a/crypto/hmac-gnutls.c b/crypto/hmac-gnutls.c new file mode 100644 index 00000000000..24db383322c --- /dev/null +++ b/crypto/hmac-gnutls.c @@ -0,0 +1,139 @@ +/* + * QEMU Crypto hmac algorithms + * + * Copyright (c) 2021 Red Hat, Inc. + * + * Derived from hmac-gcrypt.c: + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include + +#include "qapi/error.h" +#include "crypto/hmac.h" +#include "hmacpriv.h" + +static int qcrypto_hmac_alg_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GNUTLS_MAC_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_MAC_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_MAC_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_MAC_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_MAC_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_MAC_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_MAC_RMD160, +}; + +typedef struct QCryptoHmacGnutls QCryptoHmacGnutls; +struct QCryptoHmacGnutls { + gnutls_hmac_hd_t handle; +}; + +bool qcrypto_hmac_supports(QCryptoHashAlgorithm alg) +{ + size_t i; + const gnutls_digest_algorithm_t *algs; + if (alg >= G_N_ELEMENTS(qcrypto_hmac_alg_map) || + qcrypto_hmac_alg_map[alg] == GNUTLS_DIG_UNKNOWN) { + return false; + } + algs = gnutls_digest_list(); + for (i = 0; algs[i] != GNUTLS_DIG_UNKNOWN; i++) { + if (algs[i] == qcrypto_hmac_alg_map[alg]) { + return true; + } + } + return false; +} + +void *qcrypto_hmac_ctx_new(QCryptoHashAlgorithm alg, + const uint8_t *key, size_t nkey, + Error **errp) +{ + QCryptoHmacGnutls *ctx; + int err; + + if (!qcrypto_hmac_supports(alg)) { + error_setg(errp, "Unsupported hmac algorithm %s", + QCryptoHashAlgorithm_str(alg)); + return NULL; + } + + ctx = g_new0(QCryptoHmacGnutls, 1); + + err = gnutls_hmac_init(&ctx->handle, + qcrypto_hmac_alg_map[alg], + (const void *)key, nkey); + if (err != 0) { + error_setg(errp, "Cannot initialize hmac: %s", + gnutls_strerror(err)); + goto error; + } + + return ctx; + +error: + g_free(ctx); + return NULL; +} + +static void +qcrypto_gnutls_hmac_ctx_free(QCryptoHmac *hmac) +{ + QCryptoHmacGnutls *ctx; + + ctx = hmac->opaque; + gnutls_hmac_deinit(ctx->handle, NULL); + + g_free(ctx); +} + +static int +qcrypto_gnutls_hmac_bytesv(QCryptoHmac *hmac, + const struct iovec *iov, + size_t niov, + uint8_t **result, + size_t *resultlen, + Error **errp) +{ + QCryptoHmacGnutls *ctx; + uint32_t ret; + int i; + + ctx = hmac->opaque; + + for (i = 0; i < niov; i++) { + gnutls_hmac(ctx->handle, iov[i].iov_base, iov[i].iov_len); + } + + ret = gnutls_hmac_get_len(qcrypto_hmac_alg_map[hmac->alg]); + if (ret <= 0) { + error_setg(errp, "Unable to get hmac length: %s", + gnutls_strerror(ret)); + return -1; + } + + if (*resultlen == 0) { + *resultlen = ret; + *result = g_new0(uint8_t, *resultlen); + } else if (*resultlen != ret) { + error_setg(errp, "Result buffer size %zu is smaller than hmac %d", + *resultlen, ret); + return -1; + } + + gnutls_hmac_output(ctx->handle, *result); + + return 0; +} + +QCryptoHmacDriver qcrypto_hmac_lib_driver = { + .hmac_bytesv = qcrypto_gnutls_hmac_bytesv, + .hmac_free = qcrypto_gnutls_hmac_ctx_free, +}; diff --git a/crypto/hmac-nettle.c b/crypto/hmac-nettle.c index 1152b741fdc..1ad6c4f2530 100644 --- a/crypto/hmac-nettle.c +++ b/crypto/hmac-nettle.c @@ -18,22 +18,16 @@ #include "hmacpriv.h" #include -#if CONFIG_NETTLE_VERSION_MAJOR < 3 -typedef unsigned int hmac_length_t; -#else -typedef size_t hmac_length_t; -#endif - typedef void (*qcrypto_nettle_hmac_setkey)(void *ctx, - hmac_length_t key_length, + size_t key_length, const uint8_t *key); typedef void (*qcrypto_nettle_hmac_update)(void *ctx, - hmac_length_t length, + size_t length, const uint8_t *data); typedef void (*qcrypto_nettle_hmac_digest)(void *ctx, - hmac_length_t length, + size_t length, uint8_t *digest); typedef struct QCryptoHmacNettle QCryptoHmacNettle; diff --git a/crypto/init.c b/crypto/init.c index 
ea233b9192a..fb7f1bff105 100644 --- a/crypto/init.c +++ b/crypto/init.c @@ -35,21 +35,6 @@ #include "crypto/random.h" /* #define DEBUG_GNUTLS */ - -/* - * We need to init gcrypt threading if - * - * - gcrypt < 1.6.0 - * - */ - -#if (defined(CONFIG_GCRYPT) && \ - (GCRYPT_VERSION_NUMBER < 0x010600)) -#define QCRYPTO_INIT_GCRYPT_THREADS -#else -#undef QCRYPTO_INIT_GCRYPT_THREADS -#endif - #ifdef DEBUG_GNUTLS static void qcrypto_gnutls_log(int level, const char *str) { @@ -57,55 +42,8 @@ static void qcrypto_gnutls_log(int level, const char *str) } #endif -#ifdef QCRYPTO_INIT_GCRYPT_THREADS -static int qcrypto_gcrypt_mutex_init(void **priv) -{ \ - QemuMutex *lock = NULL; - lock = g_new0(QemuMutex, 1); - qemu_mutex_init(lock); - *priv = lock; - return 0; -} - -static int qcrypto_gcrypt_mutex_destroy(void **priv) -{ - QemuMutex *lock = *priv; - qemu_mutex_destroy(lock); - g_free(lock); - return 0; -} - -static int qcrypto_gcrypt_mutex_lock(void **priv) -{ - QemuMutex *lock = *priv; - qemu_mutex_lock(lock); - return 0; -} - -static int qcrypto_gcrypt_mutex_unlock(void **priv) -{ - QemuMutex *lock = *priv; - qemu_mutex_unlock(lock); - return 0; -} - -static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = { - (GCRY_THREAD_OPTION_PTHREAD | (GCRY_THREAD_OPTION_VERSION << 8)), - NULL, - qcrypto_gcrypt_mutex_init, - qcrypto_gcrypt_mutex_destroy, - qcrypto_gcrypt_mutex_lock, - qcrypto_gcrypt_mutex_unlock, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL -}; -#endif /* QCRYPTO_INIT_GCRYPT */ - int qcrypto_init(Error **errp) { -#ifdef QCRYPTO_INIT_GCRYPT_THREADS - gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl); -#endif /* QCRYPTO_INIT_GCRYPT_THREADS */ - #ifdef CONFIG_GNUTLS int ret; ret = gnutls_global_init(); diff --git a/crypto/meson.build b/crypto/meson.build index 7f37b5d3354..95a6a835049 100644 --- a/crypto/meson.build +++ b/crypto/meson.build @@ -5,7 +5,6 @@ crypto_ss.add(files( 'block-qcow.c', 'block.c', 'cipher.c', - 'desrfb.c', 'hash.c', 'hmac.c', 'ivgen-essiv.c', @@ -22,52 +21,36 @@ crypto_ss.add(files( 'tlssession.c', )) -if 'CONFIG_NETTLE' in config_host - crypto_ss.add(files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c')) -elif 'CONFIG_GCRYPT' in config_host - crypto_ss.add(files('hash-gcrypt.c', 'pbkdf-gcrypt.c')) - if 'CONFIG_GCRYPT_HMAC' in config_host - crypto_ss.add(files('hmac-gcrypt.c')) - else - crypto_ss.add(files('hmac-glib.c')) +if nettle.found() + crypto_ss.add(nettle, files('hash-nettle.c', 'hmac-nettle.c', 'pbkdf-nettle.c')) + if xts == 'private' + crypto_ss.add(files('xts.c')) endif +elif gcrypt.found() + crypto_ss.add(gcrypt, files('hash-gcrypt.c', 'hmac-gcrypt.c', 'pbkdf-gcrypt.c')) +elif gnutls_crypto.found() + crypto_ss.add(gnutls, files('hash-gnutls.c', 'hmac-gnutls.c', 'pbkdf-gnutls.c')) else crypto_ss.add(files('hash-glib.c', 'hmac-glib.c', 'pbkdf-stub.c')) endif crypto_ss.add(when: 'CONFIG_SECRET_KEYRING', if_true: files('secret_keyring.c')) -crypto_ss.add(when: 'CONFIG_QEMU_PRIVATE_XTS', if_true: files('xts.c')) crypto_ss.add(when: 'CONFIG_AF_ALG', if_true: files('afalg.c', 'cipher-afalg.c', 'hash-afalg.c')) -crypto_ss.add(when: 'CONFIG_GNUTLS', if_true: files('tls-cipher-suites.c')) - -if 'CONFIG_NETTLE' in config_host - crypto_ss.add(nettle) -elif 'CONFIG_GCRYPT' in config_host - crypto_ss.add(gcrypt) -endif - -if 'CONFIG_GNUTLS' in config_host - crypto_ss.add(gnutls) -endif - +crypto_ss.add(when: gnutls, if_true: files('tls-cipher-suites.c')) util_ss.add(files('aes.c')) util_ss.add(files('init.c')) +if gnutls.found() + util_ss.add(gnutls) 
+endif -if 'CONFIG_GCRYPT' in config_host - util_ss.add(files('random-gcrypt.c')) -elif 'CONFIG_GNUTLS' in config_host - util_ss.add(files('random-gnutls.c')) +if gcrypt.found() + util_ss.add(gcrypt, files('random-gcrypt.c')) +elif gnutls.found() + util_ss.add(gnutls, files('random-gnutls.c')) elif 'CONFIG_RNG_NONE' in config_host util_ss.add(files('random-none.c')) else util_ss.add(files('random-platform.c')) endif -if 'CONFIG_GCRYPT' in config_host - util_ss.add(gcrypt) -endif - -if 'CONFIG_GNUTLS' in config_host - util_ss.add(gnutls) -endif diff --git a/crypto/pbkdf-gnutls.c b/crypto/pbkdf-gnutls.c new file mode 100644 index 00000000000..2dfbbd382c2 --- /dev/null +++ b/crypto/pbkdf-gnutls.c @@ -0,0 +1,90 @@ +/* + * QEMU Crypto PBKDF support (Password-Based Key Derivation Function) + * + * Copyright (c) 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include +#include "qapi/error.h" +#include "crypto/pbkdf.h" + +bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) +{ + switch (hash) { + case QCRYPTO_HASH_ALG_MD5: + case QCRYPTO_HASH_ALG_SHA1: + case QCRYPTO_HASH_ALG_SHA224: + case QCRYPTO_HASH_ALG_SHA256: + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: + return true; + default: + return false; + } +} + +int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + uint64_t iterations, + uint8_t *out, size_t nout, + Error **errp) +{ + static const int hash_map[QCRYPTO_HASH_ALG__MAX] = { + [QCRYPTO_HASH_ALG_MD5] = GNUTLS_DIG_MD5, + [QCRYPTO_HASH_ALG_SHA1] = GNUTLS_DIG_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GNUTLS_DIG_SHA224, + [QCRYPTO_HASH_ALG_SHA256] = GNUTLS_DIG_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GNUTLS_DIG_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GNUTLS_DIG_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GNUTLS_DIG_RMD160, + }; + int ret; + const gnutls_datum_t gkey = { (unsigned char *)key, nkey }; + const gnutls_datum_t gsalt = { (unsigned char *)salt, nsalt }; + + if (iterations > ULONG_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu must be less than %lu", + (long long unsigned)iterations, ULONG_MAX); + return -1; + } + + if (hash >= G_N_ELEMENTS(hash_map) || + hash_map[hash] == GNUTLS_DIG_UNKNOWN) { + error_setg_errno(errp, ENOSYS, + "PBKDF does not support hash algorithm %s", + QCryptoHashAlgorithm_str(hash)); + return -1; + } + + ret = gnutls_pbkdf2(hash_map[hash], + &gkey, + &gsalt, + iterations, + out, + nout); + if (ret != 0) { + error_setg(errp, "Cannot derive password: %s", + gnutls_strerror(ret)); + return -1; + } + + return 0; +} diff --git a/crypto/tls-cipher-suites.c b/crypto/tls-cipher-suites.c index 55fb5f7c19d..5e4f5974645 100644 --- a/crypto/tls-cipher-suites.c +++ b/crypto/tls-cipher-suites.c @@ -14,8 +14,15 @@ #include "crypto/tlscreds.h" #include "crypto/tls-cipher-suites.h" #include 
"hw/nvram/fw_cfg.h" +#include "tlscredspriv.h" #include "trace.h" +struct QCryptoTLSCipherSuites { + /* */ + QCryptoTLSCreds parent_obj; + /* */ +}; + /* * IANA registered TLS ciphers: * https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-4 diff --git a/crypto/tlscreds.c b/crypto/tlscreds.c index b68735f06fe..084ce0d51ae 100644 --- a/crypto/tlscreds.c +++ b/crypto/tlscreds.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include "qapi-types-crypto.h" #include "qemu/module.h" #include "tlscredspriv.h" #include "trace.h" @@ -259,6 +260,17 @@ qcrypto_tls_creds_finalize(Object *obj) g_free(creds->priority); } +bool qcrypto_tls_creds_check_endpoint(QCryptoTLSCreds *creds, + QCryptoTLSCredsEndpoint endpoint, + Error **errp) +{ + if (creds->endpoint != endpoint) { + error_setg(errp, "Expected TLS credentials for a %s endpoint", + QCryptoTLSCredsEndpoint_str(endpoint)); + return false; + } + return true; +} static const TypeInfo qcrypto_tls_creds_info = { .parent = TYPE_OBJECT, diff --git a/crypto/tlscredsanon.c b/crypto/tlscredsanon.c index bea5f76c55d..6fb83639ecd 100644 --- a/crypto/tlscredsanon.c +++ b/crypto/tlscredsanon.c @@ -29,6 +29,8 @@ #ifdef CONFIG_GNUTLS +#include + static int qcrypto_tls_creds_anon_load(QCryptoTLSCredsAnon *creds, diff --git a/crypto/tlscredspriv.h b/crypto/tlscredspriv.h index 39f1a91c413..df9815a2863 100644 --- a/crypto/tlscredspriv.h +++ b/crypto/tlscredspriv.h @@ -23,6 +23,51 @@ #include "crypto/tlscreds.h" +#ifdef CONFIG_GNUTLS +#include +#endif + +struct QCryptoTLSCreds { + Object parent_obj; + char *dir; + QCryptoTLSCredsEndpoint endpoint; +#ifdef CONFIG_GNUTLS + gnutls_dh_params_t dh_params; +#endif + bool verifyPeer; + char *priority; +}; + +struct QCryptoTLSCredsAnon { + QCryptoTLSCreds parent_obj; +#ifdef CONFIG_GNUTLS + union { + gnutls_anon_server_credentials_t server; + gnutls_anon_client_credentials_t client; + } data; +#endif +}; + +struct QCryptoTLSCredsPSK { + QCryptoTLSCreds parent_obj; + char *username; +#ifdef CONFIG_GNUTLS + union { + gnutls_psk_server_credentials_t server; + gnutls_psk_client_credentials_t client; + } data; +#endif +}; + +struct QCryptoTLSCredsX509 { + QCryptoTLSCreds parent_obj; +#ifdef CONFIG_GNUTLS + gnutls_certificate_credentials_t data; +#endif + bool sanityCheck; + char *passwordid; +}; + #ifdef CONFIG_GNUTLS int qcrypto_tls_creds_get_path(QCryptoTLSCreds *creds, diff --git a/crypto/tlscredspsk.c b/crypto/tlscredspsk.c index f5a31108d15..752f2d92bee 100644 --- a/crypto/tlscredspsk.c +++ b/crypto/tlscredspsk.c @@ -29,6 +29,8 @@ #ifdef CONFIG_GNUTLS +#include + static int lookup_key(const char *pskfile, const char *username, gnutls_datum_t *key, Error **errp) diff --git a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c index bc503bab558..32948a6bdc4 100644 --- a/crypto/tlscredsx509.c +++ b/crypto/tlscredsx509.c @@ -30,6 +30,7 @@ #ifdef CONFIG_GNUTLS +#include #include @@ -354,11 +355,9 @@ qcrypto_tls_creds_check_cert_pair(gnutls_x509_crt_t cert, reason = "The certificate has been revoked"; } -#ifndef GNUTLS_1_0_COMPAT if (status & GNUTLS_CERT_INSECURE_ALGORITHM) { reason = "The certificate uses an insecure algorithm"; } -#endif error_setg(errp, "Our own certificate %s failed validation against %s: %s", diff --git a/crypto/tlssession.c b/crypto/tlssession.c index 33203e8ca71..a8db8c76d13 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -25,6 +25,7 @@ #include "crypto/tlscredsx509.h" #include "qapi/error.h" #include "authz/base.h" +#include "tlscredspriv.h" 
#include "trace.h" #ifdef CONFIG_GNUTLS diff --git a/crypto/trace-events b/crypto/trace-events index 798b6067ab0..bccd0bbf291 100644 --- a/crypto/trace-events +++ b/crypto/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # tlscreds.c qcrypto_tls_creds_load_dh(void *creds, const char *filename) "TLS creds load DH creds=%p filename=%s" diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak deleted file mode 100644 index 958b1e08e40..00000000000 --- a/default-configs/devices/aarch64-softmmu.mak +++ /dev/null @@ -1,8 +0,0 @@ -# Default configuration for aarch64-softmmu - -# We support all the 32 bit boards so need all their config -include arm-softmmu.mak - -CONFIG_XLNX_ZYNQMP_ARM=y -CONFIG_XLNX_VERSAL=y -CONFIG_SBSA_REF=y diff --git a/default-configs/devices/arm-softmmu.mak b/default-configs/devices/arm-softmmu.mak deleted file mode 100644 index 0500156a0c7..00000000000 --- a/default-configs/devices/arm-softmmu.mak +++ /dev/null @@ -1,46 +0,0 @@ -# Default configuration for arm-softmmu - -# TODO: ARM_V7M is currently always required - make this more flexible! -CONFIG_ARM_V7M=y - -# CONFIG_PCI_DEVICES=n -# CONFIG_TEST_DEVICES=n - -CONFIG_ARM_VIRT=y -CONFIG_CUBIEBOARD=y -CONFIG_EXYNOS4=y -CONFIG_HIGHBANK=y -CONFIG_INTEGRATOR=y -CONFIG_FSL_IMX31=y -CONFIG_MUSICPAL=y -CONFIG_MUSCA=y -CONFIG_CHEETAH=y -CONFIG_SX1=y -CONFIG_NSERIES=y -CONFIG_STELLARIS=y -CONFIG_REALVIEW=y -CONFIG_VERSATILE=y -CONFIG_VEXPRESS=y -CONFIG_ZYNQ=y -CONFIG_MAINSTONE=y -CONFIG_GUMSTIX=y -CONFIG_SPITZ=y -CONFIG_TOSA=y -CONFIG_Z2=y -CONFIG_NPCM7XX=y -CONFIG_COLLIE=y -CONFIG_ASPEED_SOC=y -CONFIG_NETDUINO2=y -CONFIG_NETDUINOPLUS2=y -CONFIG_MPS2=y -CONFIG_RASPI=y -CONFIG_DIGIC=y -CONFIG_SABRELITE=y -CONFIG_EMCRAFT_SF2=y -CONFIG_MICROBIT=y -CONFIG_FSL_IMX25=y -CONFIG_FSL_IMX7=y -CONFIG_FSL_IMX6UL=y -CONFIG_SEMIHOSTING=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y -CONFIG_ALLWINNER_H3=y diff --git a/default-configs/devices/i386-softmmu.mak b/default-configs/devices/i386-softmmu.mak deleted file mode 100644 index 84d1a2487cd..00000000000 --- a/default-configs/devices/i386-softmmu.mak +++ /dev/null @@ -1,31 +0,0 @@ -# Default configuration for i386-softmmu - -# Uncomment the following lines to disable these optional devices: -# -#CONFIG_AMD_IOMMU=n -#CONFIG_APPLESMC=n -#CONFIG_FDC=n -#CONFIG_HPET=n -#CONFIG_HYPERV=n -#CONFIG_ISA_DEBUG=n -#CONFIG_ISA_IPMI_BT=n -#CONFIG_ISA_IPMI_KCS=n -#CONFIG_PCI_IPMI_KCS=n -#CONFIG_PCI_IPMI_BT=n -#CONFIG_IPMI_SSIF=n -#CONFIG_PCI_DEVICES=n -#CONFIG_PVPANIC=n -#CONFIG_QXL=n -#CONFIG_SEV=n -#CONFIG_SGA=n -#CONFIG_TEST_DEVICES=n -#CONFIG_TPM_CRB=n -#CONFIG_TPM_TIS_ISA=n -#CONFIG_VTD=n - -# Boards: -# -CONFIG_ISAPC=y -CONFIG_I440FX=y -CONFIG_Q35=y -CONFIG_MICROVM=y diff --git a/default-configs/devices/lm32-softmmu.mak b/default-configs/devices/lm32-softmmu.mak deleted file mode 100644 index 1bce3f6e8b6..00000000000 --- a/default-configs/devices/lm32-softmmu.mak +++ /dev/null @@ -1,12 +0,0 @@ -# Default configuration for lm32-softmmu - -# Uncomment the following lines to disable these optional devices: -# -#CONFIG_MILKYMIST_TMU2=n # disabling it actually causes compile-time failures - -CONFIG_SEMIHOSTING=y - -# Boards: -# -CONFIG_LM32_EVR=y -CONFIG_MILKYMIST=y diff --git a/default-configs/devices/microblazeel-softmmu.mak b/default-configs/devices/microblazeel-softmmu.mak deleted file mode 100644 index 2fcf442fc75..00000000000 --- 
a/default-configs/devices/microblazeel-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for microblazeel-softmmu - -include microblaze-softmmu.mak diff --git a/default-configs/devices/mips-softmmu-common.mak b/default-configs/devices/mips-softmmu-common.mak deleted file mode 100644 index a53bc898bfe..00000000000 --- a/default-configs/devices/mips-softmmu-common.mak +++ /dev/null @@ -1,42 +0,0 @@ -# Common mips*-softmmu CONFIG defines - -# CONFIG_SEMIHOSTING is always required on this architecture -CONFIG_SEMIHOSTING=y - -CONFIG_ISA_BUS=y -CONFIG_PCI=y -CONFIG_PCI_DEVICES=y -CONFIG_VGA_ISA=y -CONFIG_VGA_ISA_MM=y -CONFIG_VGA_CIRRUS=y -CONFIG_VMWARE_VGA=y -CONFIG_SERIAL=y -CONFIG_SERIAL_ISA=y -CONFIG_PARALLEL=y -CONFIG_I8254=y -CONFIG_PCSPK=y -CONFIG_PCKBD=y -CONFIG_FDC=y -CONFIG_ACPI=y -CONFIG_ACPI_X86=y -CONFIG_ACPI_MEMORY_HOTPLUG=y -CONFIG_ACPI_NVDIMM=y -CONFIG_ACPI_CPU_HOTPLUG=y -CONFIG_APM=y -CONFIG_I8257=y -CONFIG_PIIX4=y -CONFIG_IDE_ISA=y -CONFIG_IDE_PIIX=y -CONFIG_PFLASH_CFI01=y -CONFIG_I8259=y -CONFIG_MC146818RTC=y -CONFIG_EMPTY_SLOT=y -CONFIG_MIPS_CPS=y -CONFIG_MIPS_ITU=y -CONFIG_MALTA=y -CONFIG_PCNET_PCI=y -CONFIG_MIPSSIM=y -CONFIG_ACPI_SMBUS=y -CONFIG_SMBUS_EEPROM=y -CONFIG_TEST_DEVICES=y -CONFIG_VIRTIO_MMIO=y diff --git a/default-configs/devices/mips-softmmu.mak b/default-configs/devices/mips-softmmu.mak deleted file mode 100644 index 9fede6e00f8..00000000000 --- a/default-configs/devices/mips-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for mips-softmmu - -include mips-softmmu-common.mak diff --git a/default-configs/devices/mips64-softmmu.mak b/default-configs/devices/mips64-softmmu.mak deleted file mode 100644 index a169738635f..00000000000 --- a/default-configs/devices/mips64-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -# Default configuration for mips64-softmmu - -include mips-softmmu-common.mak -CONFIG_JAZZ=y diff --git a/default-configs/devices/mips64cheri128-softmmu.mak b/default-configs/devices/mips64cheri128-softmmu.mak deleted file mode 100644 index bbbc8cf747f..00000000000 --- a/default-configs/devices/mips64cheri128-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for cheri128-softmmu - -include mips64-softmmu.mak diff --git a/default-configs/devices/mips64el-softmmu.mak b/default-configs/devices/mips64el-softmmu.mak deleted file mode 100644 index 26c660a05c4..00000000000 --- a/default-configs/devices/mips64el-softmmu.mak +++ /dev/null @@ -1,15 +0,0 @@ -# Default configuration for mips64el-softmmu - -include mips-softmmu-common.mak -CONFIG_IDE_VIA=y -CONFIG_FULOONG=y -CONFIG_LOONGSON3V=y -CONFIG_ATI_VGA=y -CONFIG_RTL8139_PCI=y -CONFIG_JAZZ=y -CONFIG_VT82C686=y -CONFIG_AHCI=y -CONFIG_MIPS_BOSTON=y -CONFIG_FITLOADER=y -CONFIG_PCI_EXPRESS=y -CONFIG_PCI_EXPRESS_XILINX=y diff --git a/default-configs/devices/mipsel-softmmu.mak b/default-configs/devices/mipsel-softmmu.mak deleted file mode 100644 index a7f60594849..00000000000 --- a/default-configs/devices/mipsel-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for mipsel-softmmu - -include mips-softmmu-common.mak diff --git a/default-configs/devices/moxie-softmmu.mak b/default-configs/devices/moxie-softmmu.mak deleted file mode 100644 index bd50da3c58f..00000000000 --- a/default-configs/devices/moxie-softmmu.mak +++ /dev/null @@ -1,5 +0,0 @@ -# Default configuration for moxie-softmmu - -# Boards: -# -CONFIG_MOXIESIM=y diff --git a/default-configs/devices/ppc-softmmu.mak b/default-configs/devices/ppc-softmmu.mak deleted file mode 100644 index 
61b78b844dd..00000000000 --- a/default-configs/devices/ppc-softmmu.mak +++ /dev/null @@ -1,18 +0,0 @@ -# Default configuration for ppc-softmmu - -# For embedded PPCs: -CONFIG_DS1338=y -CONFIG_E500=y -CONFIG_PPC405=y -CONFIG_PPC440=y -CONFIG_VIRTEX=y - -# For Sam460ex -CONFIG_SAM460EX=y - -# For Macs -CONFIG_MAC_OLDWORLD=y -CONFIG_MAC_NEWWORLD=y - -# For PReP -CONFIG_PREP=y diff --git a/default-configs/devices/ppc64-softmmu.mak b/default-configs/devices/ppc64-softmmu.mak deleted file mode 100644 index ae0841fa3a1..00000000000 --- a/default-configs/devices/ppc64-softmmu.mak +++ /dev/null @@ -1,11 +0,0 @@ -# Default configuration for ppc64-softmmu - -# Include all 32-bit boards -include ppc-softmmu.mak - -# For PowerNV -CONFIG_POWERNV=y - -# For pSeries -CONFIG_PSERIES=y -CONFIG_NVDIMM=y diff --git a/default-configs/devices/riscv32cheri-softmmu.mak b/default-configs/devices/riscv32cheri-softmmu.mak deleted file mode 100644 index ede744e1fec..00000000000 --- a/default-configs/devices/riscv32cheri-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for riscv32cheri-softmmu - -include riscv32-softmmu.mak diff --git a/default-configs/devices/riscv64-softmmu.mak b/default-configs/devices/riscv64-softmmu.mak deleted file mode 100644 index d5eec75f05e..00000000000 --- a/default-configs/devices/riscv64-softmmu.mak +++ /dev/null @@ -1,15 +0,0 @@ -# Default configuration for riscv64-softmmu - -# Uncomment the following lines to disable these optional devices: -# -#CONFIG_PCI_DEVICES=n -CONFIG_SEMIHOSTING=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y - -# Boards: -# -CONFIG_SPIKE=y -CONFIG_SIFIVE_E=y -CONFIG_SIFIVE_U=y -CONFIG_RISCV_VIRT=y -CONFIG_MICROCHIP_PFSOC=y diff --git a/default-configs/devices/riscv64cheri-softmmu.mak b/default-configs/devices/riscv64cheri-softmmu.mak deleted file mode 100644 index fd6e4b93792..00000000000 --- a/default-configs/devices/riscv64cheri-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for riscv64cheri-softmmu - -include riscv64-softmmu.mak diff --git a/default-configs/devices/sh4eb-softmmu.mak b/default-configs/devices/sh4eb-softmmu.mak deleted file mode 100644 index 522a7a50fab..00000000000 --- a/default-configs/devices/sh4eb-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for sh4eb-softmmu - -include sh4-softmmu.mak diff --git a/default-configs/devices/tricore-softmmu.mak b/default-configs/devices/tricore-softmmu.mak deleted file mode 100644 index 5cc91cebce7..00000000000 --- a/default-configs/devices/tricore-softmmu.mak +++ /dev/null @@ -1 +0,0 @@ -CONFIG_TRIBOARD=y diff --git a/default-configs/devices/unicore32-softmmu.mak b/default-configs/devices/unicore32-softmmu.mak deleted file mode 100644 index 899288e3d71..00000000000 --- a/default-configs/devices/unicore32-softmmu.mak +++ /dev/null @@ -1,6 +0,0 @@ -# Default configuration for unicore32-softmmu - -# Boards: -# -CONFIG_PUV3=y -CONFIG_SEMIHOSTING=y diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak deleted file mode 100644 index 64b2ee2960e..00000000000 --- a/default-configs/devices/x86_64-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for x86_64-softmmu - -include i386-softmmu.mak diff --git a/default-configs/devices/xtensaeb-softmmu.mak b/default-configs/devices/xtensaeb-softmmu.mak deleted file mode 100644 index f7e48c750cd..00000000000 --- a/default-configs/devices/xtensaeb-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -# Default configuration for Xtensa - -include xtensa-softmmu.mak diff --git 
a/default-configs/targets/aarch64-linux-user.mak b/default-configs/targets/aarch64-linux-user.mak deleted file mode 100644 index 4713253709f..00000000000 --- a/default-configs/targets/aarch64-linux-user.mak +++ /dev/null @@ -1,5 +0,0 @@ -TARGET_ARCH=aarch64 -TARGET_BASE_ARCH=arm -TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/aarch64-softmmu.mak b/default-configs/targets/aarch64-softmmu.mak deleted file mode 100644 index 7703127674e..00000000000 --- a/default-configs/targets/aarch64-softmmu.mak +++ /dev/null @@ -1,5 +0,0 @@ -TARGET_ARCH=aarch64 -TARGET_BASE_ARCH=arm -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_NEED_FDT=y diff --git a/default-configs/targets/aarch64_be-linux-user.mak b/default-configs/targets/aarch64_be-linux-user.mak deleted file mode 100644 index fae831558da..00000000000 --- a/default-configs/targets/aarch64_be-linux-user.mak +++ /dev/null @@ -1,6 +0,0 @@ -TARGET_ARCH=aarch64 -TARGET_BASE_ARCH=arm -TARGET_WORDS_BIGENDIAN=y -TARGET_XML_FILES= gdb-xml/aarch64-core.xml gdb-xml/aarch64-fpu.xml gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/arm-linux-user.mak b/default-configs/targets/arm-linux-user.mak deleted file mode 100644 index e741ffd4d30..00000000000 --- a/default-configs/targets/arm-linux-user.mak +++ /dev/null @@ -1,6 +0,0 @@ -TARGET_ARCH=arm -TARGET_SYSTBL_ABI=common,oabi -TARGET_SYSTBL=syscall.tbl -TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/arm-softmmu.mak b/default-configs/targets/arm-softmmu.mak deleted file mode 100644 index 84a98f48186..00000000000 --- a/default-configs/targets/arm-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=arm -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_NEED_FDT=y diff --git a/default-configs/targets/armeb-linux-user.mak b/default-configs/targets/armeb-linux-user.mak deleted file mode 100644 index 255e44e8b0a..00000000000 --- a/default-configs/targets/armeb-linux-user.mak +++ /dev/null @@ -1,7 +0,0 @@ -TARGET_ARCH=arm -TARGET_SYSTBL_ABI=common,oabi -TARGET_SYSTBL=syscall.tbl -TARGET_WORDS_BIGENDIAN=y -TARGET_XML_FILES= gdb-xml/arm-core.xml gdb-xml/arm-vfp.xml gdb-xml/arm-vfp3.xml gdb-xml/arm-neon.xml gdb-xml/arm-m-profile.xml -TARGET_HAS_BFLT=y -CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/targets/i386-softmmu.mak b/default-configs/targets/i386-softmmu.mak deleted file mode 100644 index 5babf71895d..00000000000 --- a/default-configs/targets/i386-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -TARGET_ARCH=i386 -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/i386-32bit.xml diff --git a/default-configs/targets/lm32-softmmu.mak b/default-configs/targets/lm32-softmmu.mak deleted file mode 100644 index 55e7184a3db..00000000000 --- a/default-configs/targets/lm32-softmmu.mak +++ /dev/null @@ -1,2 +0,0 
@@ -TARGET_ARCH=lm32 -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/moxie-softmmu.mak b/default-configs/targets/moxie-softmmu.mak deleted file mode 100644 index 183e6b0ebda..00000000000 --- a/default-configs/targets/moxie-softmmu.mak +++ /dev/null @@ -1,2 +0,0 @@ -TARGET_ARCH=moxie -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/sparc-bsd-user.mak b/default-configs/targets/sparc-bsd-user.mak deleted file mode 100644 index 9ba3d7b07f1..00000000000 --- a/default-configs/targets/sparc-bsd-user.mak +++ /dev/null @@ -1,3 +0,0 @@ -TARGET_ARCH=sparc -TARGET_ALIGNED_ONLY=y -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/sparc64-bsd-user.mak b/default-configs/targets/sparc64-bsd-user.mak deleted file mode 100644 index 8dd32178004..00000000000 --- a/default-configs/targets/sparc64-bsd-user.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=sparc64 -TARGET_BASE_ARCH=sparc -TARGET_ALIGNED_ONLY=y -TARGET_WORDS_BIGENDIAN=y diff --git a/default-configs/targets/unicore32-softmmu.mak b/default-configs/targets/unicore32-softmmu.mak deleted file mode 100644 index 57331e94fe2..00000000000 --- a/default-configs/targets/unicore32-softmmu.mak +++ /dev/null @@ -1 +0,0 @@ -TARGET_ARCH=unicore32 diff --git a/default-configs/targets/x86_64-softmmu.mak b/default-configs/targets/x86_64-softmmu.mak deleted file mode 100644 index 75e42bc8404..00000000000 --- a/default-configs/targets/x86_64-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=x86_64 -TARGET_BASE_ARCH=i386 -TARGET_SUPPORTS_MTTCG=y -TARGET_XML_FILES= gdb-xml/i386-64bit.xml diff --git a/default-configs/targets/xtensa-softmmu.mak b/default-configs/targets/xtensa-softmmu.mak deleted file mode 100644 index 26c0285655c..00000000000 --- a/default-configs/targets/xtensa-softmmu.mak +++ /dev/null @@ -1,3 +0,0 @@ -TARGET_ARCH=xtensa -TARGET_ALIGNED_ONLY=y -TARGET_SUPPORTS_MTTCG=y diff --git a/default-configs/targets/xtensaeb-softmmu.mak b/default-configs/targets/xtensaeb-softmmu.mak deleted file mode 100644 index 14cb9289a62..00000000000 --- a/default-configs/targets/xtensaeb-softmmu.mak +++ /dev/null @@ -1,4 +0,0 @@ -TARGET_ARCH=xtensa -TARGET_ALIGNED_ONLY=y -TARGET_WORDS_BIGENDIAN=y -TARGET_SUPPORTS_MTTCG=y diff --git a/disas.c b/disas.c index a0de01cc39a..29bc66bc9da 100644 --- a/disas.c +++ b/disas.c @@ -4,7 +4,6 @@ #include "elf.h" #include "qemu/qemu-print.h" -#include "cpu.h" #include "disas/disas.h" #include "disas/capstone.h" diff --git a/disas/arm-a64.cc b/disas/arm-a64.cc index 1cb15f6b823..4b3c91f2131 100644 --- a/disas/arm-a64.cc +++ b/disas/arm-a64.cc @@ -18,9 +18,7 @@ */ #include "qemu/osdep.h" -extern "C" { #include "disas/dis-asm.h" -} #include "vixl/aarch64/disasm-aarch64.h" diff --git a/disas/hexagon.c b/disas/hexagon.c index 3c24e2a94af..c1a4ffc5f6b 100644 --- a/disas/hexagon.c +++ b/disas/hexagon.c @@ -33,7 +33,7 @@ int print_insn_hexagon(bfd_vma memaddr, struct disassemble_info *info) { uint32_t words[PACKET_WORDS_MAX]; bool found_end = false; - GString *buf = g_string_sized_new(PACKET_BUFFER_LEN); + GString *buf; int i, len; for (i = 0; i < PACKET_WORDS_MAX && !found_end; i++) { @@ -57,6 +57,7 @@ int print_insn_hexagon(bfd_vma memaddr, struct disassemble_info *info) return PACKET_WORDS_MAX * sizeof(uint32_t); } + buf = g_string_sized_new(PACKET_BUFFER_LEN); len = disassemble_hexagon(words, i, memaddr, buf); (*info->fprintf_func)(info->stream, "%s", buf->str); g_string_free(buf, true); diff --git a/disas/libvixl/vixl/code-buffer-vixl.h b/disas/libvixl/vixl/code-buffer-vixl.h index 
a43c584367e..88138e4e327 100644 --- a/disas/libvixl/vixl/code-buffer-vixl.h +++ b/disas/libvixl/vixl/code-buffer-vixl.h @@ -28,9 +28,7 @@ #define VIXL_CODE_BUFFER_H #include - -#include "globals-vixl.h" -#include "utils-vixl.h" +#include "vixl/globals.h" namespace vixl { diff --git a/disas/libvixl/vixl/globals.h b/disas/libvixl/vixl/globals.h new file mode 100644 index 00000000000..3a71942f1e5 --- /dev/null +++ b/disas/libvixl/vixl/globals.h @@ -0,0 +1,155 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_GLOBALS_H +#define VIXL_GLOBALS_H + +// Get standard C99 macros for integer types. +#ifndef __STDC_CONSTANT_MACROS +#define __STDC_CONSTANT_MACROS +#endif + +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +extern "C" { +#include +#include +} + +#include +#include +#include +#include +#include + +#include "vixl/platform.h" + + +typedef uint8_t byte; + +// Type for half-precision (16 bit) floating point numbers. +typedef uint16_t float16; + +const int KBytes = 1024; +const int MBytes = 1024 * KBytes; + +#define VIXL_ABORT() \ + do { printf("in %s, line %i", __FILE__, __LINE__); abort(); } while (false) +#ifdef VIXL_DEBUG + #define VIXL_ASSERT(condition) assert(condition) + #define VIXL_CHECK(condition) VIXL_ASSERT(condition) + #define VIXL_UNIMPLEMENTED() \ + do { fprintf(stderr, "UNIMPLEMENTED\t"); VIXL_ABORT(); } while (false) + #define VIXL_UNREACHABLE() \ + do { fprintf(stderr, "UNREACHABLE\t"); VIXL_ABORT(); } while (false) +#else + #define VIXL_ASSERT(condition) ((void) 0) + #define VIXL_CHECK(condition) assert(condition) + #define VIXL_UNIMPLEMENTED() ((void) 0) + #define VIXL_UNREACHABLE() ((void) 0) +#endif +// This is not as powerful as template based assertions, but it is simple. +// It assumes that the descriptions are unique. If this starts being a problem, +// we can switch to a different implemention. 
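The comment above describes the classic line-number-based static assertion used before C++11's static_assert: the condition is folded into an array bound, so a false condition produces a negative-sized typedef and the build fails on that line. A minimal standalone sketch of the same trick (hypothetical macro names, not taken from this patch):

    // Two concatenation levels so that __LINE__ is expanded before token pasting.
    #define MY_CONCAT_(a, b) a##b
    #define MY_CONCAT(a, b)  MY_CONCAT_(a, b)
    // Expands to e.g. "typedef char my_assert_12[1];" when cond holds,
    // and to an ill-formed negative-sized array type when it does not.
    #define MY_STATIC_ASSERT(cond) \
        typedef char MY_CONCAT(my_assert_, __LINE__)[(cond) ? 1 : -1] __attribute__((unused))

    MY_STATIC_ASSERT(sizeof(int) >= 4);     // compiles
    // MY_STATIC_ASSERT(sizeof(int) == 1);  // would fail to compile on this line

The "descriptions are unique" caveat simply means that two assertions on the same source line would collide on the generated typedef name.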
+#define VIXL_CONCAT(a, b) a##b +#define VIXL_STATIC_ASSERT_LINE(line, condition) \ + typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 1 : -1] \ + __attribute__((unused)) +#define VIXL_STATIC_ASSERT(condition) \ + VIXL_STATIC_ASSERT_LINE(__LINE__, condition) + +template +inline void USE(T1) {} + +template +inline void USE(T1, T2) {} + +template +inline void USE(T1, T2, T3) {} + +template +inline void USE(T1, T2, T3, T4) {} + +#define VIXL_ALIGNMENT_EXCEPTION() \ + do { fprintf(stderr, "ALIGNMENT EXCEPTION\t"); VIXL_ABORT(); } while (0) + +// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough +// argument to annotate intentional fall-through between switch labels. +// For more information please refer to: +// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough +#ifndef __has_warning + #define __has_warning(x) 0 +#endif + +// Fallthrough annotation for Clang and C++11(201103L). +#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L + #define VIXL_FALLTHROUGH() [[clang::fallthrough]] //NOLINT +// Fallthrough annotation for GCC >= 7. +#elif __GNUC__ >= 7 + #define VIXL_FALLTHROUGH() __attribute__((fallthrough)) +#else + #define VIXL_FALLTHROUGH() do {} while (0) +#endif + +#if __cplusplus >= 201103L + #define VIXL_NO_RETURN [[noreturn]] //NOLINT +#else + #define VIXL_NO_RETURN __attribute__((noreturn)) +#endif + +// Some functions might only be marked as "noreturn" for the DEBUG build. This +// macro should be used for such cases (for more details see what +// VIXL_UNREACHABLE expands to). +#ifdef VIXL_DEBUG + #define VIXL_DEBUG_NO_RETURN VIXL_NO_RETURN +#else + #define VIXL_DEBUG_NO_RETURN +#endif + +#ifdef VIXL_INCLUDE_SIMULATOR +#ifndef VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE + #define VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE 1 +#endif +#else +#ifndef VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE + #define VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE 0 +#endif +#if VIXL_GENERATE_SIMULATOR_INSTRUCTIONS_VALUE + #warning "Generating Simulator instructions without Simulator support." +#endif +#endif + +#ifdef USE_SIMULATOR + #error "Please see the release notes for USE_SIMULATOR." +#endif + +#endif // VIXL_GLOBALS_H diff --git a/disas/libvixl/vixl/utils.cc b/disas/libvixl/vixl/utils.cc new file mode 100644 index 00000000000..69304d266d7 --- /dev/null +++ b/disas/libvixl/vixl/utils.cc @@ -0,0 +1,142 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "vixl/utils.h" +#include + +namespace vixl { + +uint32_t float_to_rawbits(float value) { + uint32_t bits = 0; + memcpy(&bits, &value, 4); + return bits; +} + + +uint64_t double_to_rawbits(double value) { + uint64_t bits = 0; + memcpy(&bits, &value, 8); + return bits; +} + + +float rawbits_to_float(uint32_t bits) { + float value = 0.0; + memcpy(&value, &bits, 4); + return value; +} + + +double rawbits_to_double(uint64_t bits) { + double value = 0.0; + memcpy(&value, &bits, 8); + return value; +} + + +uint32_t float_sign(float val) { + uint32_t rawbits = float_to_rawbits(val); + return unsigned_bitextract_32(31, 31, rawbits); +} + + +uint32_t float_exp(float val) { + uint32_t rawbits = float_to_rawbits(val); + return unsigned_bitextract_32(30, 23, rawbits); +} + + +uint32_t float_mantissa(float val) { + uint32_t rawbits = float_to_rawbits(val); + return unsigned_bitextract_32(22, 0, rawbits); +} + + +uint32_t double_sign(double val) { + uint64_t rawbits = double_to_rawbits(val); + return static_cast(unsigned_bitextract_64(63, 63, rawbits)); +} + + +uint32_t double_exp(double val) { + uint64_t rawbits = double_to_rawbits(val); + return static_cast(unsigned_bitextract_64(62, 52, rawbits)); +} + + +uint64_t double_mantissa(double val) { + uint64_t rawbits = double_to_rawbits(val); + return unsigned_bitextract_64(51, 0, rawbits); +} + + +float float_pack(uint32_t sign, uint32_t exp, uint32_t mantissa) { + uint32_t bits = (sign << 31) | (exp << 23) | mantissa; + return rawbits_to_float(bits); +} + + +double double_pack(uint64_t sign, uint64_t exp, uint64_t mantissa) { + uint64_t bits = (sign << 63) | (exp << 52) | mantissa; + return rawbits_to_double(bits); +} + + +int float16classify(float16 value) { + uint16_t exponent_max = (1 << 5) - 1; + uint16_t exponent_mask = exponent_max << 10; + uint16_t mantissa_mask = (1 << 10) - 1; + + uint16_t exponent = (value & exponent_mask) >> 10; + uint16_t mantissa = value & mantissa_mask; + if (exponent == 0) { + if (mantissa == 0) { + return FP_ZERO; + } + return FP_SUBNORMAL; + } else if (exponent == exponent_max) { + if (mantissa == 0) { + return FP_INFINITE; + } + return FP_NAN; + } + return FP_NORMAL; +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) { + VIXL_ASSERT((reg_size % 8) == 0); + int count = 0; + for (unsigned i = 0; i < (reg_size / 16); i++) { + if ((imm & 0xffff) == 0) { + count++; + } + imm >>= 16; + } + return count; +} + +} // namespace vixl diff --git a/disas/libvixl/vixl/utils.h b/disas/libvixl/vixl/utils.h new file mode 100644 index 00000000000..ecb0f1014ab --- /dev/null +++ b/disas/libvixl/vixl/utils.h @@ -0,0 +1,286 @@ +// Copyright 2015, ARM Limited +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_UTILS_H +#define VIXL_UTILS_H + +#include +#include +#include "vixl/globals.h" +#include "vixl/compiler-intrinsics.h" + +namespace vixl { + +// Macros for compile-time format checking. +#if GCC_VERSION_OR_NEWER(4, 4, 0) +#define PRINTF_CHECK(format_index, varargs_index) \ + __attribute__((format(gnu_printf, format_index, varargs_index))) +#else +#define PRINTF_CHECK(format_index, varargs_index) +#endif + +// Check number width. +inline bool is_intn(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + int64_t limit = INT64_C(1) << (n - 1); + return (-limit <= x) && (x < limit); +} + +inline bool is_uintn(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return !(x >> n); +} + +inline uint32_t truncate_to_intn(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return static_cast(x & ((INT64_C(1) << n) - 1)); +} + +#define INT_1_TO_63_LIST(V) \ +V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \ +V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \ +V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \ +V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32) \ +V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \ +V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \ +V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \ +V(57) V(58) V(59) V(60) V(61) V(62) V(63) + +#define DECLARE_IS_INT_N(N) \ +inline bool is_int##N(int64_t x) { return is_intn(N, x); } +#define DECLARE_IS_UINT_N(N) \ +inline bool is_uint##N(int64_t x) { return is_uintn(N, x); } +#define DECLARE_TRUNCATE_TO_INT_N(N) \ +inline uint32_t truncate_to_int##N(int x) { return truncate_to_intn(N, x); } +INT_1_TO_63_LIST(DECLARE_IS_INT_N) +INT_1_TO_63_LIST(DECLARE_IS_UINT_N) +INT_1_TO_63_LIST(DECLARE_TRUNCATE_TO_INT_N) +#undef DECLARE_IS_INT_N +#undef DECLARE_IS_UINT_N +#undef DECLARE_TRUNCATE_TO_INT_N + +// Bit field extraction. 
+inline uint32_t unsigned_bitextract_32(int msb, int lsb, uint32_t x) { + return (x >> lsb) & ((1 << (1 + msb - lsb)) - 1); +} + +inline uint64_t unsigned_bitextract_64(int msb, int lsb, uint64_t x) { + return (x >> lsb) & ((static_cast(1) << (1 + msb - lsb)) - 1); +} + +inline int32_t signed_bitextract_32(int msb, int lsb, int32_t x) { + return (x << (31 - msb)) >> (lsb + 31 - msb); +} + +inline int64_t signed_bitextract_64(int msb, int lsb, int64_t x) { + return (x << (63 - msb)) >> (lsb + 63 - msb); +} + +// Floating point representation. +uint32_t float_to_rawbits(float value); +uint64_t double_to_rawbits(double value); +float rawbits_to_float(uint32_t bits); +double rawbits_to_double(uint64_t bits); + +uint32_t float_sign(float val); +uint32_t float_exp(float val); +uint32_t float_mantissa(float val); +uint32_t double_sign(double val); +uint32_t double_exp(double val); +uint64_t double_mantissa(double val); + +float float_pack(uint32_t sign, uint32_t exp, uint32_t mantissa); +double double_pack(uint64_t sign, uint64_t exp, uint64_t mantissa); + +// An fpclassify() function for 16-bit half-precision floats. +int float16classify(float16 value); + +// NaN tests. +inline bool IsSignallingNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + uint64_t raw = double_to_rawbits(num); + if (std::isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + uint32_t raw = float_to_rawbits(num); + if (std::isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float16 num) { + const uint16_t kFP16QuietNaNMask = 0x0200; + return (float16classify(num) == FP_NAN) && + ((num & kFP16QuietNaNMask) == 0); +} + + +template +inline bool IsQuietNaN(T num) { + return std::isnan(num) && !IsSignallingNaN(num); +} + + +// Convert the NaN in 'num' to a quiet NaN. +inline double ToQuietNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + VIXL_ASSERT(std::isnan(num)); + return rawbits_to_double(double_to_rawbits(num) | kFP64QuietNaNMask); +} + + +inline float ToQuietNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + VIXL_ASSERT(std::isnan(num)); + return rawbits_to_float(float_to_rawbits(num) | kFP32QuietNaNMask); +} + + +// Fused multiply-add. 
+inline double FusedMultiplyAdd(double op1, double op2, double a) { + return fma(op1, op2, a); +} + + +inline float FusedMultiplyAdd(float op1, float op2, float a) { + return fmaf(op1, op2, a); +} + + +inline uint64_t LowestSetBit(uint64_t value) { + return value & -value; +} + + +template +inline int HighestSetBitPosition(T value) { + VIXL_ASSERT(value != 0); + return (sizeof(value) * 8 - 1) - CountLeadingZeros(value); +} + + +template +inline int WhichPowerOf2(V value) { + VIXL_ASSERT(IsPowerOf2(value)); + return CountTrailingZeros(value); +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size); + + +template +T ReverseBits(T value) { + VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8)); + T result = 0; + for (unsigned i = 0; i < (sizeof(value) * 8); i++) { + result = (result << 1) | (value & 1); + value >>= 1; + } + return result; +} + + +template +T ReverseBytes(T value, int block_bytes_log2) { + VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8)); + VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value)); + // Split the 64-bit value into an 8-bit array, where b[0] is the least + // significant byte, and b[7] is the most significant. + uint8_t bytes[8]; + uint64_t mask = UINT64_C(0xff00000000000000); + for (int i = 7; i >= 0; i--) { + bytes[i] = (static_cast(value) & mask) >> (i * 8); + mask >>= 8; + } + + // Permutation tables for REV instructions. + // permute_table[0] is used by REV16_x, REV16_w + // permute_table[1] is used by REV32_x, REV_w + // permute_table[2] is used by REV_x + VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4)); + static const uint8_t permute_table[3][8] = { {6, 7, 4, 5, 2, 3, 0, 1}, + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7} }; + T result = 0; + for (int i = 0; i < 8; i++) { + result <<= 8; + result |= bytes[permute_table[block_bytes_log2 - 1][i]]; + } + return result; +} + + +// Pointer alignment +// TODO: rename/refactor to make it specific to instructions. +template +bool IsWordAligned(T pointer) { + VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t)); // NOLINT(runtime/sizeof) + return ((intptr_t)(pointer) & 3) == 0; +} + +// Increment a pointer (up to 64 bits) until it has the specified alignment. +template +T AlignUp(T pointer, size_t alignment) { + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + + uint64_t pointer_raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t align_step = (alignment - pointer_raw) % alignment; + VIXL_ASSERT((pointer_raw + align_step) % alignment == 0); + + return (T)(pointer_raw + align_step); +} + +// Decrement a pointer (up to 64 bits) until it has the specified alignment. +template +T AlignDown(T pointer, size_t alignment) { + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + + uint64_t pointer_raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t align_step = pointer_raw % alignment; + VIXL_ASSERT((pointer_raw - align_step) % alignment == 0); + + return (T)(pointer_raw - align_step); +} + +} // namespace vixl + +#endif // VIXL_UTILS_H diff --git a/disas/lm32.c b/disas/lm32.c deleted file mode 100644 index 4fbb1245348..00000000000 --- a/disas/lm32.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Simple LatticeMico32 disassembler. 
- * - * Copyright (c) 2012 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - */ - -#include "qemu/osdep.h" -#include "disas/dis-asm.h" - -typedef enum { - LM32_OP_SRUI = 0, LM32_OP_NORI, LM32_OP_MULI, LM32_OP_SH, LM32_OP_LB, - LM32_OP_SRI, LM32_OP_XORI, LM32_OP_LH, LM32_OP_ANDI, LM32_OP_XNORI, - LM32_OP_LW, LM32_OP_LHU, LM32_OP_SB, LM32_OP_ADDI, LM32_OP_ORI, - LM32_OP_SLI, LM32_OP_LBU, LM32_OP_BE, LM32_OP_BG, LM32_OP_BGE, - LM32_OP_BGEU, LM32_OP_BGU, LM32_OP_SW, LM32_OP_BNE, LM32_OP_ANDHI, - LM32_OP_CMPEI, LM32_OP_CMPGI, LM32_OP_CMPGEI, LM32_OP_CMPGEUI, - LM32_OP_CMPGUI, LM32_OP_ORHI, LM32_OP_CMPNEI, LM32_OP_SRU, LM32_OP_NOR, - LM32_OP_MUL, LM32_OP_DIVU, LM32_OP_RCSR, LM32_OP_SR, LM32_OP_XOR, - LM32_OP_ILL0, LM32_OP_AND, LM32_OP_XNOR, LM32_OP_ILL1, LM32_OP_SCALL, - LM32_OP_SEXTB, LM32_OP_ADD, LM32_OP_OR, LM32_OP_SL, LM32_OP_B, - LM32_OP_MODU, LM32_OP_SUB, LM32_OP_ILL2, LM32_OP_WCSR, LM32_OP_ILL3, - LM32_OP_CALL, LM32_OP_SEXTH, LM32_OP_BI, LM32_OP_CMPE, LM32_OP_CMPG, - LM32_OP_CMPGE, LM32_OP_CMPGEU, LM32_OP_CMPGU, LM32_OP_CALLI, LM32_OP_CMPNE, -} Lm32Opcode; - -typedef enum { - FMT_INVALID = 0, FMT_RRI5, FMT_RRI16, FMT_IMM26, FMT_LOAD, FMT_STORE, - FMT_RRR, FMT_R, FMT_RNR, FMT_CRN, FMT_CNR, FMT_BREAK, -} Lm32OpcodeFmt; - -typedef enum { - LM32_CSR_IE = 0, LM32_CSR_IM, LM32_CSR_IP, LM32_CSR_ICC, LM32_CSR_DCC, - LM32_CSR_CC, LM32_CSR_CFG, LM32_CSR_EBA, LM32_CSR_DC, LM32_CSR_DEBA, - LM32_CSR_CFG2, LM32_CSR_JTX = 0xe, LM32_CSR_JRX, LM32_CSR_BP0, - LM32_CSR_BP1, LM32_CSR_BP2, LM32_CSR_BP3, LM32_CSR_WP0 = 0x18, - LM32_CSR_WP1, LM32_CSR_WP2, LM32_CSR_WP3, -} Lm32CsrNum; - -typedef struct { - int csr; - const char *name; -} Lm32CsrInfo; - -static const Lm32CsrInfo lm32_csr_info[] = { - {LM32_CSR_IE, "ie", }, - {LM32_CSR_IM, "im", }, - {LM32_CSR_IP, "ip", }, - {LM32_CSR_ICC, "icc", }, - {LM32_CSR_DCC, "dcc", }, - {LM32_CSR_CC, "cc", }, - {LM32_CSR_CFG, "cfg", }, - {LM32_CSR_EBA, "eba", }, - {LM32_CSR_DC, "dc", }, - {LM32_CSR_DEBA, "deba", }, - {LM32_CSR_CFG2, "cfg2", }, - {LM32_CSR_JTX, "jtx", }, - {LM32_CSR_JRX, "jrx", }, - {LM32_CSR_BP0, "bp0", }, - {LM32_CSR_BP1, "bp1", }, - {LM32_CSR_BP2, "bp2", }, - {LM32_CSR_BP3, "bp3", }, - {LM32_CSR_WP0, "wp0", }, - {LM32_CSR_WP1, "wp1", }, - {LM32_CSR_WP2, "wp2", }, - {LM32_CSR_WP3, "wp3", }, -}; - -static const Lm32CsrInfo *find_csr_info(int csr) -{ - const Lm32CsrInfo *info; - int i; - - for (i = 0; i < ARRAY_SIZE(lm32_csr_info); i++) { - info = &lm32_csr_info[i]; - if (csr == info->csr) { - return info; - } - } - - return NULL; -} - -typedef struct { - int reg; - const char *name; -} Lm32RegInfo; - -typedef enum { - LM32_REG_R0 = 0, LM32_REG_R1, LM32_REG_R2, LM32_REG_R3, LM32_REG_R4, - LM32_REG_R5, LM32_REG_R6, LM32_REG_R7, LM32_REG_R8, LM32_REG_R9, - LM32_REG_R10, LM32_REG_R11, LM32_REG_R12, LM32_REG_R13, LM32_REG_R14, - LM32_REG_R15, LM32_REG_R16, LM32_REG_R17, LM32_REG_R18, LM32_REG_R19, - LM32_REG_R20, LM32_REG_R21, LM32_REG_R22, 
LM32_REG_R23, LM32_REG_R24, - LM32_REG_R25, LM32_REG_GP, LM32_REG_FP, LM32_REG_SP, LM32_REG_RA, - LM32_REG_EA, LM32_REG_BA, -} Lm32RegNum; - -static const Lm32RegInfo lm32_reg_info[] = { - {LM32_REG_R0, "r0", }, - {LM32_REG_R1, "r1", }, - {LM32_REG_R2, "r2", }, - {LM32_REG_R3, "r3", }, - {LM32_REG_R4, "r4", }, - {LM32_REG_R5, "r5", }, - {LM32_REG_R6, "r6", }, - {LM32_REG_R7, "r7", }, - {LM32_REG_R8, "r8", }, - {LM32_REG_R9, "r9", }, - {LM32_REG_R10, "r10", }, - {LM32_REG_R11, "r11", }, - {LM32_REG_R12, "r12", }, - {LM32_REG_R13, "r13", }, - {LM32_REG_R14, "r14", }, - {LM32_REG_R15, "r15", }, - {LM32_REG_R16, "r16", }, - {LM32_REG_R17, "r17", }, - {LM32_REG_R18, "r18", }, - {LM32_REG_R19, "r19", }, - {LM32_REG_R20, "r20", }, - {LM32_REG_R21, "r21", }, - {LM32_REG_R22, "r22", }, - {LM32_REG_R23, "r23", }, - {LM32_REG_R24, "r24", }, - {LM32_REG_R25, "r25", }, - {LM32_REG_GP, "gp", }, - {LM32_REG_FP, "fp", }, - {LM32_REG_SP, "sp", }, - {LM32_REG_RA, "ra", }, - {LM32_REG_EA, "ea", }, - {LM32_REG_BA, "ba", }, -}; - -static const Lm32RegInfo *find_reg_info(int reg) -{ - assert(ARRAY_SIZE(lm32_reg_info) == 32); - return &lm32_reg_info[reg & 0x1f]; -} - -typedef struct { - struct { - uint32_t code; - uint32_t mask; - } op; - const char *name; - const char *args_fmt; -} Lm32OpcodeInfo; - -static const Lm32OpcodeInfo lm32_opcode_info[] = { - /* pseudo instructions */ - {{0x34000000, 0xffffffff}, "nop", NULL}, - {{0xac000002, 0xffffffff}, "break", NULL}, - {{0xac000003, 0xffffffff}, "scall", NULL}, - {{0xc3e00000, 0xffffffff}, "bret", NULL}, - {{0xc3c00000, 0xffffffff}, "eret", NULL}, - {{0xc3a00000, 0xffffffff}, "ret", NULL}, - {{0xa4000000, 0xfc1f07ff}, "not", "%2, %0"}, - {{0xb8000000, 0xfc1f07ff}, "mv", "%2, %0"}, - {{0x71e00000, 0xffe00000}, "mvhi", "%1, %u"}, - {{0x34000000, 0xffe00000}, "mvi", "%1, %s"}, - -#define _O(op) {op << 26, 0x3f << 26} - /* regular opcodes */ - {_O(LM32_OP_ADD), "add", "%2, %0, %1" }, - {_O(LM32_OP_ADDI), "addi", "%1, %0, %s" }, - {_O(LM32_OP_AND), "and", "%2, %0, %1" }, - {_O(LM32_OP_ANDHI), "andhi", "%1, %0, %u" }, - {_O(LM32_OP_ANDI), "andi", "%1, %0, %u" }, - {_O(LM32_OP_B), "b", "%0", }, - {_O(LM32_OP_BE), "be", "%1, %0, %r" }, - {_O(LM32_OP_BG), "bg", "%1, %0, %r" }, - {_O(LM32_OP_BGE), "bge", "%1, %0, %r" }, - {_O(LM32_OP_BGEU), "bgeu", "%1, %0, %r" }, - {_O(LM32_OP_BGU), "bgu", "%1, %0, %r" }, - {_O(LM32_OP_BI), "bi", "%R", }, - {_O(LM32_OP_BNE), "bne", "%1, %0, %r" }, - {_O(LM32_OP_CALL), "call", "%0", }, - {_O(LM32_OP_CALLI), "calli", "%R", }, - {_O(LM32_OP_CMPE), "cmpe", "%2, %0, %1" }, - {_O(LM32_OP_CMPEI), "cmpei", "%1, %0, %s" }, - {_O(LM32_OP_CMPG), "cmpg", "%2, %0, %1" }, - {_O(LM32_OP_CMPGE), "cmpge", "%2, %0, %1" }, - {_O(LM32_OP_CMPGEI), "cmpgei", "%1, %0, %s" }, - {_O(LM32_OP_CMPGEU), "cmpgeu", "%2, %0, %1" }, - {_O(LM32_OP_CMPGEUI), "cmpgeui", "%1, %0, %s" }, - {_O(LM32_OP_CMPGI), "cmpgi", "%1, %0, %s" }, - {_O(LM32_OP_CMPGU), "cmpgu", "%2, %0, %1" }, - {_O(LM32_OP_CMPGUI), "cmpgui", "%1, %0, %s" }, - {_O(LM32_OP_CMPNE), "cmpne", "%2, %0, %1" }, - {_O(LM32_OP_CMPNEI), "cmpnei", "%1, %0, %s" }, - {_O(LM32_OP_DIVU), "divu", "%2, %0, %1" }, - {_O(LM32_OP_LB), "lb", "%1, (%0+%s)" }, - {_O(LM32_OP_LBU), "lbu", "%1, (%0+%s)" }, - {_O(LM32_OP_LH), "lh", "%1, (%0+%s)" }, - {_O(LM32_OP_LHU), "lhu", "%1, (%0+%s)" }, - {_O(LM32_OP_LW), "lw", "%1, (%0+%s)" }, - {_O(LM32_OP_MODU), "modu", "%2, %0, %1" }, - {_O(LM32_OP_MULI), "muli", "%1, %0, %s" }, - {_O(LM32_OP_MUL), "mul", "%2, %0, %1" }, - {_O(LM32_OP_NORI), "nori", "%1, %0, %u" }, - {_O(LM32_OP_NOR), "nor", 
"%2, %0, %1" }, - {_O(LM32_OP_ORHI), "orhi", "%1, %0, %u" }, - {_O(LM32_OP_ORI), "ori", "%1, %0, %u" }, - {_O(LM32_OP_OR), "or", "%2, %0, %1" }, - {_O(LM32_OP_RCSR), "rcsr", "%2, %c", }, - {_O(LM32_OP_SB), "sb", "(%0+%s), %1" }, - {_O(LM32_OP_SEXTB), "sextb", "%2, %0", }, - {_O(LM32_OP_SEXTH), "sexth", "%2, %0", }, - {_O(LM32_OP_SH), "sh", "(%0+%s), %1" }, - {_O(LM32_OP_SLI), "sli", "%1, %0, %h" }, - {_O(LM32_OP_SL), "sl", "%2, %0, %1" }, - {_O(LM32_OP_SRI), "sri", "%1, %0, %h" }, - {_O(LM32_OP_SR), "sr", "%2, %0, %1" }, - {_O(LM32_OP_SRUI), "srui", "%1, %0, %d" }, - {_O(LM32_OP_SRU), "sru", "%2, %0, %s" }, - {_O(LM32_OP_SUB), "sub", "%2, %0, %s" }, - {_O(LM32_OP_SW), "sw", "(%0+%s), %1" }, - {_O(LM32_OP_WCSR), "wcsr", "%c, %1", }, - {_O(LM32_OP_XNORI), "xnori", "%1, %0, %u" }, - {_O(LM32_OP_XNOR), "xnor", "%2, %0, %1" }, - {_O(LM32_OP_XORI), "xori", "%1, %0, %u" }, - {_O(LM32_OP_XOR), "xor", "%2, %0, %1" }, -#undef _O -}; - -static const Lm32OpcodeInfo *find_opcode_info(uint32_t opcode) -{ - const Lm32OpcodeInfo *info; - int i; - for (i = 0; i < ARRAY_SIZE(lm32_opcode_info); i++) { - info = &lm32_opcode_info[i]; - if ((opcode & info->op.mask) == info->op.code) { - return info; - } - } - - return NULL; -} - -int print_insn_lm32(bfd_vma memaddr, struct disassemble_info *info) -{ - fprintf_function fprintf_fn = info->fprintf_func; - void *stream = info->stream; - int rc; - uint8_t insn[4]; - const Lm32OpcodeInfo *opc_info; - uint32_t op; - const char *args_fmt; - - rc = info->read_memory_func(memaddr, insn, 4, info); - if (rc != 0) { - info->memory_error_func(rc, memaddr, info); - return -1; - } - - fprintf_fn(stream, "%02x %02x %02x %02x ", - insn[0], insn[1], insn[2], insn[3]); - - op = bfd_getb32(insn); - opc_info = find_opcode_info(op); - if (opc_info) { - fprintf_fn(stream, "%-8s ", opc_info->name); - args_fmt = opc_info->args_fmt; - while (args_fmt && *args_fmt) { - if (*args_fmt == '%') { - switch (*(++args_fmt)) { - case '0': { - uint8_t r0; - const char *r0_name; - r0 = (op >> 21) & 0x1f; - r0_name = find_reg_info(r0)->name; - fprintf_fn(stream, "%s", r0_name); - break; - } - case '1': { - uint8_t r1; - const char *r1_name; - r1 = (op >> 16) & 0x1f; - r1_name = find_reg_info(r1)->name; - fprintf_fn(stream, "%s", r1_name); - break; - } - case '2': { - uint8_t r2; - const char *r2_name; - r2 = (op >> 11) & 0x1f; - r2_name = find_reg_info(r2)->name; - fprintf_fn(stream, "%s", r2_name); - break; - } - case 'c': { - uint8_t csr; - const Lm32CsrInfo *info; - csr = (op >> 21) & 0x1f; - info = find_csr_info(csr); - if (info) { - fprintf_fn(stream, "%s", info->name); - } else { - fprintf_fn(stream, "0x%x", csr); - } - break; - } - case 'u': { - uint16_t u16; - u16 = op & 0xffff; - fprintf_fn(stream, "0x%x", u16); - break; - } - case 's': { - int16_t s16; - s16 = (int16_t)(op & 0xffff); - fprintf_fn(stream, "%d", s16); - break; - } - case 'r': { - uint32_t rela; - rela = memaddr + (((int16_t)(op & 0xffff)) << 2); - fprintf_fn(stream, "%x", rela); - break; - } - case 'R': { - uint32_t rela; - int32_t imm26; - imm26 = (int32_t)((op & 0x3ffffff) << 6) >> 4; - rela = memaddr + imm26; - fprintf_fn(stream, "%x", rela); - break; - } - case 'h': { - uint8_t u5; - u5 = (op & 0x1f); - fprintf_fn(stream, "%d", u5); - break; - } - default: - break; - } - } else { - fprintf_fn(stream, "%c", *args_fmt); - } - args_fmt++; - } - } else { - fprintf_fn(stream, ".word 0x%x", op); - } - - return 4; -} diff --git a/disas/meson.build b/disas/meson.build index 4c8da018778..449f99e1de6 100644 --- a/disas/meson.build 
+++ b/disas/meson.build @@ -9,11 +9,9 @@ common_ss.add(when: 'CONFIG_CRIS_DIS', if_true: files('cris.c')) common_ss.add(when: 'CONFIG_HEXAGON_DIS', if_true: files('hexagon.c')) common_ss.add(when: 'CONFIG_HPPA_DIS', if_true: files('hppa.c')) common_ss.add(when: 'CONFIG_I386_DIS', if_true: files('i386.c')) -common_ss.add(when: 'CONFIG_LM32_DIS', if_true: files('lm32.c')) common_ss.add(when: 'CONFIG_M68K_DIS', if_true: files('m68k.c')) common_ss.add(when: 'CONFIG_MICROBLAZE_DIS', if_true: files('microblaze.c')) common_ss.add(when: 'CONFIG_MIPS_DIS', if_true: files('mips.c')) -common_ss.add(when: 'CONFIG_MOXIE_DIS', if_true: files('moxie.c')) common_ss.add(when: 'CONFIG_NANOMIPS_DIS', if_true: files('nanomips.cpp')) common_ss.add(when: 'CONFIG_NIOS2_DIS', if_true: files('nios2.c')) common_ss.add(when: 'CONFIG_PPC_DIS', if_true: files('ppc.c')) diff --git a/disas/moxie.c b/disas/moxie.c deleted file mode 100644 index e94ab4c33d8..00000000000 --- a/disas/moxie.c +++ /dev/null @@ -1,360 +0,0 @@ -/* Disassemble moxie instructions. - Copyright (c) 2009 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#include "qemu/osdep.h" -#define STATIC_TABLE -#define DEFINE_TABLE - -#include "disas/dis-asm.h" - -static void *stream; - -/* Form 1 instructions come in different flavors: - - Some have no arguments (MOXIE_F1_NARG) - Some only use the A operand (MOXIE_F1_A) - Some use A and B registers (MOXIE_F1_AB) - Some use A and consume a 4 byte immediate value (MOXIE_F1_A4) - Some use just a 4 byte immediate value (MOXIE_F1_4) - Some use just a 4 byte memory address (MOXIE_F1_M) - Some use B and an indirect A (MOXIE_F1_AiB) - Some use A and an indirect B (MOXIE_F1_ABi) - Some consume a 4 byte immediate value and use X (MOXIE_F1_4A) - Some use B and an indirect A plus 4 bytes (MOXIE_F1_AiB4) - Some use A and an indirect B plus 4 bytes (MOXIE_F1_ABi4) - - Form 2 instructions also come in different flavors: - - Some have no arguments (MOXIE_F2_NARG) - Some use the A register and an 8-bit value (MOXIE_F2_A8V) - - Form 3 instructions also come in different flavors: - - Some have no arguments (MOXIE_F3_NARG) - Some have a 10-bit PC relative operand (MOXIE_F3_PCREL). 
*/ - -#define MOXIE_F1_NARG 0x100 -#define MOXIE_F1_A 0x101 -#define MOXIE_F1_AB 0x102 -/* #define MOXIE_F1_ABC 0x103 */ -#define MOXIE_F1_A4 0x104 -#define MOXIE_F1_4 0x105 -#define MOXIE_F1_AiB 0x106 -#define MOXIE_F1_ABi 0x107 -#define MOXIE_F1_4A 0x108 -#define MOXIE_F1_AiB4 0x109 -#define MOXIE_F1_ABi4 0x10a -#define MOXIE_F1_M 0x10b - -#define MOXIE_F2_NARG 0x200 -#define MOXIE_F2_A8V 0x201 - -#define MOXIE_F3_NARG 0x300 -#define MOXIE_F3_PCREL 0x301 - -typedef struct moxie_opc_info_t { - short opcode; - unsigned itype; - const char * name; -} moxie_opc_info_t; - -extern const moxie_opc_info_t moxie_form1_opc_info[64]; -extern const moxie_opc_info_t moxie_form2_opc_info[4]; -extern const moxie_opc_info_t moxie_form3_opc_info[16]; - -/* The moxie processor's 16-bit instructions come in two forms: - - FORM 1 instructions start with a 0 bit... - - 0oooooooaaaabbbb - 0 F - - ooooooo - form 1 opcode number - aaaa - operand A - bbbb - operand B - - FORM 2 instructions start with bits "10"... - - 10ooaaaavvvvvvvv - 0 F - - oo - form 2 opcode number - aaaa - operand A - vvvvvvvv - 8-bit immediate value - - FORM 3 instructions start with a bits "11"... - - 11oooovvvvvvvvvv - 0 F - - oooo - form 3 opcode number - vvvvvvvvvv - 10-bit immediate value. */ - -const moxie_opc_info_t moxie_form1_opc_info[64] = - { - { 0x00, MOXIE_F1_NARG, "nop" }, - { 0x01, MOXIE_F1_A4, "ldi.l" }, - { 0x02, MOXIE_F1_AB, "mov" }, - { 0x03, MOXIE_F1_M, "jsra" }, - { 0x04, MOXIE_F1_NARG, "ret" }, - { 0x05, MOXIE_F1_AB, "add.l" }, - { 0x06, MOXIE_F1_AB, "push" }, - { 0x07, MOXIE_F1_AB, "pop" }, - { 0x08, MOXIE_F1_A4, "lda.l" }, - { 0x09, MOXIE_F1_4A, "sta.l" }, - { 0x0a, MOXIE_F1_ABi, "ld.l" }, - { 0x0b, MOXIE_F1_AiB, "st.l" }, - { 0x0c, MOXIE_F1_ABi4, "ldo.l" }, - { 0x0d, MOXIE_F1_AiB4, "sto.l" }, - { 0x0e, MOXIE_F1_AB, "cmp" }, - { 0x0f, MOXIE_F1_NARG, "bad" }, - { 0x10, MOXIE_F1_NARG, "bad" }, - { 0x11, MOXIE_F1_NARG, "bad" }, - { 0x12, MOXIE_F1_NARG, "bad" }, - { 0x13, MOXIE_F1_NARG, "bad" }, - { 0x14, MOXIE_F1_NARG, "bad" }, - { 0x15, MOXIE_F1_NARG, "bad" }, - { 0x16, MOXIE_F1_NARG, "bad" }, - { 0x17, MOXIE_F1_NARG, "bad" }, - { 0x18, MOXIE_F1_NARG, "bad" }, - { 0x19, MOXIE_F1_A, "jsr" }, - { 0x1a, MOXIE_F1_M, "jmpa" }, - { 0x1b, MOXIE_F1_A4, "ldi.b" }, - { 0x1c, MOXIE_F1_ABi, "ld.b" }, - { 0x1d, MOXIE_F1_A4, "lda.b" }, - { 0x1e, MOXIE_F1_AiB, "st.b" }, - { 0x1f, MOXIE_F1_4A, "sta.b" }, - { 0x20, MOXIE_F1_A4, "ldi.s" }, - { 0x21, MOXIE_F1_ABi, "ld.s" }, - { 0x22, MOXIE_F1_A4, "lda.s" }, - { 0x23, MOXIE_F1_AiB, "st.s" }, - { 0x24, MOXIE_F1_4A, "sta.s" }, - { 0x25, MOXIE_F1_A, "jmp" }, - { 0x26, MOXIE_F1_AB, "and" }, - { 0x27, MOXIE_F1_AB, "lshr" }, - { 0x28, MOXIE_F1_AB, "ashl" }, - { 0x29, MOXIE_F1_AB, "sub.l" }, - { 0x2a, MOXIE_F1_AB, "neg" }, - { 0x2b, MOXIE_F1_AB, "or" }, - { 0x2c, MOXIE_F1_AB, "not" }, - { 0x2d, MOXIE_F1_AB, "ashr" }, - { 0x2e, MOXIE_F1_AB, "xor" }, - { 0x2f, MOXIE_F1_AB, "mul.l" }, - { 0x30, MOXIE_F1_4, "swi" }, - { 0x31, MOXIE_F1_AB, "div.l" }, - { 0x32, MOXIE_F1_AB, "udiv.l" }, - { 0x33, MOXIE_F1_AB, "mod.l" }, - { 0x34, MOXIE_F1_AB, "umod.l" }, - { 0x35, MOXIE_F1_NARG, "brk" }, - { 0x36, MOXIE_F1_ABi4, "ldo.b" }, - { 0x37, MOXIE_F1_AiB4, "sto.b" }, - { 0x38, MOXIE_F1_ABi4, "ldo.s" }, - { 0x39, MOXIE_F1_AiB4, "sto.s" }, - { 0x3a, MOXIE_F1_NARG, "bad" }, - { 0x3b, MOXIE_F1_NARG, "bad" }, - { 0x3c, MOXIE_F1_NARG, "bad" }, - { 0x3d, MOXIE_F1_NARG, "bad" }, - { 0x3e, MOXIE_F1_NARG, "bad" }, - { 0x3f, MOXIE_F1_NARG, "bad" } - }; - -const moxie_opc_info_t moxie_form2_opc_info[4] = - { - { 0x00, 
MOXIE_F2_A8V, "inc" }, - { 0x01, MOXIE_F2_A8V, "dec" }, - { 0x02, MOXIE_F2_A8V, "gsr" }, - { 0x03, MOXIE_F2_A8V, "ssr" } - }; - -const moxie_opc_info_t moxie_form3_opc_info[16] = - { - { 0x00, MOXIE_F3_PCREL,"beq" }, - { 0x01, MOXIE_F3_PCREL,"bne" }, - { 0x02, MOXIE_F3_PCREL,"blt" }, - { 0x03, MOXIE_F3_PCREL,"bgt" }, - { 0x04, MOXIE_F3_PCREL,"bltu" }, - { 0x05, MOXIE_F3_PCREL,"bgtu" }, - { 0x06, MOXIE_F3_PCREL,"bge" }, - { 0x07, MOXIE_F3_PCREL,"ble" }, - { 0x08, MOXIE_F3_PCREL,"bgeu" }, - { 0x09, MOXIE_F3_PCREL,"bleu" }, - { 0x0a, MOXIE_F3_NARG, "bad" }, - { 0x0b, MOXIE_F3_NARG, "bad" }, - { 0x0c, MOXIE_F3_NARG, "bad" }, - { 0x0d, MOXIE_F3_NARG, "bad" }, - { 0x0e, MOXIE_F3_NARG, "bad" }, - { 0x0f, MOXIE_F3_NARG, "bad" } - }; - -/* Macros to extract operands from the instruction word. */ -#define OP_A(i) ((i >> 4) & 0xf) -#define OP_B(i) (i & 0xf) -#define INST2OFFSET(o) ((((signed short)((o & ((1<<10)-1))<<6))>>6)<<1) - -static const char * reg_names[16] = - { "$fp", "$sp", "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", - "$r6", "$r7", "$r8", "$r9", "$r10", "$r11", "$r12", "$r13" }; - -int -print_insn_moxie(bfd_vma addr, struct disassemble_info * info) -{ - int length = 2; - int status; - stream = info->stream; - const moxie_opc_info_t * opcode; - bfd_byte buffer[4]; - unsigned short iword; - fprintf_function fpr = info->fprintf_func; - - if ((status = info->read_memory_func(addr, buffer, 2, info))) - goto fail; - iword = (bfd_getb16(buffer) >> 16); - - /* Form 1 instructions have the high bit set to 0. */ - if ((iword & (1<<15)) == 0) { - /* Extract the Form 1 opcode. */ - opcode = &moxie_form1_opc_info[iword >> 8]; - switch (opcode->itype) { - case MOXIE_F1_NARG: - fpr(stream, "%s", opcode->name); - break; - case MOXIE_F1_A: - fpr(stream, "%s\t%s", opcode->name, - reg_names[OP_A(iword)]); - break; - case MOXIE_F1_AB: - fpr(stream, "%s\t%s, %s", opcode->name, - reg_names[OP_A(iword)], - reg_names[OP_B(iword)]); - break; - case MOXIE_F1_A4: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t%s, 0x%x", opcode->name, - reg_names[OP_A(iword)], imm); - length = 6; - } - break; - case MOXIE_F1_4: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t0x%x", opcode->name, imm); - length = 6; - } - break; - case MOXIE_F1_M: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t", opcode->name); - info->print_address_func((bfd_vma) imm, info); - length = 6; - } - break; - case MOXIE_F1_AiB: - fpr (stream, "%s\t(%s), %s", opcode->name, - reg_names[OP_A(iword)], reg_names[OP_B(iword)]); - break; - case MOXIE_F1_ABi: - fpr(stream, "%s\t%s, (%s)", opcode->name, - reg_names[OP_A(iword)], reg_names[OP_B(iword)]); - break; - case MOXIE_F1_4A: - { - unsigned imm; - if ((status = info->read_memory_func(addr + 2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t0x%x, %s", - opcode->name, imm, reg_names[OP_A(iword)]); - length = 6; - } - break; - case MOXIE_F1_AiB4: - { - unsigned imm; - if ((status = info->read_memory_func(addr+2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t0x%x(%s), %s", opcode->name, - imm, - reg_names[OP_A(iword)], - reg_names[OP_B(iword)]); - length = 6; - } - break; - case MOXIE_F1_ABi4: - { - unsigned imm; - if ((status = 
info->read_memory_func(addr+2, buffer, 4, info))) - goto fail; - imm = bfd_getb32(buffer); - fpr(stream, "%s\t%s, 0x%x(%s)", - opcode->name, - reg_names[OP_A(iword)], - imm, - reg_names[OP_B(iword)]); - length = 6; - } - break; - default: - abort(); - } - } - else if ((iword & (1<<14)) == 0) { - /* Extract the Form 2 opcode. */ - opcode = &moxie_form2_opc_info[(iword >> 12) & 3]; - switch (opcode->itype) { - case MOXIE_F2_A8V: - fpr(stream, "%s\t%s, 0x%x", - opcode->name, - reg_names[(iword >> 8) & 0xf], - iword & ((1 << 8) - 1)); - break; - case MOXIE_F2_NARG: - fpr(stream, "%s", opcode->name); - break; - default: - abort(); - } - } else { - /* Extract the Form 3 opcode. */ - opcode = &moxie_form3_opc_info[(iword >> 10) & 15]; - switch (opcode->itype) { - case MOXIE_F3_PCREL: - fpr(stream, "%s\t", opcode->name); - info->print_address_func((bfd_vma) (addr + INST2OFFSET(iword) + 2), - info); - break; - default: - abort(); - } - } - - return length; - - fail: - info->memory_error_func(status, addr, info); - return -1; -} diff --git a/disas/nanomips.cpp b/disas/nanomips.cpp index 8ddef897f0d..9be8df75dd6 100644 --- a/disas/nanomips.cpp +++ b/disas/nanomips.cpp @@ -28,9 +28,7 @@ */ #include "qemu/osdep.h" -extern "C" { #include "disas/dis-asm.h" -} #include #include diff --git a/disas/nios2.c b/disas/nios2.c index c3e82140c79..98ac07d72e9 100644 --- a/disas/nios2.c +++ b/disas/nios2.c @@ -3478,54 +3478,37 @@ nios2_disassemble (bfd_vma address, unsigned long opcode, instruction word at the address given, and prints the disassembled instruction on the stream info->stream using info->fprintf_func. */ -static int -print_insn_nios2 (bfd_vma address, disassemble_info *info, - enum bfd_endian endianness) +int print_insn_nios2(bfd_vma address, disassemble_info *info) { - bfd_byte buffer[INSNLEN]; - int status; - - status = (*info->read_memory_func) (address, buffer, INSNLEN, info); - if (status == 0) - { - unsigned long insn; - if (endianness == BFD_ENDIAN_BIG) - insn = (unsigned long) bfd_getb32 (buffer); - else - insn = (unsigned long) bfd_getl32 (buffer); - return nios2_disassemble (address, insn, info); + bfd_byte buffer[INSNLEN]; + int status; + + status = (*info->read_memory_func)(address, buffer, INSNLEN, info); + if (status == 0) { + unsigned long insn; + if (info->endian == BFD_ENDIAN_BIG) { + insn = (unsigned long) bfd_getb32(buffer); + } else { + insn = (unsigned long) bfd_getl32(buffer); + } + return nios2_disassemble(address, insn, info); } - /* We might have a 16-bit R2 instruction at the end of memory. Try that. */ - if (info->mach == bfd_mach_nios2r2) - { - status = (*info->read_memory_func) (address, buffer, 2, info); - if (status == 0) - { - unsigned long insn; - if (endianness == BFD_ENDIAN_BIG) - insn = (unsigned long) bfd_getb16 (buffer); - else - insn = (unsigned long) bfd_getl16 (buffer); - return nios2_disassemble (address, insn, info); - } + /* We might have a 16-bit R2 instruction at the end of memory. Try that. */ + if (info->mach == bfd_mach_nios2r2) { + status = (*info->read_memory_func)(address, buffer, 2, info); + if (status == 0) { + unsigned long insn; + if (info->endian == BFD_ENDIAN_BIG) { + insn = (unsigned long) bfd_getb16(buffer); + } else { + insn = (unsigned long) bfd_getl16(buffer); + } + return nios2_disassemble(address, insn, info); + } } - /* If we got here, we couldn't read anything. */ - (*info->memory_error_func) (status, address, info); - return -1; -} - -/* These two functions are the main entry points, accessed from - disassemble.c. 
*/ -int -print_insn_big_nios2 (bfd_vma address, disassemble_info *info) -{ - return print_insn_nios2 (address, info, BFD_ENDIAN_BIG); -} - -int -print_insn_little_nios2 (bfd_vma address, disassemble_info *info) -{ - return print_insn_nios2 (address, info, BFD_ENDIAN_LITTLE); + /* If we got here, we couldn't read anything. */ + (*info->memory_error_func)(status, address, info); + return -1; } diff --git a/disas/riscv.c b/disas/riscv.c index bccc2c1aac3..ba73dbaf70f 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -478,8 +478,51 @@ typedef enum { rv_op_fsflags = 316, rv_op_fsrmi = 317, rv_op_fsflagsi = 318, + rv_op_bseti = 319, + rv_op_bclri = 320, + rv_op_binvi = 321, + rv_op_bexti = 322, + rv_op_rori = 323, + rv_op_clz = 324, + rv_op_ctz = 325, + rv_op_cpop = 326, + rv_op_sext_h = 327, + rv_op_sext_b = 328, + rv_op_xnor = 329, + rv_op_orn = 330, + rv_op_andn = 331, + rv_op_rol = 332, + rv_op_ror = 333, + rv_op_sh1add = 334, + rv_op_sh2add = 335, + rv_op_sh3add = 336, + rv_op_sh1add_uw = 337, + rv_op_sh2add_uw = 338, + rv_op_sh3add_uw = 339, + rv_op_clmul = 340, + rv_op_clmulr = 341, + rv_op_clmulh = 342, + rv_op_min = 343, + rv_op_minu = 344, + rv_op_max = 345, + rv_op_maxu = 346, + rv_op_clzw = 347, + rv_op_ctzw = 348, + rv_op_cpopw = 349, + rv_op_slli_uw = 350, + rv_op_add_uw = 351, + rv_op_rolw = 352, + rv_op_rorw = 353, + rv_op_rev8 = 354, + rv_op_zext_h = 355, + rv_op_roriw = 356, + rv_op_orc_b = 357, + rv_op_bset = 358, + rv_op_bclr = 359, + rv_op_binv = 360, + rv_op_bext = 361, // CHERI: - rv_op_auipcc = 319, + rv_op_auipcc = 362, rv_op_lc, rv_op_clc, rv_op_clb, @@ -545,7 +588,6 @@ typedef enum { rv_op_cfsw, rv_op_cfld, rv_op_cfsd, - } rv_op; /* structures */ @@ -1209,6 +1251,49 @@ const rv_opcode_data opcode_data[] = { { "fsflags", rv_codec_i_csr, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, { "fsrmi", rv_codec_i_csr, rv_fmt_rd_zimm, NULL, 0, 0, 0 }, { "fsflagsi", rv_codec_i_csr, rv_fmt_rd_zimm, NULL, 0, 0, 0 }, + { "bseti", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "bclri", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "binvi", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "bexti", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "rori", rv_codec_i_sh7, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "clz", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "ctz", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "cpop", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "sext.h", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "sext.b", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "xnor", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "orn", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "andn", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "rol", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "ror", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh1add", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh2add", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh3add", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh1add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh2add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "sh3add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clmul", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clmulr", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clmulh", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "min", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "minu", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "max", rv_codec_r, 
rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "maxu", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "clzw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "cpopw", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "slli.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "add.uw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "rolw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "rorw", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "rev8", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "zext.h", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "roriw", rv_codec_i_sh5, rv_fmt_rd_rs1_imm, NULL, 0, 0, 0 }, + { "orc.b", rv_codec_r, rv_fmt_rd_rs1, NULL, 0, 0, 0 }, + { "bset", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "bclr", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "binv", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "bext", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, // CHERI extensions [rv_op_auipcc] = { "auipcc", rv_codec_u, rv_fmt_cd_offset, NULL, 0, 0, 0 }, @@ -1746,7 +1831,20 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 0: op = rv_op_addi; break; case 1: switch (((inst >> 27) & 0b11111)) { - case 0: op = rv_op_slli; break; + case 0b00000: op = rv_op_slli; break; + case 0b00101: op = rv_op_bseti; break; + case 0b01001: op = rv_op_bclri; break; + case 0b01101: op = rv_op_binvi; break; + case 0b01100: + switch (((inst >> 20) & 0b1111111)) { + case 0b0000000: op = rv_op_clz; break; + case 0b0000001: op = rv_op_ctz; break; + case 0b0000010: op = rv_op_cpop; break; + /* 0b0000011 */ + case 0b0000100: op = rv_op_sext_b; break; + case 0b0000101: op = rv_op_sext_h; break; + } + break; } break; case 2: op = rv_op_slti; break; @@ -1754,8 +1852,16 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 4: op = rv_op_xori; break; case 5: switch (((inst >> 27) & 0b11111)) { - case 0: op = rv_op_srli; break; - case 8: op = rv_op_srai; break; + case 0b00000: op = rv_op_srli; break; + case 0b00101: op = rv_op_orc_b; break; + case 0b01000: op = rv_op_srai; break; + case 0b01001: op = rv_op_bexti; break; + case 0b01100: op = rv_op_rori; break; + case 0b01101: + switch ((inst >> 20) & 0b1111111) { + case 0b0111000: op = rv_op_rev8; break; + } + break; } break; case 6: op = rv_op_ori; break; @@ -1771,12 +1877,21 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 1: switch (((inst >> 25) & 0b1111111)) { case 0: op = rv_op_slliw; break; + case 4: op = rv_op_slli_uw; break; + case 48: + switch ((inst >> 20) & 0b11111) { + case 0b00000: op = rv_op_clzw; break; + case 0b00001: op = rv_op_ctzw; break; + case 0b00010: op = rv_op_cpopw; break; + } + break; } break; case 5: switch (((inst >> 25) & 0b1111111)) { case 0: op = rv_op_srliw; break; case 32: op = rv_op_sraiw; break; + case 48: op = rv_op_roriw; break; } break; } @@ -1876,8 +1991,32 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 13: op = rv_op_divu; break; case 14: op = rv_op_rem; break; case 15: op = rv_op_remu; break; + case 36: + switch ((inst >> 20) & 0b11111) { + case 0: op = rv_op_zext_h; break; + } + break; + case 41: op = rv_op_clmul; break; + case 42: op = rv_op_clmulr; break; + case 43: op = rv_op_clmulh; break; + case 44: op = rv_op_min; break; + case 45: op = rv_op_minu; break; + case 46: op = rv_op_max; break; + case 47: op = rv_op_maxu; break; + case 130: op = rv_op_sh1add; break; + case 132: op = rv_op_sh2add; break; + case 
134: op = rv_op_sh3add; break; + case 161: op = rv_op_bset; break; case 256: op = rv_op_sub; break; + case 260: op = rv_op_xnor; break; case 261: op = rv_op_sra; break; + case 262: op = rv_op_orn; break; + case 263: op = rv_op_andn; break; + case 289: op = rv_op_bclr; break; + case 293: op = rv_op_bext; break; + case 385: op = rv_op_rol; break; + case 386: op = rv_op_ror; break; + case 417: op = rv_op_binv; break; } break; case 13: op = rv_op_lui; break; @@ -1891,8 +2030,19 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa, int flags) case 13: op = rv_op_divuw; break; case 14: op = rv_op_remw; break; case 15: op = rv_op_remuw; break; + case 32: op = rv_op_add_uw; break; + case 36: + switch ((inst >> 20) & 0b11111) { + case 0: op = rv_op_zext_h; break; + } + break; + case 130: op = rv_op_sh1add_uw; break; + case 132: op = rv_op_sh2add_uw; break; + case 134: op = rv_op_sh3add_uw; break; case 256: op = rv_op_subw; break; case 261: op = rv_op_sraw; break; + case 385: op = rv_op_rolw; break; + case 389: op = rv_op_rorw; break; } break; case 16: diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 8d6d53a5a2c..8ec653f81cf 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -209,9 +209,9 @@ children.0=childs0 \ 3. On Secondary VM's QEMU monitor, issue command -{'execute':'qmp_capabilities'} -{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 'data': {'host': '0.0.0.0', 'port': '9999'} } } } -{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 'writable': true } } +{"execute":"qmp_capabilities"} +{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } } +{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } } Note: a. The qmp command nbd-server-start and nbd-server-add must be run @@ -222,11 +222,11 @@ Note: will be merged into the parent disk on failover. 4. On Primary VM's QEMU monitor, issue command: -{'execute':'qmp_capabilities'} -{'execute': 'human-monitor-command', 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 'replication0' } } -{'execute': 'migrate-set-capabilities', 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments': {'uri': 'tcp:127.0.0.2:9998' } } +{"execute":"qmp_capabilities"} +{"execute": "human-monitor-command", "arguments": {"command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": "replication0" } } +{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } } Note: a. There should be only one NBD Client for each primary disk. 
@@ -249,59 +249,59 @@ if you want to resume the replication, follow "Secondary resume replication" == Primary Failover == The Secondary died, resume on the Primary -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'child': 'children.1'} } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_del replication0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'comp0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'iothread1' } } -{'execute': 'object-del', 'arguments':{ 'id': 'm0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'redire0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'redire1' } } -{'execute': 'x-colo-lost-heartbeat' } +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "child": "children.1"} } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_del replication0" } } +{"execute": "object-del", "arguments":{ "id": "comp0" } } +{"execute": "object-del", "arguments":{ "id": "iothread1" } } +{"execute": "object-del", "arguments":{ "id": "m0" } } +{"execute": "object-del", "arguments":{ "id": "redire0" } } +{"execute": "object-del", "arguments":{ "id": "redire1" } } +{"execute": "x-colo-lost-heartbeat" } == Secondary Failover == The Primary died, resume on the Secondary and prepare to become the new Primary -{'execute': 'nbd-server-stop'} -{'execute': 'x-colo-lost-heartbeat'} +{"execute": "nbd-server-stop"} +{"execute": "x-colo-lost-heartbeat"} -{'execute': 'object-del', 'arguments':{ 'id': 'f2' } } -{'execute': 'object-del', 'arguments':{ 'id': 'f1' } } -{'execute': 'chardev-remove', 'arguments':{ 'id': 'red1' } } -{'execute': 'chardev-remove', 'arguments':{ 'id': 'red0' } } +{"execute": "object-del", "arguments":{ "id": "f2" } } +{"execute": "object-del", "arguments":{ "id": "f1" } } +{"execute": "chardev-remove", "arguments":{ "id": "red1" } } +{"execute": "chardev-remove", "arguments":{ "id": "red0" } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'mirror0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9003' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare1', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9004' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0-0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': false } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': false } } } } +{"execute": "chardev-add", "arguments":{ "id": "mirror0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", "port": "9003" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare1", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", "port": "9004" } }, "server": true } } } } +{"execute": 
"chardev-add", "arguments":{ "id": "compare0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9001" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare0-0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9001" } }, "server": false } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare_out", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9005" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare_out0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9005" } }, "server": false } } } } == Primary resume replication == Resume replication after new Secondary is up. Start the new Secondary (Steps 2 and 3 above), then on the Primary: -{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.2:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} } +{"execute": "drive-mirror", "arguments":{ "device": "colo-disk0", "job-id": "resync", "target": "nbd://127.0.0.2:9999/parent0", "mode": "existing", "format": "raw", "sync": "full"} } Wait until disk is synced, then: -{'execute': 'stop'} -{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync'} } +{"execute": "stop"} +{"execute": "block-job-cancel", "arguments":{ "device": "resync"} } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "node": "replication0" } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-mirror", "id": "m0", "netdev": "hn0", "queue": "tx", "outdev": "mirror0" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire0", "netdev": "hn0", "queue": "rx", "indev": "compare_out" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire1", "netdev": "hn0", "queue": "rx", "outdev": "compare0" } } +{"execute": "object-add", "arguments":{ "qom-type": "iothread", "id": "iothread1" } } +{"execute": "object-add", "arguments":{ "qom-type": "colo-compare", "id": "comp0", "primary_in": 
"compare0-0", "secondary_in": "compare1", "outdev": "compare_out0", "iothread": "iothread1" } } -{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.2:9998' } } +{"execute": "migrate-set-capabilities", "arguments":{ "capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments":{ "uri": "tcp:127.0.0.2:9998" } } Note: If this Primary previously was a Secondary, then we need to insert the filters before the filter-rewriter by using the -"'insert': 'before', 'position': 'id=rew0'" Options. See below. +""insert": "before", "position": "id=rew0"" Options. See below. == Secondary resume replication == Become Primary and resume replication after new Secondary is up. Note @@ -309,23 +309,23 @@ that now 127.0.0.1 is the Secondary and 127.0.0.2 is the Primary. Start the new Secondary (Steps 2 and 3 above, but with primary_ip=127.0.0.2), then on the old Secondary: -{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.1:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} } +{"execute": "drive-mirror", "arguments":{ "device": "colo-disk0", "job-id": "resync", "target": "nbd://127.0.0.1:9999/parent0", "mode": "existing", "format": "raw", "sync": "full"} } Wait until disk is synced, then: -{'execute': 'stop'} -{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync' } } +{"execute": "stop"} +{"execute": "block-job-cancel", "arguments":{ "device": "resync" } } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "node": "replication0" } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-mirror", "id": "m0", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "tx", "outdev": "mirror0" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire0", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "rx", "indev": "compare_out" } } +{"execute": "object-add", 
"arguments":{ "qom-type": "filter-redirector", "id": "redire1", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "rx", "outdev": "compare0" } } +{"execute": "object-add", "arguments":{ "qom-type": "iothread", "id": "iothread1" } } +{"execute": "object-add", "arguments":{ "qom-type": "colo-compare", "id": "comp0", "primary_in": "compare0-0", "secondary_in": "compare1", "outdev": "compare_out0", "iothread": "iothread1" } } -{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.1:9998' } } +{"execute": "migrate-set-capabilities", "arguments":{ "capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments":{ "uri": "tcp:127.0.0.1:9998" } } == TODO == 1. Support shared storage. diff --git a/docs/_templates/editpage.html b/docs/_templates/editpage.html deleted file mode 100644 index 4319b0f5ac8..00000000000 --- a/docs/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/_templates/footer.html b/docs/_templates/footer.html new file mode 100644 index 00000000000..977053b5415 --- /dev/null +++ b/docs/_templates/footer.html @@ -0,0 +1,14 @@ +{% extends "!footer.html" %} +{% block extrafooter %} + + +

+ +

This documentation is for QEMU version {{ version }}.

+ +{% trans path=pathto('about/license') %} +

QEMU and this manual are released under the +GNU General Public License, version 2.

+{% endtrans %} +{{ super() }} +{% endblock %} diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst new file mode 100644 index 00000000000..c29a4b8fe64 --- /dev/null +++ b/docs/about/build-platforms.rst @@ -0,0 +1,97 @@ +.. _Supported-build-platforms: + +Supported build platforms +========================= + +QEMU aims to support building and executing on multiple host OS +platforms. This appendix outlines which platforms are the major build +targets. These platforms are used as the basis for deciding upon the +minimum required versions of 3rd party software QEMU depends on. The +supported platforms are the targets for automated testing performed by +the project when patches are submitted for review, and tested before and +after merge. + +If a platform is not listed here, it does not imply that QEMU won't +work. If an unlisted platform has comparable software versions to a +listed platform, there is every expectation that it will work. Bug +reports are welcome for problems encountered on unlisted platforms +unless they are clearly older vintage than what is described here. + +Note that when considering software versions shipped in distros as +support targets, QEMU considers only the version number, and assumes the +features in that distro match the upstream release with the same +version. In other words, if a distro backports extra features to the +software in their distro, QEMU upstream code will not add explicit +support for those backports, unless the feature is auto-detectable in a +manner that works for the upstream releases too. + +The `Repology`_ site is a useful resource to identify +currently shipped versions of software in various operating systems, +though it does not cover all distros listed below. + +Supported host architectures +---------------------------- + +Those hosts are officially supported, with various accelerators: + + .. list-table:: + :header-rows: 1 + + * - CPU Architecture + - Accelerators + * - Arm + - kvm (64 bit only), tcg, xen + * - MIPS + - kvm, tcg + * - PPC + - kvm, tcg + * - RISC-V + - tcg + * - s390x + - kvm, tcg + * - SPARC + - tcg + * - x86 + - hax, hvf (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen + +Other host architectures are not supported. It is possible to build QEMU system +emulation on an unsupported host architecture using the configure +``--enable-tcg-interpreter`` option to enable the TCI support, but note that +this is very slow and is not recommended for normal use. QEMU user emulation +requires host-specific support for signal handling, therefore TCI won't help +on unsupported host architectures. + +Non-supported architectures may be removed in the future following the +:ref:`deprecation process`. + +Linux OS, macOS, FreeBSD, NetBSD, OpenBSD +----------------------------------------- + +The project aims to support the most recent major version at all times. Support +for the previous major version will be dropped 2 years after the new major +version is released or when the vendor itself drops support, whichever comes +first. In this context, third-party efforts to extend the lifetime of a distro +are not considered, even when they are endorsed by the vendor (eg. Debian LTS). + +For the purposes of identifying supported software versions available on Linux, +the project will look at CentOS, Debian, Fedora, openSUSE, RHEL, SLES and +Ubuntu LTS. Other distros will be assumed to ship similar software versions. 
+ +For FreeBSD and OpenBSD, decisions will be made based on the contents of the +respective ports repository, while NetBSD will use the pkgsrc repository. + +For macOS, `HomeBrew`_ will be used, although `MacPorts`_ is expected to carry +similar versions. + +Windows +------- + +The project supports building with current versions of the MinGW toolchain, +hosted on Linux (Debian/Fedora). + +The version of the Windows API that's currently targeted is Vista / Server +2008. + +.. _HomeBrew: https://brew.sh/ +.. _MacPorts: https://www.macports.org/ +.. _Repology: https://repology.org/ diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst new file mode 100644 index 00000000000..ff7488cb63b --- /dev/null +++ b/docs/about/deprecated.rst @@ -0,0 +1,440 @@ +.. _Deprecated features: + +Deprecated features +=================== + +In general features are intended to be supported indefinitely once +introduced into QEMU. In the event that a feature needs to be removed, +it will be listed in this section. The feature will remain functional for the +release in which it was deprecated and one further release. After these two +releases, the feature is liable to be removed. Deprecated features may also +generate warnings on the console when QEMU starts up, or if activated via a +monitor command, however, this is not a mandatory requirement. + +Prior to the 2.10.0 release there was no official policy on how +long features would be deprecated prior to their removal, nor +any documented list of which features were deprecated. Thus +any features deprecated prior to 2.10.0 will be treated as if +they were first deprecated in the 2.10.0 release. + +What follows is a list of all features currently marked as +deprecated. + +System emulator command line arguments +-------------------------------------- + +``QEMU_AUDIO_`` environment variables and ``-audio-help`` (since 4.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``-audiodev`` argument is now the preferred way to specify audio +backend settings instead of environment variables. To ease migration to +the new format, the ``-audiodev-help`` option can be used to convert +the current values of the environment variables to ``-audiodev`` options. + +Creating sound card devices and vnc without ``audiodev=`` property (since 4.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +When not using the deprecated legacy audio config, each sound card +should specify an ``audiodev=`` property. Additionally, when using +vnc, you should specify an ``audiodev=`` property if you plan to +transmit audio through the VNC protocol. + +Creating sound card devices using ``-soundhw`` (since 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Sound card devices should be created using ``-device`` instead. The +names are the same for most devices. The exceptions are ``hda`` which +needs two devices (``-device intel-hda -device hda-duplex``) and +``pcspk`` which can be activated using ``-machine +pcspk-audiodev=``. + +``-chardev`` backend aliases ``tty`` and ``parport`` (since 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +``tty`` and ``parport`` are aliases that will be removed. Instead, the +actual backend names ``serial`` and ``parallel`` should be used. + +Short-form boolean options (since 6.0) +'''''''''''''''''''''''''''''''''''''' + +Boolean options such as ``share=on``/``share=off`` could be written +in short form as ``share`` and ``noshare``. 
This is now deprecated
+and will cause a warning.
+
+``delay`` option for socket character devices (since 6.0)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The replacement for the ``nodelay`` short-form boolean option is ``nodelay=on``
+rather than ``delay=off``.
+
+``--enable-fips`` (since 6.0)
+'''''''''''''''''''''''''''''
+
+This option restricts usage of certain cryptographic algorithms when
+the host is operating in FIPS mode.
+
+If FIPS compliance is required, QEMU should be built with the ``libgcrypt``
+library enabled as a cryptography provider.
+
+Neither the ``nettle`` library nor the built-in cryptography provider is
+supported on FIPS-enabled hosts.
+
+``-writeconfig`` (since 6.0)
+'''''''''''''''''''''''''''''
+
+The ``-writeconfig`` option is not able to serialize the entire contents
+of the QEMU command line. It is thus considered a failed experiment
+and deprecated, with no current replacement.
+
+Userspace local APIC with KVM (x86, since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''
+
+Using ``-M kernel-irqchip=off`` with x86 machine types that include a local
+APIC is deprecated. The ``split`` setting is supported, as is using
+``-M kernel-irqchip=off`` with the ISA PC machine type.
+
+hexadecimal sizes with scaling multipliers (since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Input parameters that take a size value should only use a size suffix
+(such as 'k' or 'M') when the base is written in decimal, and not when
+the value is hexadecimal. That is, '0x20M' is deprecated, and should
+be written either as '32M' or as '0x2000000'.
+
+``-spice password=string`` (since 6.0)
+''''''''''''''''''''''''''''''''''''''
+
+This option is insecure because the SPICE password remains visible in
+the process listing. It is replaced by the new ``password-secret``
+option, which lets the password be securely provided on the command
+line using a ``secret`` object instance.
+
+``opened`` property of ``rng-*`` objects (since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The only effect of specifying ``opened=on`` in the command line or QMP
+``object-add`` is that the device is opened immediately, possibly before all
+other options have been processed. This will either have no effect (if
+``opened`` was the last option) or cause errors. The property is therefore
+useless and should not be specified.
+
+``loaded`` property of ``secret`` and ``secret_keyring`` objects (since 6.0)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The only effect of specifying ``loaded=on`` in the command line or QMP
+``object-add`` is that the secret is loaded immediately, possibly before all
+other options have been processed. This will either have no effect (if
+``loaded`` was the last option) or cause options to be effectively ignored as
+if they were not given. The property is therefore useless and should not be
+specified.
+
+``-display sdl,window_close=...`` (since 6.1)
+'''''''''''''''''''''''''''''''''''''''''''''
+
+Use ``-display sdl,window-close=...`` instead (i.e. with a minus instead of
+an underscore between "window" and "close").
+
+``-no-quit`` (since 6.1)
+''''''''''''''''''''''''
+
+The ``-no-quit`` option is a synonym for ``-display ...,window-close=off``,
+which should be used instead.
+
+``-alt-grab`` and ``-display sdl,alt_grab=on`` (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use ``-display sdl,grab-mod=lshift-lctrl-lalt`` instead.
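+
+For example (a sketch; the binary name is a placeholder), a command line that
+used the deprecated spellings::
+
+  qemu-system-x86_64 -display sdl,window_close=off,alt_grab=on
+
+can be written with the current option names as::
+
+  qemu-system-x86_64 -display sdl,window-close=off,grab-mod=lshift-lctrl-lalt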
+
+``-ctrl-grab`` and ``-display sdl,ctrl_grab=on`` (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use ``-display sdl,grab-mod=rctrl`` instead.
+
+``-sdl`` (since 6.2)
+''''''''''''''''''''
+
+Use ``-display sdl`` instead.
+
+``-curses`` (since 6.2)
+'''''''''''''''''''''''
+
+Use ``-display curses`` instead.
+
+``-watchdog`` (since 6.2)
+'''''''''''''''''''''''''
+
+Use ``-device`` instead.
+
+``-smp`` ("parameter=0" SMP configurations) (since 6.2)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Specified CPU topology parameters must be greater than zero.
+
+In the SMP configuration, users should either provide a CPU topology
+parameter with a reasonable value (greater than zero) or just omit it
+and QEMU will compute the missing value.
+
+However, historically it was implicitly allowed for users to provide
+a parameter with zero value, which is meaningless and could also possibly
+cause unexpected results in the -smp parsing. So support for this kind of
+configuration (e.g. -smp 8,sockets=0) is deprecated since 6.2 and will
+be removed in the near future; users have to ensure that all the topology
+members described with -smp are greater than zero.
+
+Plugin argument passing through ``arg=`` (since 6.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Passing TCG plugin arguments through ``arg=`` is redundant and makes the
+command-line less readable, especially when the argument itself consists of a
+name and a value, e.g. ``-plugin plugin_name,arg="arg_name=arg_value"``.
+Therefore, the usage of ``arg`` is redundant. Single-word arguments are treated
+as short-form boolean values, and passed to plugins as ``arg_name=on``.
+However, short-form booleans are deprecated and the full explicit
+``arg_name=on`` form is preferred.
+
+``-drive if=none`` for the sifive_u OTP device (since 6.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Using ``-drive if=none`` to configure the OTP device of the sifive_u
+RISC-V machine is deprecated. Use ``-drive if=pflash`` instead.
+
+
+QEMU Machine Protocol (QMP) commands
+------------------------------------
+
+``blockdev-open-tray``, ``blockdev-close-tray`` argument ``device`` (since 2.8)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``eject`` argument ``device`` (since 2.8)
+'''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``blockdev-change-medium`` argument ``device`` (since 2.8)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``block_set_io_throttle`` argument ``device`` (since 2.8)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument ``id`` instead.
+
+``blockdev-add`` empty string argument ``backing`` (since 2.10)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use argument value ``null`` instead.
+
+``block-commit`` arguments ``base`` and ``top`` (since 3.1)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use arguments ``base-node`` and ``top-node`` instead.
+
+``nbd-server-add`` and ``nbd-server-remove`` (since 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Use the more generic commands ``block-export-add`` and ``block-export-del``
+instead. As part of this deprecation, where ``nbd-server-add`` used a
+single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``.
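+
+As a rough sketch of the mapping (node, export and bitmap names are
+placeholders), an export previously created with::
+
+  { "execute": "nbd-server-add",
+    "arguments": { "device": "node0", "bitmap": "bitmap0" } }
+
+would instead be created with something like::
+
+  { "execute": "block-export-add",
+    "arguments": { "type": "nbd", "id": "export0",
+                   "node-name": "node0", "bitmaps": ["bitmap0"] } }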
+ +``query-qmp-schema`` return value member ``values`` (since 6.2) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Member ``values`` in return value elements with meta-type ``enum`` is +deprecated. Use ``members`` instead. + +``drive-backup`` (since 6.2) +'''''''''''''''''''''''''''' + +Use ``blockdev-backup`` in combination with ``blockdev-add`` instead. +This change primarily separates the creation/opening process of the backup +target with explicit, separate steps. ``blockdev-backup`` uses mostly the +same arguments as ``drive-backup``, except the ``format`` and ``mode`` +options are removed in favor of using explicit ``blockdev-create`` and +``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for +details. + +Incorrectly typed ``device_add`` arguments (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, QEMU +incorrectly accepts certain invalid arguments: Any object or list arguments are +silently ignored. Other argument types are not checked, but an implicit +conversion happens, so that e.g. string values can be assigned to integer +device properties or vice versa. + +This is a bug in QEMU that will be fixed in the future so that previously +accepted incorrect commands will return an error. Users should make sure that +all arguments passed to ``device_add`` are consistent with the documented +property types. + +System accelerators +------------------- + +MIPS ``Trap-and-Emul`` KVM support (since 6.0) +'''''''''''''''''''''''''''''''''''''''''''''' + +The MIPS ``Trap-and-Emul`` KVM host and guest support has been removed +from Linux upstream kernel, declare it deprecated. + +System emulator CPUS +-------------------- + +``Icelake-Client`` CPU Model (since 5.2) +'''''''''''''''''''''''''''''''''''''''' + +``Icelake-Client`` CPU Models are deprecated. Use ``Icelake-Server`` CPU +Models instead. + +MIPS ``I7200`` CPU Model (since 5.2) +'''''''''''''''''''''''''''''''''''' + +The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated +(the ISA has never been upstreamed to a compiler toolchain). Therefore +this CPU is also deprecated. + + +QEMU API (QAPI) events +---------------------- + +``MEM_UNPLUG_ERROR`` (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use the more generic event ``DEVICE_UNPLUG_GUEST_ERROR`` instead. + + +System emulator machines +------------------------ + +Aspeed ``swift-bmc`` machine (since 6.1) +'''''''''''''''''''''''''''''''''''''''' + +This machine is deprecated because we have enough AST2500 based OpenPOWER +machines. It can be easily replaced by the ``witherspoon-bmc`` or the +``romulus-bmc`` machines. + +Backend options +--------------- + +Using non-persistent backing file with pmem=on (since 6.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +This option is used when ``memory-backend-file`` is consumed by emulated NVDIMM +device. However enabling ``memory-backend-file.pmem`` option, when backing file +is (a) not DAX capable or (b) not on a filesystem that support direct mapping +of persistent memory, is not safe and may lead to data loss or corruption in case +of host crash. +Options are: + + - modify VM configuration to set ``pmem=off`` to continue using fake NVDIMM + (without persistence guaranties) with backing file on non DAX storage + - move backing file to NVDIMM storage and keep ``pmem=on`` + (to have NVDIMM with persistence guaranties). 
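+
+As an illustrative sketch (object, device and path names are placeholders), the
+first of these options corresponds to a backend definition along the lines of::
+
+  -object memory-backend-file,id=mem1,share=on,mem-path=/path/to/backing,size=4G,pmem=off \
+  -device nvdimm,id=nvdimm1,memdev=mem1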
+ +Device options +-------------- + +Emulated device options +''''''''''''''''''''''' + +``-device virtio-blk,scsi=on|off`` (since 5.0) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0 +and later do not support it because the virtio-scsi device was introduced for +full SCSI support. Use virtio-scsi instead when SCSI passthrough is required. + +Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an +alias. + +``-device sga`` (since 6.2) +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``sga`` device loads an option ROM for x86 targets which enables +SeaBIOS to send messages to the serial console. SeaBIOS 1.11.0 onwards +contains native support for this feature and thus use of the option +ROM approach is obsolete. The native SeaBIOS support can be activated +by using ``-machine graphics=off``. + + +Block device options +'''''''''''''''''''' + +``"backing": ""`` (since 2.12) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to prevent QEMU from automatically opening an image's backing +chain, use ``"backing": null`` instead. + +``rbd`` keyvalue pair encoded filenames: ``""`` (since 3.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Options for ``rbd`` should be specified according to its runtime options, +like other block drivers. Legacy parsing of keyvalue pair encoded +filenames is useful to open images with the old format for backing files; +These image files should be updated to use the current format. + +Example of legacy encoding:: + + json:{"file.driver":"rbd", "file.filename":"rbd:rbd/name"} + +The above, converted to the current supported format:: + + json:{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"} + +linux-user mode CPUs +-------------------- + +``ppc64abi32`` CPUs (since 5.2) +''''''''''''''''''''''''''''''' + +The ``ppc64abi32`` architecture has a number of issues which regularly +trip up our CI testing and is suspected to be quite broken. For that +reason the maintainers strongly suspect no one actually uses it. + +MIPS ``I7200`` CPU (since 5.2) +'''''''''''''''''''''''''''''' + +The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated +(the ISA has never been upstreamed to a compiler toolchain). Therefore +this CPU is also deprecated. + +Backwards compatibility +----------------------- + +Runnability guarantee of CPU models (since 4.1) +''''''''''''''''''''''''''''''''''''''''''''''' + +Previous versions of QEMU never changed existing CPU models in +ways that introduced additional host software or hardware +requirements to the VM. This allowed management software to +safely change the machine type of an existing VM without +introducing new requirements ("runnability guarantee"). This +prevented CPU models from being updated to include CPU +vulnerability mitigations, leaving guests vulnerable in the +default configuration. + +The CPU model runnability guarantee won't apply anymore to +existing CPU models. Management software that needs runnability +guarantees must resolve the CPU model aliases using the +``alias-of`` field returned by the ``query-cpu-definitions`` QMP +command. + +While those guarantees are kept, the return value of +``query-cpu-definitions`` will have existing CPU model aliases +point to a version that doesn't break runnability guarantees +(specifically, version 1 of those CPU models). 
In future QEMU +versions, aliases will point to newer CPU model versions +depending on the machine type, so management software must +resolve CPU model aliases before starting a virtual machine. + +Guest Emulator ISAs +------------------- + +nanoMIPS ISA +'''''''''''' + +The ``nanoMIPS`` ISA has never been upstreamed to any compiler toolchain. +As it is hard to generate binaries for it, declare it deprecated. diff --git a/docs/about/index.rst b/docs/about/index.rst new file mode 100644 index 00000000000..5bea653c07c --- /dev/null +++ b/docs/about/index.rst @@ -0,0 +1,28 @@ +---------- +About QEMU +---------- + +QEMU is a generic and open source machine emulator and virtualizer. + +QEMU can be used in several different ways. The most common is for +"system emulation", where it provides a virtual model of an +entire machine (CPU, memory and emulated devices) to run a guest OS. +In this mode the CPU may be fully emulated, or it may work with +a hypervisor such as KVM, Xen, Hax or Hypervisor.Framework to +allow the guest to run directly on the host CPU. + +The second supported way to use QEMU is "user mode emulation", +where QEMU can launch processes compiled for one CPU on another CPU. +In this mode the CPU is always emulated. + +QEMU also provides a number of standalone commandline utilities, +such as the ``qemu-img`` disk image utility that allows you to create, +convert and modify disk images. + +.. toctree:: + :maxdepth: 2 + + build-platforms + deprecated + removed-features + license diff --git a/docs/system/license.rst b/docs/about/license.rst similarity index 100% rename from docs/system/license.rst rename to docs/about/license.rst diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst new file mode 100644 index 00000000000..d42c3341dee --- /dev/null +++ b/docs/about/removed-features.rst @@ -0,0 +1,705 @@ + +Removed features +================ + +What follows is a record of recently removed, formerly deprecated +features that serves as a record for users who have encountered +trouble after a recent upgrade. + +System emulator command line arguments +-------------------------------------- + +``-hdachs`` (removed in 2.12) +''''''''''''''''''''''''''''' + +The geometry defined by ``-hdachs c,h,s,t`` should now be specified via +``-device ide-hd,drive=dr,cyls=c,heads=h,secs=s,bios-chs-trans=t`` +(together with ``-drive if=none,id=dr,...``). + +``-net channel`` (removed in 2.12) +'''''''''''''''''''''''''''''''''' + +This option has been replaced by ``-net user,guestfwd=...``. + +``-net dump`` (removed in 2.12) +''''''''''''''''''''''''''''''' + +``-net dump[,vlan=n][,file=filename][,len=maxlen]`` has been replaced by +``-object filter-dump,id=id,netdev=dev[,file=filename][,maxlen=maxlen]``. +Note that the new syntax works with netdev IDs instead of the old "vlan" hubs. + +``-no-kvm-pit`` (removed in 2.12) +''''''''''''''''''''''''''''''''' + +This was just a dummy option that has been ignored, since the in-kernel PIT +cannot be disabled separately from the irqchip anymore. A similar effect +(which also disables the KVM IOAPIC) can be obtained with +``-M kernel_irqchip=split``. + +``-tdf`` (removed in 2.12) +'''''''''''''''''''''''''' + +There is no replacement, the ``-tdf`` option has just been ignored since the +behaviour that could be changed by this option in qemu-kvm is now the default +when using the KVM PIT. It still can be requested explicitly using +``-global kvm-pit.lost_tick_policy=delay``. 
+ +``-drive secs=s``, ``-drive heads=h`` & ``-drive cyls=c`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The drive geometry should now be specified via +``-device ...,drive=dr,cyls=c,heads=h,secs=s`` (together with +``-drive if=none,id=dr,...``). + +``-drive serial=``, ``-drive trans=`` & ``-drive addr=`` (removed in 3.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``-device ...,drive=dr,serial=r,bios-chs-trans=t,addr=a`` instead +(together with ``-drive if=none,id=dr,...``). + +``-net ...,vlan=x`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''' + +The term "vlan" was very confusing for most users in this context (it's about +specifying a hub ID, not about IEEE 802.1Q or something similar), so this +has been removed. To connect one NIC frontend with a network backend, either +use ``-nic ...`` (e.g. for on-board NICs) or use ``-netdev ...,id=n`` together +with ``-device ...,netdev=n`` (for full control over pluggable NICs). To +connect multiple NICs or network backends via a hub device (which is what +vlan did), use ``-nic hubport,hubid=x,...`` or +``-netdev hubport,id=n,hubid=x,...`` (with ``-device ...,netdev=n``) instead. + +``-no-kvm-irqchip`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''' + +Use ``-machine kernel_irqchip=off`` instead. + +``-no-kvm-pit-reinjection`` (removed in 3.0) +'''''''''''''''''''''''''''''''''''''''''''' + +Use ``-global kvm-pit.lost_tick_policy=discard`` instead. + +``-balloon`` (removed in 3.1) +''''''''''''''''''''''''''''' + +The ``-balloon virtio`` option has been replaced by ``-device virtio-balloon``. +The ``-balloon none`` option was a no-op and has no replacement. + +``-bootp`` (removed in 3.1) +''''''''''''''''''''''''''' + +The ``-bootp /some/file`` argument is replaced by either +``-netdev user,id=x,bootp=/some/file`` (for pluggable NICs, accompanied with +``-device ...,netdev=x``), or ``-nic user,bootp=/some/file`` (for on-board NICs). +The new syntax allows different settings to be provided per NIC. + +``-redir`` (removed in 3.1) +''''''''''''''''''''''''''' + +The ``-redir [tcp|udp]:hostport:[guestaddr]:guestport`` option is replaced +by either ``-netdev +user,id=x,hostfwd=[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport`` +(for pluggable NICs, accompanied with ``-device ...,netdev=x``) or by the option +``-nic user,hostfwd=[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport`` +(for on-board NICs). The new syntax allows different settings to be provided +per NIC. + +``-smb`` (removed in 3.1) +''''''''''''''''''''''''' + +The ``-smb /some/dir`` argument is replaced by either +``-netdev user,id=x,smb=/some/dir`` (for pluggable NICs, accompanied with +``-device ...,netdev=x``), or ``-nic user,smb=/some/dir`` (for on-board NICs). +The new syntax allows different settings to be provided per NIC. + +``-tftp`` (removed in 3.1) +'''''''''''''''''''''''''' + +The ``-tftp /some/dir`` argument is replaced by either +``-netdev user,id=x,tftp=/some/dir`` (for pluggable NICs, accompanied with +``-device ...,netdev=x``), or ``-nic user,tftp=/some/dir`` (for embedded NICs). +The new syntax allows different settings to be provided per NIC. + +``-localtime`` (removed in 3.1) +''''''''''''''''''''''''''''''' + +Replaced by ``-rtc base=localtime``. + +``-nodefconfig`` (removed in 3.1) +''''''''''''''''''''''''''''''''' + +Use ``-no-user-config`` instead. 
+
+``-rtc-td-hack`` (removed in 3.1)
+'''''''''''''''''''''''''''''''''
+
+Use ``-rtc driftfix=slew`` instead.
+
+``-startdate`` (removed in 3.1)
+'''''''''''''''''''''''''''''''
+
+Replaced by ``-rtc base=date``.
+
+``-vnc ...,tls=...``, ``-vnc ...,x509=...`` & ``-vnc ...,x509verify=...`` (removed in 3.1)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The "tls-creds" option should be used instead to point to a "tls-creds-x509"
+object created using "-object".
+
+``-mem-path`` fallback to RAM (removed in 5.0)
+''''''''''''''''''''''''''''''''''''''''''''''
+
+If guest RAM allocation from the file pointed to by ``mem-path`` failed,
+QEMU used to fall back to allocating from RAM, which might have resulted
+in unpredictable behavior since the backing file specified by the user
+was ignored. Currently, users are responsible for making sure the backing storage
+specified with ``-mem-path`` can actually provide the guest RAM configured with
+``-m``, and QEMU fails to start up if RAM allocation is unsuccessful.
+
+``-net ...,name=...`` (removed in 5.1)
+''''''''''''''''''''''''''''''''''''''
+
+The ``name`` parameter of the ``-net`` option was a synonym
+for the ``id`` parameter, which should now be used instead.
+
+``-numa node,mem=...`` (removed in 5.1)
+'''''''''''''''''''''''''''''''''''''''
+
+The parameter ``mem`` of ``-numa node`` was used to assign a part of guest RAM
+to a NUMA node. But when using it, it's impossible to manage the specified RAM
+chunk on the host side (e.g. bind it to a host node, set a bind policy, ...),
+so the guest ends up with a fake NUMA configuration with suboptimal
+performance.
+However, since 2014 there is an alternative way to assign RAM to a NUMA node
+using the parameter ``memdev``, which does the same as ``mem`` and adds
+the means to actually manage node RAM on the host side. Use parameter ``memdev``
+with a *memory-backend-ram* backend as a replacement for parameter ``mem``
+to achieve the same fake NUMA effect, or a properly configured
+*memory-backend-file* backend to actually benefit from the NUMA configuration.
+New machine versions (since 5.1) will not accept the option but it will still
+work with old machine types. Users can check the QAPI schema to see if the legacy
+option is supported by looking at the MachineInfo::numa-mem-supported property.
+
+``-numa`` node (without memory specified) (removed in 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Splitting RAM by default between NUMA nodes had the same issues as the ``mem``
+parameter, with the difference that the user's role is played by QEMU using an
+implicit generic or board-specific splitting rule.
+Use ``memdev`` with a *memory-backend-ram* backend or ``mem`` (if
+it's supported by the machine type used) to define the mapping explicitly instead.
+Users of existing VMs, wishing to preserve the same RAM distribution, should
+configure it explicitly using ``-numa node,memdev`` options. The current RAM
+distribution can be retrieved using the HMP command ``info numa``, and if separate
+memory devices (pc|nv-dimm) are present use ``info memory-device`` and subtract
+device memory from the output of ``info numa``.
+
+``-smp`` (invalid topologies) (removed in 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''
+
+CPU topology properties should describe the whole machine topology including
+possible CPUs.
+ +However, historically it was possible to start QEMU with an incorrect topology +where *n* <= *sockets* * *cores* * *threads* < *maxcpus*, +which could lead to an incorrect topology enumeration by the guest. +Support for invalid topologies is removed, the user must ensure +topologies described with -smp include all possible cpus, i.e. +*sockets* * *cores* * *threads* = *maxcpus*. + +``-machine enforce-config-section=on|off`` (removed in 5.2) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``enforce-config-section`` property was replaced by the +``-global migration.send-configuration={on|off}`` option. + +``-no-kvm`` (removed in 5.2) +'''''''''''''''''''''''''''' + +The ``-no-kvm`` argument was a synonym for setting ``-machine accel=tcg``. + +``-realtime`` (removed in 6.0) +'''''''''''''''''''''''''''''' + +The ``-realtime mlock=on|off`` argument has been replaced by the +``-overcommit mem-lock=on|off`` argument. + +``-show-cursor`` option (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''' + +Use ``-display sdl,show-cursor=on``, ``-display gtk,show-cursor=on`` +or ``-display default,show-cursor=on`` instead. + +``-tb-size`` option (removed in 6.0) +'''''''''''''''''''''''''''''''''''' + +QEMU 5.0 introduced an alternative syntax to specify the size of the translation +block cache, ``-accel tcg,tb-size=``. + +``-usbdevice audio`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''' + +This option lacked the possibility to specify an audio backend device. +Use ``-device usb-audio`` now instead (and specify a corresponding USB +host controller or ``-usb`` if necessary). + +``-vnc acl`` (removed in 6.0) +''''''''''''''''''''''''''''' + +The ``acl`` option to the ``-vnc`` argument has been replaced +by the ``tls-authz`` and ``sasl-authz`` options. + +``-mon ...,control=readline,pretty=on|off`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``pretty=on|off`` switch has no effect for HMP monitors and +its use is rejected. + +``-drive file=json:{...{'driver':'file'}}`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The 'file' driver for drives is no longer appropriate for character or host +devices and will only accept regular files (S_IFREG). The correct driver +for these file types is 'host_cdrom' or 'host_device' as appropriate. + +Floppy controllers' drive properties (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``-device floppy,...`` instead. When configuring onboard floppy +controllers +:: + + -global isa-fdc.driveA=... + -global sysbus-fdc.driveA=... + -global SUNW,fdtwo.drive=... + +become +:: + + -device floppy,unit=0,drive=... + +and +:: + + -global isa-fdc.driveB=... + -global sysbus-fdc.driveB=... + +become +:: + + -device floppy,unit=1,drive=... + +When plugging in a floppy controller +:: + + -device isa-fdc,...,driveA=... + +becomes +:: + + -device isa-fdc,... + -device floppy,unit=0,drive=... + +and +:: + + -device isa-fdc,...,driveB=... + +becomes +:: + + -device isa-fdc,... + -device floppy,unit=1,drive=... + +``-drive`` with bogus interface type (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Drives with interface types other than ``if=none`` are for onboard +devices. Drives the board doesn't pick up can no longer be used with +-device. Use ``if=none`` instead. + +``-usbdevice ccid`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''' + +This option was undocumented and not used in the field. 
+Use ``-device usb-ccid`` instead. + +RISC-V firmware not booted by default (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +QEMU 5.1 changes the default behaviour from ``-bios none`` to ``-bios default`` +for the RISC-V ``virt`` machine and ``sifive_u`` machine. + +QEMU Machine Protocol (QMP) commands +------------------------------------ + +``block-dirty-bitmap-add`` "autoload" parameter (removed in 4.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The "autoload" parameter has been ignored since 2.12.0. All bitmaps +are automatically loaded from qcow2 images. + +``cpu-add`` (removed in 5.2) +'''''''''''''''''''''''''''' + +Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See +documentation of ``query-hotpluggable-cpus`` for additional details. + +``change`` (removed in 6.0) +''''''''''''''''''''''''''' + +Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. + +``query-events`` (removed in 6.0) +''''''''''''''''''''''''''''''''' + +The ``query-events`` command has been superseded by the more powerful +and accurate ``query-qmp-schema`` command. + +``migrate_set_cache_size`` and ``query-migrate-cache-size`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate_set_parameter`` and ``info migrate_parameters`` instead. + +``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate_set_parameter`` instead. + +``query-cpus`` (removed in 6.0) +''''''''''''''''''''''''''''''' + +The ``query-cpus`` command is replaced by the ``query-cpus-fast`` command. + +``query-cpus-fast`` ``arch`` output member (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``arch`` output member of the ``query-cpus-fast`` command is +replaced by the ``target`` output member. + +chardev client socket with ``wait`` option (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Character devices creating sockets in client mode should not specify +the 'wait' field, which is only applicable to sockets in server mode + +``query-named-block-nodes`` result ``encryption_key_missing`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Removed with no replacement. + +``query-block`` result ``inserted.encryption_key_missing`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Removed with no replacement. + +``query-named-block-nodes`` and ``query-block`` result dirty-bitmaps[i].status (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``status`` field of the ``BlockDirtyInfo`` structure, returned by +these commands is removed. Two new boolean fields, ``recording`` and +``busy`` effectively replace it. + +``query-block`` result field ``dirty-bitmaps`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``dirty-bitmaps`` field of the ``BlockInfo`` structure, returned by +the query-block command is itself now removed. The ``dirty-bitmaps`` +field of the ``BlockDeviceInfo`` struct should be used instead, which is the +type of the ``inserted`` field in query-block replies, as well as the +type of array items in query-named-block-nodes. 
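+
+In other words, a client that previously read the bitmap list from the top
+level of each ``query-block`` result element should now read it from the
+``inserted`` member. A rough sketch of one reply element (most fields elided)::
+
+  { "device": "drive0",
+    "inserted": { "node-name": "...",
+                  "dirty-bitmaps": [ ... ],
+                  ... } }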
+ +``object-add`` option ``props`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''' + +Specify the properties for the object as top-level arguments instead. + +Human Monitor Protocol (HMP) commands +------------------------------------- + +``usb_add`` and ``usb_remove`` (removed in 2.12) +'''''''''''''''''''''''''''''''''''''''''''''''' + +Replaced by ``device_add`` and ``device_del`` (use ``device_add help`` for a +list of available devices). + +``host_net_add`` and ``host_net_remove`` (removed in 2.12) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Replaced by ``netdev_add`` and ``netdev_del``. + +The ``hub_id`` parameter of ``hostfwd_add`` / ``hostfwd_remove`` (removed in 5.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``[hub_id name]`` parameter tuple of the 'hostfwd_add' and +'hostfwd_remove' HMP commands has been replaced by ``netdev_id``. + +``cpu-add`` (removed in 5.2) +'''''''''''''''''''''''''''' + +Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See +documentation of ``query-hotpluggable-cpus`` for additional details. + +``change vnc TARGET`` (removed in 6.0) +'''''''''''''''''''''''''''''''''''''' + +No replacement. The ``change vnc password`` and ``change DEVICE MEDIUM`` +commands are not affected. + +``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, ``acl_remove`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, and +``acl_remove`` commands were removed with no replacement. Authorization +for VNC should be performed using the pluggable QAuthZ objects. + +``migrate-set-cache-size`` and ``info migrate-cache-size`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate-set-parameters`` and ``info migrate-parameters`` instead. + +``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Use ``migrate-set-parameters`` instead. + +``info cpustats`` (removed in 6.1) +'''''''''''''''''''''''''''''''''' + +This command didn't produce any output already. Removed with no replacement. + +Guest Emulator ISAs +------------------- + +RISC-V ISA privilege specification version 1.09.1 (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The RISC-V ISA privilege specification version 1.09.1 has been removed. +QEMU supports both the newer version 1.10.0 and the ratified version 1.11.0, these +should be used instead of the 1.09.1 version. + +System emulator CPUS +-------------------- + +KVM guest support on 32-bit Arm hosts (removed in 5.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The Linux kernel has dropped support for allowing 32-bit Arm systems +to host KVM guests as of the 5.7 kernel. Accordingly, QEMU is deprecating +its support for this configuration and will remove it in a future version. +Running 32-bit guests on a 64-bit Arm host remains supported. + +RISC-V ISA Specific CPUs (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''' + +The RISC-V cpus with the ISA version in the CPU name have been removed. The +four CPUs are: ``rv32gcsu-v1.9.1``, ``rv32gcsu-v1.10.0``, ``rv64gcsu-v1.9.1`` and +``rv64gcsu-v1.10.0``. Instead the version can be specified via the CPU ``priv_spec`` +option when using the ``rv32`` or ``rv64`` CPUs. 
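+
+For example (mirroring the Spike machine note later in this document), instead
+of ``-cpu rv64gcsu-v1.10.0`` the privilege specification version can now be
+selected with::
+
+  -cpu rv64gcsu,priv_spec=v1.10.0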
+ +RISC-V no MMU CPUs (removed in 5.1) +''''''''''''''''''''''''''''''''''' + +The RISC-V no MMU cpus have been removed. The two CPUs: ``rv32imacu-nommu`` and +``rv64imacu-nommu`` can no longer be used. Instead the MMU status can be specified +via the CPU ``mmu`` option when using the ``rv32`` or ``rv64`` CPUs. + +``compat`` property of server class POWER CPUs (removed in 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``max-cpu-compat`` property of the ``pseries`` machine type should be used +instead. + +``moxie`` CPU (removed in 6.1) +'''''''''''''''''''''''''''''' + +Nobody was using this CPU emulation in QEMU, and there were no test images +available to make sure that the code is still working, so it has been removed +without replacement. + +``lm32`` CPUs (removed in 6.1) +'''''''''''''''''''''''''''''' + +The only public user of this architecture was the milkymist project, +which has been dead for years; there was never an upstream Linux +port. Removed without replacement. + +``unicore32`` CPUs (removed in 6.1) +''''''''''''''''''''''''''''''''''' + +Support for this CPU was removed from the upstream Linux kernel, and +there is no available upstream toolchain to build binaries for it. +Removed without replacement. + +System emulator machines +------------------------ + +``s390-virtio`` (removed in 2.6) +'''''''''''''''''''''''''''''''' + +Use the ``s390-ccw-virtio`` machine instead. + +The m68k ``dummy`` machine (removed in 2.9) +''''''''''''''''''''''''''''''''''''''''''' + +Use the ``none`` machine with the ``loader`` device instead. + +``xlnx-ep108`` (removed in 3.0) +''''''''''''''''''''''''''''''' + +The EP108 was an early access development board that is no longer used. +Use the ``xlnx-zcu102`` machine instead. + +``spike_v1.9.1`` and ``spike_v1.10`` (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The version specific Spike machines have been removed in favour of the +generic ``spike`` machine. If you need to specify an older version of the RISC-V +spec you can use the ``-cpu rv64gcsu,priv_spec=v1.10.0`` command line argument. + +mips ``r4k`` platform (removed in 5.2) +'''''''''''''''''''''''''''''''''''''' + +This machine type was very old and unmaintained. Users should use the ``malta`` +machine type instead. + +mips ``fulong2e`` machine alias (removed in 6.0) +'''''''''''''''''''''''''''''''''''''''''''''''' + +This machine has been renamed ``fuloong2e``. + +``pc-0.10`` up to ``pc-1.3`` (removed in 4.0 up to 6.0) +''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +These machine types were very old and likely could not be used for live +migration from old QEMU versions anymore. Use a newer machine type instead. + +Raspberry Pi ``raspi2`` and ``raspi3`` machines (removed in 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The Raspberry Pi machines come in various models (A, A+, B, B+). To be able +to distinguish which model QEMU is implementing, the ``raspi2`` and ``raspi3`` +machines have been renamed ``raspi2b`` and ``raspi3b``. + + +linux-user mode CPUs +-------------------- + +``tilegx`` CPUs (removed in 6.0) +'''''''''''''''''''''''''''''''' + +The ``tilegx`` guest CPU support has been removed without replacement. It was +only implemented in linux-user mode, but support for this CPU was removed from +the upstream Linux kernel in 2018, and it has also been dropped from glibc, so +there is no new Linux development taking place with this architecture. 
For +running the old binaries, you can use older versions of QEMU. + +System emulator devices +----------------------- + +``spapr-pci-vfio-host-bridge`` (removed in 2.12) +''''''''''''''''''''''''''''''''''''''''''''''''' + +The ``spapr-pci-vfio-host-bridge`` device type has been replaced by the +``spapr-pci-host-bridge`` device type. + +``ivshmem`` (removed in 4.0) +'''''''''''''''''''''''''''' + +Replaced by either the ``ivshmem-plain`` or ``ivshmem-doorbell``. + +``ide-drive`` (removed in 6.0) +'''''''''''''''''''''''''''''' + +The 'ide-drive' device has been removed. Users should use 'ide-hd' or +'ide-cd' as appropriate to get an IDE hard disk or CD-ROM as needed. + +``scsi-disk`` (removed in 6.0) +'''''''''''''''''''''''''''''' + +The 'scsi-disk' device has been removed. Users should use 'scsi-hd' or +'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed. + +Related binaries +---------------- + +``qemu-nbd --partition`` (removed in 5.0) +''''''''''''''''''''''''''''''''''''''''' + +The ``qemu-nbd --partition $digit`` code (also spelled ``-P``) +could only handle MBR partitions, and never correctly handled logical +partitions beyond partition 5. Exporting a partition can still be +done by utilizing the ``--image-opts`` option with a raw blockdev +using the ``offset`` and ``size`` parameters layered on top of +any other existing blockdev. For example, if partition 1 is 100MiB +long starting at 1MiB, the old command:: + + qemu-nbd -t -P 1 -f qcow2 file.qcow2 + +can be rewritten as:: + + qemu-nbd -t --image-opts driver=raw,offset=1M,size=100M,file.driver=qcow2,file.file.driver=file,file.file.filename=file.qcow2 + +``qemu-img convert -n -o`` (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''''' + +All options specified in ``-o`` are image creation options, so +they are now rejected when used with ``-n`` to skip image creation. + + +``qemu-img create -b bad file $size`` (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +When creating an image with a backing file that could not be opened, +``qemu-img create`` used to issue a warning about the failure but +proceed with the image creation if an explicit size was provided. +However, as the ``-u`` option exists for this purpose, it is safer to +enforce that any failure to open the backing image (including if the +backing file is missing or an incorrect format was specified) is an +error when ``-u`` is not used. + +``qemu-img amend`` to adjust backing file (removed in 6.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The use of ``qemu-img amend`` to modify the name or format of a qcow2 +backing image was never fully documented or tested, and interferes +with other amend operations that need access to the original backing +image (such as deciding whether a v3 zero cluster may be left +unallocated when converting to a v2 image). Any changes to the +backing chain should be performed with ``qemu-img rebase -u`` either +before or after the remaining changes being performed by amend, as +appropriate. + +``qemu-img`` backing file without format (removed in 6.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +The use of ``qemu-img create``, ``qemu-img rebase``, or ``qemu-img +convert`` to create or modify an image that depends on a backing file +now requires that an explicit backing format be provided. 
This is +for safety: if QEMU probes a different format than what you thought, +the data presented to the guest will be corrupt; similarly, presenting +a raw image to a guest allows a potential security exploit if a future +probe sees a non-raw image based on guest writes. + +To avoid creating unsafe backing chains, you must pass ``-o +backing_fmt=`` (or the shorthand ``-F`` during create) to specify the +intended backing format. You may use ``qemu-img rebase -u`` to +retroactively add a backing format to an existing image. However, be +aware that there are already potential security risks to blindly using +``qemu-img info`` to probe the format of an untrusted backing image, +when deciding what format to add into an existing image. + +Block devices +------------- + +VXHS backend (removed in 5.1) +''''''''''''''''''''''''''''' + +The VXHS code did not compile since v2.12.0. It was removed in 5.1. + +``sheepdog`` driver (removed in 6.0) +'''''''''''''''''''''''''''''''''''' + +The corresponding upstream server project is no longer maintained. +Users are recommended to switch to an alternative distributed block +device driver such as RBD. diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt index 145896aec78..ffca382b5f5 100644 --- a/docs/amd-memory-encryption.txt +++ b/docs/amd-memory-encryption.txt @@ -1,38 +1,48 @@ Secure Encrypted Virtualization (SEV) is a feature found on AMD processors. SEV is an extension to the AMD-V architecture which supports running encrypted -virtual machine (VMs) under the control of KVM. Encrypted VMs have their pages +virtual machines (VMs) under the control of KVM. Encrypted VMs have their pages (code and data) secured such that only the guest itself has access to the unencrypted version. Each encrypted VM is associated with a unique encryption -key; if its data is accessed to a different entity using a different key the +key; if its data is accessed by a different entity using a different key the encrypted guests data will be incorrectly decrypted, leading to unintelligible data. -The key management of this feature is handled by separate processor known as -AMD secure processor (AMD-SP) which is present in AMD SOCs. Firmware running -inside the AMD-SP provide commands to support common VM lifecycle. This +Key management for this feature is handled by a separate processor known as the +AMD secure processor (AMD-SP), which is present in AMD SOCs. Firmware running +inside the AMD-SP provides commands to support a common VM lifecycle. This includes commands for launching, snapshotting, migrating and debugging the -encrypted guest. Those SEV command can be issued via KVM_MEMORY_ENCRYPT_OP +encrypted guest. These SEV commands can be issued via KVM_MEMORY_ENCRYPT_OP ioctls. +Secure Encrypted Virtualization - Encrypted State (SEV-ES) builds on the SEV +support to additionally protect the guest register state. In order to allow a +hypervisor to perform functions on behalf of a guest, there is architectural +support for notifying a guest's operating system when certain types of VMEXITs +are about to occur. This allows the guest to selectively share information with +the hypervisor to satisfy the requested function. + Launching --------- -Boot images (such as bios) must be encrypted before guest can be booted. -MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images :LAUNCH_START, +Boot images (such as bios) must be encrypted before a guest can be booted. 
The +MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images: LAUNCH_START, LAUNCH_UPDATE_DATA, LAUNCH_MEASURE and LAUNCH_FINISH. These four commands together generate a fresh memory encryption key for the VM, encrypt the boot -images and provide a measurement than can be used as an attestation of the +images and provide a measurement than can be used as an attestation of a successful launch. +For a SEV-ES guest, the LAUNCH_UPDATE_VMSA command is also used to encrypt the +guest register state, or VM save area (VMSA), for all of the guest vCPUs. + LAUNCH_START is called first to create a cryptographic launch context within -the firmware. To create this context, guest owner must provides guest policy, +the firmware. To create this context, guest owner must provide a guest policy, its public Diffie-Hellman key (PDH) and session parameters. These inputs -should be treated as binary blob and must be passed as-is to the SEV firmware. +should be treated as a binary blob and must be passed as-is to the SEV firmware. -The guest policy is passed as plaintext and hypervisor may able to read it +The guest policy is passed as plaintext. A hypervisor may choose to read it, but should not modify it (any modification of the policy bits will result in bad measurement). The guest policy is a 4-byte data structure containing -several flags that restricts what can be done on running SEV guest. +several flags that restricts what can be done on a running SEV guest. See KM Spec section 3 and 6.2 for more details. The guest policy can be provided via the 'policy' property (see below) @@ -40,31 +50,42 @@ The guest policy can be provided via the 'policy' property (see below) # ${QEMU} \ sev-guest,id=sev0,policy=0x1...\ -Guest owners provided DH certificate and session parameters will be used to +Setting the "SEV-ES required" policy bit (bit 2) will launch the guest as a +SEV-ES guest (see below) + +# ${QEMU} \ + sev-guest,id=sev0,policy=0x5...\ + +The guest owner provided DH certificate and session parameters will be used to establish a cryptographic session with the guest owner to negotiate keys used for the attestation. -The DH certificate and session blob can be provided via 'dh-cert-file' and -'session-file' property (see below +The DH certificate and session blob can be provided via the 'dh-cert-file' and +'session-file' properties (see below) # ${QEMU} \ sev-guest,id=sev0,dh-cert-file=,session-file= LAUNCH_UPDATE_DATA encrypts the memory region using the cryptographic context -created via LAUNCH_START command. If required, this command can be called +created via the LAUNCH_START command. If required, this command can be called multiple times to encrypt different memory regions. The command also calculates the measurement of the memory contents as it encrypts. -LAUNCH_MEASURE command can be used to retrieve the measurement of encrypted -memory. This measurement is a signature of the memory contents that can be -sent to the guest owner as an attestation that the memory was encrypted +LAUNCH_UPDATE_VMSA encrypts all the vCPU VMSAs for a SEV-ES guest using the +cryptographic context created via the LAUNCH_START command. The command also +calculates the measurement of the VMSAs as it encrypts them. + +LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory and, +for a SEV-ES guest, encrypted VMSAs. 
This measurement is a signature of the +memory contents and, for a SEV-ES guest, the VMSA contents, that can be sent +to the guest owner as an attestation that the memory and VMSAs were encrypted correctly by the firmware. The guest owner may wait to provide the guest confidential information until it can verify the attestation measurement. Since the guest owner knows the initial contents of the guest at boot, the attestation measurement can be verified by comparing it to what the guest owner expects. -LAUNCH_FINISH command finalizes the guest launch and destroy's the cryptographic +LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic context. See SEV KM API Spec [1] 'Launching a guest' usage flow (Appendix A) for the @@ -76,12 +97,28 @@ To launch a SEV guest -machine ...,confidential-guest-support=sev0 \ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 +To launch a SEV-ES guest + +# ${QEMU} \ + -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1,policy=0x5 + +An SEV-ES guest has some restrictions as compared to a SEV guest. Because the +guest register state is encrypted and cannot be updated by the VMM/hypervisor, +a SEV-ES guest: + - Does not support SMM - SMM support requires updating the guest register + state. + - Does not support reboot - a system reset requires updating the guest register + state. + - Requires in-kernel irqchip - the burden is placed on the hypervisor to + manage booting APs. + Debugging ----------- -Since memory contents of SEV guest is encrypted hence hypervisor access to the -guest memory will get a cipher text. If guest policy allows debugging, then -hypervisor can use DEBUG_DECRYPT and DEBUG_ENCRYPT commands access the guest -memory region for debug purposes. This is not supported in QEMU yet. +Since the memory contents of a SEV guest are encrypted, hypervisor access to +the guest memory will return cipher text. If the guest policy allows debugging, +then a hypervisor can use the DEBUG_DECRYPT and DEBUG_ENCRYPT commands to access +the guest memory region for debug purposes. This is not supported in QEMU yet. Snapshot/Restore ----------------- @@ -102,8 +139,10 @@ Secure Encrypted Virtualization Key Management: KVM Forum slides: http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf +https://www.linux-kvm.org/images/9/94/Extending-Secure-Encrypted-Virtualization-with-SEV-ES-Thomas-Lendacky-AMD.pdf AMD64 Architecture Programmer's Manual: http://support.amd.com/TechDocs/24593.pdf SME is section 7.10 SEV is section 15.34 + SEV-ES is section 15.35 diff --git a/docs/barrier.txt b/docs/barrier.txt deleted file mode 100644 index b21d15015d9..00000000000 --- a/docs/barrier.txt +++ /dev/null @@ -1,370 +0,0 @@ - QEMU Barrier Client - - -* About - - Barrier is a KVM (Keyboard-Video-Mouse) software forked from Symless's - synergy 1.9 codebase. - - See https://github.com/debauchee/barrier - -* QEMU usage - - Generally, mouse and keyboard are grabbed through the QEMU video - interface emulation. - - But when we want to use a video graphic adapter via a PCI passthrough - there is no way to provide the keyboard and mouse inputs to the VM - except by plugging a second set of mouse and keyboard to the host - or by installing a KVM software in the guest OS. - - The QEMU Barrier client avoids this by implementing directly the Barrier - protocol into QEMU. - - This protocol is enabled by adding an input-barrier object to QEMU. 
- - Syntax: input-barrier,id=,name= - [,server=][,port=] - [,x-origin=][,y-origin=] - [,width=][,height=] - - The object can be added on the QEMU command line, for instance with: - - ... -object input-barrier,id=barrier0,name=VM-1 ... - - where VM-1 is the name the display configured int the Barrier server - on the host providing the mouse and the keyboard events. - - by default is "localhost", port is 24800, - and are set to 0, and to - 1920 and 1080. - - If Barrier server is stopped QEMU needs to be reconnected manually, - by removing and re-adding the input-barrier object, for instance - with the help of the HMP monitor: - - (qemu) object_del barrier0 - (qemu) object_add input-barrier,id=barrier0,name=VM-1 - -* Message format - - Message format between the server and client is in two parts: - - 1- the payload length is a 32bit integer in network endianness, - 2- the payload - - The payload starts with a 4byte string (without NUL) which is the - command. The first command between the server and the client - is the only command not encoded on 4 bytes ("Barrier"). - The remaining part of the payload is decoded according to the command. - -* Protocol Description (from barrier/src/lib/barrier/protocol_types.h) - - - barrierCmdHello "Barrier" - - Direction: server -> client - Parameters: { int16_t minor, int16_t major } - Description: - - Say hello to client - minor = protocol major version number supported by server - major = protocol minor version number supported by server - - - barrierCmdHelloBack "Barrier" - - Direction: client ->server - Parameters: { int16_t minor, int16_t major, char *name} - Description: - - Respond to hello from server - minor = protocol major version number supported by client - major = protocol minor version number supported by client - name = client name - - - barrierCmdDInfo "DINF" - - Direction: client ->server - Parameters: { int16_t x_origin, int16_t y_origin, int16_t width, int16_t height, int16_t x, int16_t y} - Description: - - The client screen must send this message in response to the - barrierCmdQInfo message. It must also send this message when the - screen's resolution changes. In this case, the client screen should - ignore any barrierCmdDMouseMove messages until it receives a - barrierCmdCInfoAck in order to prevent attempts to move the mouse off - the new screen area. - - - barrierCmdCNoop "CNOP" - - Direction: client -> server - Parameters: None - Description: - - No operation - - - barrierCmdCClose "CBYE" - - Direction: server -> client - Parameters: None - Description: - - Close connection - - - barrierCmdCEnter "CINN" - - Direction: server -> client - Parameters: { int16_t x, int16_t y, int32_t seq, int16_t modifier } - Description: - - Enter screen. - x,y = entering screen absolute coordinates - seq = sequence number, which is used to order messages between - screens. the secondary screen must return this number - with some messages - modifier = modifier key mask. this will have bits set for each - toggle modifier key that is activated on entry to the - screen. the secondary screen should adjust its toggle - modifiers to reflect that state. - - - barrierCmdCLeave "COUT" - - Direction: server -> client - Parameters: None - Description: - - Leaving screen. the secondary screen should send clipboard data in - response to this message for those clipboards that it has grabbed - (i.e. 
has sent a barrierCmdCClipboard for and has not received a - barrierCmdCClipboard for with a greater sequence number) and that - were grabbed or have changed since the last leave. - - - barrierCmdCClipboard "CCLP" - - Direction: server -> client - Parameters: { int8_t id, int32_t seq } - Description: - - Grab clipboard. Sent by screen when some other app on that screen - grabs a clipboard. - id = the clipboard identifier - seq = sequence number. Client must use the sequence number passed in - the most recent barrierCmdCEnter. the server always sends 0. - - - barrierCmdCScreenSaver "CSEC" - - Direction: server -> client - Parameters: { int8_t started } - Description: - - Screensaver change. - started = Screensaver on primary has started (1) or closed (0) - - - barrierCmdCResetOptions "CROP" - - Direction: server -> client - Parameters: None - Description: - - Reset options. Client should reset all of its options to their - defaults. - - - barrierCmdCInfoAck "CIAK" - - Direction: server -> client - Parameters: None - Description: - - Resolution change acknowledgment. Sent by server in response to a - client screen's barrierCmdDInfo. This is sent for every - barrierCmdDInfo, whether or not the server had sent a barrierCmdQInfo. - - - barrierCmdCKeepAlive "CALV" - - Direction: server -> client - Parameters: None - Description: - - Keep connection alive. Sent by the server periodically to verify - that connections are still up and running. clients must reply in - kind on receipt. if the server gets an error sending the message or - does not receive a reply within a reasonable time then the server - disconnects the client. if the client doesn't receive these (or any - message) periodically then it should disconnect from the server. the - appropriate interval is defined by an option. - - - barrierCmdDKeyDown "DKDN" - - Direction: server -> client - Parameters: { int16_t keyid, int16_t modifier [,int16_t button] } - Description: - - Key pressed. - keyid = X11 key id - modified = modified mask - button = X11 Xkb keycode (optional) - - - barrierCmdDKeyRepeat "DKRP" - - Direction: server -> client - Parameters: { int16_t keyid, int16_t modifier, int16_t repeat [,int16_t button] } - Description: - - Key auto-repeat. - keyid = X11 key id - modified = modified mask - repeat = number of repeats - button = X11 Xkb keycode (optional) - - - barrierCmdDKeyUp "DKUP" - - Direction: server -> client - Parameters: { int16_t keyid, int16_t modifier [,int16_t button] } - Description: - - Key released. - keyid = X11 key id - modified = modified mask - button = X11 Xkb keycode (optional) - - - barrierCmdDMouseDown "DMDN" - - Direction: server -> client - Parameters: { int8_t button } - Description: - - Mouse button pressed. - button = button id - - - barrierCmdDMouseUp "DMUP" - - Direction: server -> client - Parameters: { int8_t button } - Description: - - Mouse button release. - button = button id - - - barrierCmdDMouseMove "DMMV" - - Direction: server -> client - Parameters: { int16_t x, int16_t y } - Description: - - Absolute mouse moved. - x,y = absolute screen coordinates - - - barrierCmdDMouseRelMove "DMRM" - - Direction: server -> client - Parameters: { int16_t x, int16_t y } - Description: - - Relative mouse moved. - x,y = r relative screen coordinates - - - barrierCmdDMouseWheel "DMWM" - - Direction: server -> client - Parameters: { int16_t x , int16_t y } or { int16_t y } - Description: - - Mouse scroll. 
The delta should be +120 for one tick forward (away - from the user) or right and -120 for one tick backward (toward the - user) or left. - x = x delta - y = y delta - - - barrierCmdDClipboard "DCLP" - - Direction: server -> client - Parameters: { int8_t id, int32_t seq, int8_t mark, char *data } - Description: - - Clipboard data. - id = clipboard id - seq = sequence number. The sequence number is 0 when sent by the - server. Client screens should use the/ sequence number from - the most recent barrierCmdCEnter. - - - barrierCmdDSetOptions "DSOP" - - Direction: server -> client - Parameters: { int32 t nb, { int32_t id, int32_t val }[] } - Description: - - Set options. Client should set the given option/value pairs. - nb = numbers of { id, val } entries - id = option id - val = option new value - - - barrierCmdDFileTransfer "DFTR" - - Direction: server -> client - Parameters: { int8_t mark, char *content } - Description: - - Transfer file data. - mark = 0 means the content followed is the file size - 1 means the content followed is the chunk data - 2 means the file transfer is finished - - - barrierCmdDDragInfo "DDRG" int16_t char * - - Direction: server -> client - Parameters: { int16_t nb, char *content } - Description: - - Drag information. - nb = number of dragging objects - content = object's directory - - - barrierCmdQInfo "QINF" - - Direction: server -> client - Parameters: None - Description: - - Query screen info - Client should reply with a barrierCmdDInfo - - - barrierCmdEIncompatible "EICV" - - Direction: server -> client - Parameters: { int16_t nb, major *minor } - Description: - - Incompatible version. - major = major version - minor = minor version - - - barrierCmdEBusy "EBSY" - - Direction: server -> client - Parameters: None - Description: - - Name provided when connecting is already in use. - - - barrierCmdEUnknown "EUNK" - - Direction: server -> client - Parameters: None - Description: - - Unknown client. Name provided when connecting is not in primary's - screen configuration map. - - - barrierCmdEBad "EBAD" - - Direction: server -> client - Parameters: None - Description: - - Protocol violation. Server should disconnect after sending this - message. - -* TO DO - - - Enable SSL - - Manage SetOptions/ResetOptions commands - diff --git a/docs/block-replication.txt b/docs/block-replication.txt index 108e9166a8b..b0f23761c6e 100644 --- a/docs/block-replication.txt +++ b/docs/block-replication.txt @@ -79,7 +79,7 @@ Primary | || Secondary disk <--------- hidden-disk 5 <--------- || | | || | | || '-------------------------' - || drive-backup sync=none 6 + || blockdev-backup sync=none 6 1) The disk on the primary is represented by a block device with two children, providing replication between a primary disk and the host that @@ -101,7 +101,7 @@ should support bdrv_make_empty() and backing file. that is modified by the primary VM. It should also start as an empty disk, and the driver supports bdrv_make_empty() and backing file. -6) The drive-backup job (sync=none) is run to allow hidden-disk to buffer +6) The blockdev-backup job (sync=none) is run to allow hidden-disk to buffer any state that would otherwise be lost by the speculative write-through of the NBD server into the secondary disk. So before block replication, the primary disk and secondary disk should contain the same data. 
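For reference, a backup job of this shape corresponds to a QMP invocation roughly like the following (the node names are placeholders, and in the COLO setup the job is started internally by the replication driver rather than issued by hand):

    { "execute": "blockdev-backup",
      "arguments": {
          "job-id": "hidden-disk-buffer",
          "device": "secondary-disk",
          "target": "hidden-disk",
          "sync": "none"
      }
    }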
@@ -156,15 +156,15 @@ Primary: children.0.driver=raw Run qmp command in primary qemu: - { 'execute': 'human-monitor-command', - 'arguments': { - 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1' + { "execute": "human-monitor-command", + "arguments": { + "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1" } } - { 'execute': 'x-blockdev-change', - 'arguments': { - 'parent': 'colo1', - 'node': 'nbd_client1' + { "execute": "x-blockdev-change", + "arguments": { + "parent": "colo1", + "node": "nbd_client1" } } Note: @@ -189,21 +189,21 @@ Secondary: vote-threshold=1,children.0=childs1 Then run qmp command in secondary qemu: - { 'execute': 'nbd-server-start', - 'arguments': { - 'addr': { - 'type': 'inet', - 'data': { - 'host': 'xxx', - 'port': 'xxx' + { "execute": "nbd-server-start", + "arguments": { + "addr": { + "type": "inet", + "data": { + "host": "xxx", + "port": "xxx" } } } } - { 'execute': 'nbd-server-add', - 'arguments': { - 'device': 'colo1', - 'writable': true + { "execute": "nbd-server-add", + "arguments": { + "device": "colo1", + "writable": true } } @@ -223,22 +223,22 @@ After Failover: Primary: The secondary host is down, so we should run the following qmp command to remove the nbd child from the quorum: - { 'execute': 'x-blockdev-change', - 'arguments': { - 'parent': 'colo1', - 'child': 'children.1' + { "execute": "x-blockdev-change", + "arguments": { + "parent": "colo1", + "child": "children.1" } } - { 'execute': 'human-monitor-command', - 'arguments': { - 'command-line': 'drive_del xxxx' + { "execute": "human-monitor-command", + "arguments": { + "command-line": "drive_del xxxx" } } Note: there is no qmp command to remove the blockdev now Secondary: The primary host is down, so we should do the following thing: - { 'execute': 'nbd-server-stop' } + { "execute": "nbd-server-stop" } Promote Secondary to Primary: see COLO-FT.txt diff --git a/docs/bootindex.txt b/docs/bootindex.txt deleted file mode 100644 index 6937862ba0d..00000000000 --- a/docs/bootindex.txt +++ /dev/null @@ -1,52 +0,0 @@ -= Bootindex property = - -Block and net devices have bootindex property. This property is used to -determine the order in which firmware will consider devices for booting -the guest OS. If the bootindex property is not set for a device, it gets -lowest boot priority. There is no particular order in which devices with -unset bootindex property will be considered for booting, but they will -still be bootable. - -== Example == - -Let's assume we have a QEMU machine with two NICs (virtio, e1000) and two -disks (IDE, virtio): - -qemu -drive file=disk1.img,if=none,id=disk1 - -device ide-hd,drive=disk1,bootindex=4 - -drive file=disk2.img,if=none,id=disk2 - -device virtio-blk-pci,drive=disk2,bootindex=3 - -netdev type=user,id=net0 -device virtio-net-pci,netdev=net0,bootindex=2 - -netdev type=user,id=net1 -device e1000,netdev=net1,bootindex=1 - -Given the command above, firmware should try to boot from the e1000 NIC -first. If this fails, it should try the virtio NIC next; if this fails -too, it should try the virtio disk, and then the IDE disk. - -== Limitations == - -1. Some firmware has limitations on which devices can be considered for -booting. For instance, the PC BIOS boot specification allows only one -disk to be bootable. 
If boot from disk fails for some reason, the BIOS -won't retry booting from other disk. It can still try to boot from -floppy or net, though. - -2. Sometimes, firmware cannot map the device path QEMU wants firmware to -boot from to a boot method. It doesn't happen for devices the firmware -can natively boot from, but if firmware relies on an option ROM for -booting, and the same option ROM is used for booting from more then one -device, the firmware may not be able to ask the option ROM to boot from -a particular device reliably. For instance with the PC BIOS, if a SCSI HBA -has three bootable devices target1, target3, target5 connected to it, -the option ROM will have a boot method for each of them, but it is not -possible to map from boot method back to a specific target. This is a -shortcoming of the PC BIOS boot specification. - -== Mixing bootindex and boot order parameters == - -Note that it does not make sense to use the bootindex property together -with the "-boot order=..." (or "-boot once=...") parameter. The guest -firmware implementations normally either support the one or the other, -but not both parameters at the same time. Mixing them will result in -undefined behavior, and thus the guest firmware will likely not boot -from the expected devices.
diff --git a/docs/bypass-iommu.txt b/docs/bypass-iommu.txt new file mode 100644 index 00000000000..e6677bddd32 --- /dev/null +++ b/docs/bypass-iommu.txt @@ -0,0 +1,89 @@ +BYPASS IOMMU PROPERTY +===================== + +Description +=========== +Traditionally, there is a single global switch to enable/disable the vIOMMU: all +devices in the system either go through the vIOMMU or none do, which +is not flexible. The bypass_iommu property lets devices that go through the +vIOMMU coexist with devices that bypass it. This is useful, for instance, to +pass through devices in no-iommu mode while other devices in the same virtual +machine go through the vIOMMU. + +PCI host bridges have a bypass_iommu property. This property is used to +determine whether the devices attached to the PCI host bridge will bypass the +virtual iommu. The bypass_iommu property is valid only when there is a +virtual iommu in the system; it is implemented to allow some devices to +bypass the vIOMMU. When the bypass_iommu property is not set for a host bridge, +the attached devices will go through the vIOMMU by default. + +Usage +===== +The bypass iommu feature supports the PXB host bridge and the default main host +bridge: a bypass_iommu property is provided for PXB, and a default_bus_bypass_iommu +property for the machine. Note that default_bus_bypass_iommu is available only with +the 'q35' machine type on x86 architecture and the 'virt' machine type +on AArch64. Other machine types do not support bypass iommu for the default +root bus. + +1. The following are the bypass iommu options: + (1) PCI expander bridge + qemu -device pxb-pcie,bus_nr=0x10,addr=0x1,bypass_iommu=true + (2) Arm default host bridge + qemu -machine virt,iommu=smmuv3,default_bus_bypass_iommu=true + (3) X86 default root bus bypass iommu: + qemu -machine q35,default_bus_bypass_iommu=true + +2. Here is the detailed qemu command line for the 'virt' machine with PXB on +AArch64: + +qemu-system-aarch64 \ + -machine virt,kernel_irqchip=on,iommu=smmuv3,default_bus_bypass_iommu=true \ + -device pxb-pcie,bus_nr=0x10,id=pci.10,bus=pcie.0,addr=0x3.0x1 \ + -device pxb-pcie,bus_nr=0x20,id=pci.20,bus=pcie.0,addr=0x3.0x2,bypass_iommu=true \ + +This gives us: + - a default host bridge which bypasses the SMMUv3 + - a pxb host bridge which goes through the SMMUv3 + - a pxb host bridge which bypasses the SMMUv3 + +3.
Here is the detailed qemu command line for the 'q35' machine with PXB on +x86 architecture: + +qemu-system-x86_64 \ + -machine q35,accel=kvm,default_bus_bypass_iommu=true \ + -device pxb-pcie,bus_nr=0x10,id=pci.10,bus=pcie.0,addr=0x3 \ + -device pxb-pcie,bus_nr=0x20,id=pci.20,bus=pcie.0,addr=0x4,bypass_iommu=true \ + -device intel-iommu \ + +This gives us: + - a default host bridge which bypasses the iommu + - a pxb host bridge which goes through the iommu + - a pxb host bridge which bypasses the iommu + +Limitations +=========== +There is a potential security risk when devices bypass the iommu, because such +devices might send malicious dma requests to the virtual machine without +iommu isolation. It is therefore advisable to bypass the iommu only for trusted +devices. + +Implementation +============== +The bypass iommu feature includes: + - Address space + Add a bypass_iommu property check to the PCI host and do not obtain an iommu + address space for devices that bypass the iommu. + - Arm SMMUv3 support + We traverse all PCI root buses and get their bus number ranges, then build an + explicit RID mapping for the devices which do not bypass the iommu. + - X86 IOMMU support + To support the Intel iommu, we traverse all PCI host bridges and collect + information about the devices which do not bypass the iommu, then fill the DMAR + drhd struct with explicit device scope info. To support the AMD iommu, a bypass + check is added when traversing the PCI host bridge. + - Machine and PXB options + We add bypass iommu options to the machine options for the default root bus, and + to PXB as well. Note that the default value of bypass_iommu is false, + so devices will by default go through the iommu if one exists. +
diff --git a/docs/ccid.txt b/docs/ccid.txt index c97fbd2de0a..2b85b1bd42c 100644 --- a/docs/ccid.txt +++ b/docs/ccid.txt @@ -34,15 +34,14 @@ reader and smart card (i.e. not backed by a physical device) using this device. 2. Building -The cryptographic functions and access to the physical card is done via NSS. - -Installing NSS: +The cryptographic functions and access to the physical card are done via the +libcacard library, whose development package must be installed prior to +building QEMU: In redhat/fedora: - yum install nss-devel -In ubuntu/debian: - apt-get install libnss3-dev - (not tested on ubuntu) + yum install libcacard-devel +In ubuntu: + apt-get install libcacard-dev Configuring and building: ./configure --enable-smartcard && make @@ -51,7 +50,7 @@ Configuring and building: 3. Using ccid-card-emulated with hardware Assuming you have a working smartcard on the host with the current -user, using NSS, qemu acts as another NSS client using ccid-card-emulated: +user, using libcacard, QEMU acts as another client using ccid-card-emulated: qemu -usb -device usb-ccid -device ccid-card-emulated
diff --git a/docs/conf.py b/docs/conf.py index 2ee61118725..763e7d24344 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,6 +29,7 @@ import os import sys import sphinx +from distutils.version import LooseVersion from sphinx.errors import ConfigError # Make Sphinx fail cleanly if using an old Python, rather than obscurely @@ -73,7 +74,7 @@ extensions = ['kerneldoc', 'qmp_lexer', 'hxtool', 'depfile', 'qapidoc'] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = [os.path.join(qemu_docdir, '_templates')] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: @@ -84,9 +85,14 @@ # The master toctree document.
master_doc = 'index' +# Interpret `single-backticks` to be a cross-reference to any kind of +# referenceable object. Unresolvable or ambiguous references will emit a +# warning at build time. +default_role = 'any' + # General information about the project. project = u'QEMU' -copyright = u'2020, The QEMU Project Developers' +copyright = u'2021, The QEMU Project Developers' author = u'The QEMU Project Developers' # The version info for the project you're documenting, acts as replacement for @@ -150,38 +156,52 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +try: + import sphinx_rtd_theme +except ImportError: + raise ConfigError( + 'The Sphinx \'sphinx_rtd_theme\' HTML theme was not found.\n' + ) + +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# We initialize this to empty here, so the per-manual conf.py can just -# add individual key/value entries. -html_theme_options = { -} +if LooseVersion(sphinx_rtd_theme.__version__) >= LooseVersion("0.4.3"): + html_theme_options = { + "style_nav_header_background": "#802400", + "navigation_with_keys": True, + } + +html_logo = os.path.join(qemu_docdir, "../ui/icons/qemu_128x128.png") + +html_favicon = os.path.join(qemu_docdir, "../ui/icons/qemu_32x32.png") # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -# QEMU doesn't yet have any static files, so comment this out so we don't -# get a warning about a missing directory. -# If we do ever add this then it would probably be better to call the -# subdirectory sphinx_static, as the Linux kernel does. -# html_static_path = ['_static'] +html_static_path = [os.path.join(qemu_docdir, "sphinx-static")] + +html_css_files = [ + 'theme_overrides.css', +] + +html_js_files = [ + 'custom.js', +] + +html_context = { + "display_gitlab": True, + "gitlab_user": "qemu-project", + "gitlab_repo": "qemu", + "gitlab_version": "master", + "conf_py_path": "/docs/", # Path in the checkout to the docs root +} # Custom sidebar templates, must be a dictionary that maps document names # to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'about.html', - 'editpage.html', - 'navigation.html', - 'searchbox.html', - ] -} +#html_sidebars = {} # Don't copy the rST source files to the HTML output directory, # and don't put links to the sources into the output HTML. @@ -269,6 +289,7 @@ ['Stefan Hajnoczi ', 'Masayoshi Mizuma '], 1), ] +man_make_section_directory = False # -- Options for Texinfo output ------------------------------------------- diff --git a/docs/devel/_templates/editpage.html b/docs/devel/_templates/editpage.html deleted file mode 100644 index a86d22bca86..00000000000 --- a/docs/devel/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst index 7ef36f42d0f..431caba7aa0 100644 --- a/docs/devel/build-system.rst +++ b/docs/devel/build-system.rst @@ -42,73 +42,21 @@ perform a build: ../configure make -For now, checks on the compilation environment are found in configure -rather than meson.build, though this is expected to change. The command -line is parsed in the configure script and, whenever needed, converted -into the appropriate options to Meson. - -New checks should be added to Meson, which usually comprises the -following tasks: - - - Add a Meson build option to meson_options.txt. - - - Add support to the command line arg parser to handle any new - `--enable-XXX`/`--disable-XXX` flags required by the feature. - - - Add information to the help output message to report on the new - feature flag. - - - Add code to perform the actual feature check. - - - Add code to include the feature status in `config-host.h` - - - Add code to print out the feature status in the configure summary - upon completion. - - -Taking the probe for SDL2_Image as an example, we have the following pieces -in configure:: - - # Initial variable state - sdl_image=auto - - ..snip.. - - # Configure flag processing - --disable-sdl-image) sdl_image=disabled - ;; - --enable-sdl-image) sdl_image=enabled - ;; - - ..snip.. - - # Help output feature message - sdl-image SDL Image support for icons - - ..snip.. - - # Meson invocation - -Dsdl_image=$sdl_image - -In meson_options.txt:: - - option('sdl', type : 'feature', value : 'auto', - description: 'SDL Image support for icons') - -In meson.build:: - - # Detect dependency - sdl_image = dependency('SDL2_image', required: get_option('sdl_image'), - method: 'pkg-config', - kwargs: static_kwargs) - - # Create config-host.h (if applicable) - config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) - - # Summary - summary_info += {'SDL image support': sdl_image.found()} - - +The configure script automatically recognizes +command line options for which a same-named Meson option exists; +dashes in the command line are replaced with underscores. + +Many checks on the compilation environment are still found in configure +rather than ``meson.build``, but new checks should be added directly to +``meson.build``. + +Patches are also welcome to move existing checks from the configure +phase to ``meson.build``. When doing so, ensure that ``meson.build`` does +not use anymore the keys that you have removed from ``config-host.mak``. +Typically these will be replaced in ``meson.build`` by boolean variables, +``get_option('optname')`` invocations, or ``dep.found()`` expressions. +In general, the remaining checks have little or no interdependencies, +so they can be moved one by one. Helper functions ---------------- @@ -116,51 +64,51 @@ Helper functions The configure script provides a variety of helper functions to assist developers in checking for system features: -`do_cc $ARGS...` +``do_cc $ARGS...`` Attempt to run the system C compiler passing it $ARGS... -`do_cxx $ARGS...` +``do_cxx $ARGS...`` Attempt to run the system C++ compiler passing it $ARGS... -`compile_object $CFLAGS` +``compile_object $CFLAGS`` Attempt to compile a test program with the system C compiler using $CFLAGS. The test program must have been previously written to a file - called $TMPC. The replacement in Meson is the compiler object `cc`, - which has methods such as `cc.compiles()`, - `cc.check_header()`, `cc.has_function()`. + called $TMPC. 
The replacement in Meson is the compiler object ``cc``, + which has methods such as ``cc.compiles()``, + ``cc.check_header()``, ``cc.has_function()``. -`compile_prog $CFLAGS $LDFLAGS` +``compile_prog $CFLAGS $LDFLAGS`` Attempt to compile a test program with the system C compiler using $CFLAGS and link it with the system linker using $LDFLAGS. The test program must have been previously written to a file called $TMPC. - The replacement in Meson is `cc.find_library()` and `cc.links()`. + The replacement in Meson is ``cc.find_library()`` and ``cc.links()``. -`has $COMMAND` +``has $COMMAND`` Determine if $COMMAND exists in the current environment, either as a shell builtin, or executable binary, returning 0 on success. The - replacement in Meson is `find_program()`. + replacement in Meson is ``find_program()``. -`check_define $NAME` +``check_define $NAME`` Determine if the macro $NAME is defined by the system C compiler -`check_include $NAME` +``check_include $NAME`` Determine if the include $NAME file is available to the system C - compiler. The replacement in Meson is `cc.has_header()`. + compiler. The replacement in Meson is ``cc.has_header()``. -`write_c_skeleton` +``write_c_skeleton`` Write a minimal C program main() function to the temporary file indicated by $TMPC -`feature_not_found $NAME $REMEDY` +``feature_not_found $NAME $REMEDY`` Print a message to stderr that the feature $NAME was not available on the system, suggesting the user try $REMEDY to address the problem. -`error_exit $MESSAGE $MORE...` +``error_exit $MESSAGE $MORE...`` Print $MESSAGE to stderr, followed by $MORE... and then exit from the configure script with non-zero status -`query_pkg_config $ARGS...` +``query_pkg_config $ARGS...`` Run pkg-config passing it $ARGS. If QEMU is doing a static build, then --static will be automatically added to $ARGS @@ -173,11 +121,11 @@ process for: 1) executables, which include: - - Tools - qemu-img, qemu-nbd, qga (guest agent), etc + - Tools - ``qemu-img``, ``qemu-nbd``, ``qga`` (guest agent), etc - - System emulators - qemu-system-$ARCH + - System emulators - ``qemu-system-$ARCH`` - - Userspace emulators - qemu-$ARCH + - Userspace emulators - ``qemu-$ARCH`` - Unit tests @@ -187,7 +135,7 @@ process for: 4) other data files, such as icons or desktop files -All executables are built by default, except for some `contrib/` +All executables are built by default, except for some ``contrib/`` binaries that are known to fail to build on some platforms (for example 32-bit or big-endian platforms). Tests are also built by default, though that might change in the future. @@ -195,14 +143,14 @@ though that might change in the future. The source code is highly modularized, split across many files to facilitate building of all of these components with as little duplicated compilation as possible. Using the Meson "sourceset" functionality, -`meson.build` files group the source files in rules that are +``meson.build`` files group the source files in rules that are enabled according to the available system libraries and to various configuration symbols. Sourcesets belong to one of four groups: Subsystem sourcesets: Various subsystems that are common to both tools and emulators have - their own sourceset, for example `block_ss` for the block device subsystem, - `chardev_ss` for the character device subsystem, etc. These sourcesets + their own sourceset, for example ``block_ss`` for the block device subsystem, + ``chardev_ss`` for the character device subsystem, etc. 
These sourcesets are then turned into static libraries as follows:: libchardev = static_library('chardev', chardev_ss.sources(), @@ -211,8 +159,8 @@ Subsystem sourcesets: chardev = declare_dependency(link_whole: libchardev) - As of Meson 0.55.1, the special `.fa` suffix should be used for everything - that is used with `link_whole`, to ensure that the link flags are placed + As of Meson 0.55.1, the special ``.fa`` suffix should be used for everything + that is used with ``link_whole``, to ensure that the link flags are placed correctly in the command line. Target-independent emulator sourcesets: @@ -221,38 +169,38 @@ Target-independent emulator sourcesets: This includes error handling infrastructure, standard data structures, platform portability wrapper functions, etc. - Target-independent code lives in the `common_ss`, `softmmu_ss` and - `user_ss` sourcesets. `common_ss` is linked into all emulators, - `softmmu_ss` only in system emulators, `user_ss` only in user-mode + Target-independent code lives in the ``common_ss``, ``softmmu_ss`` and + ``user_ss`` sourcesets. ``common_ss`` is linked into all emulators, + ``softmmu_ss`` only in system emulators, ``user_ss`` only in user-mode emulators. Target-independent sourcesets must exercise particular care when using - `if_false` rules. The `if_false` rule will be used correctly when linking + ``if_false`` rules. The ``if_false`` rule will be used correctly when linking emulator binaries; however, when *compiling* target-independent files - into .o files, Meson may need to pick *both* the `if_true` and - `if_false` sides to cater for targets that want either side. To + into .o files, Meson may need to pick *both* the ``if_true`` and + ``if_false`` sides to cater for targets that want either side. To achieve that, you can add a special rule using the ``CONFIG_ALL`` symbol:: # Some targets have CONFIG_ACPI, some don't, so this is not enough - softmmu_ss.add(when: 'CONFIG_ACPI`, if_true: files('acpi.c'), + softmmu_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi.c'), if_false: files('acpi-stub.c')) # This is required as well: - softmmu_ss.add(when: 'CONFIG_ALL`, if_true: files('acpi-stub.c')) + softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c')) Target-dependent emulator sourcesets: In the target-dependent set lives CPU emulation, some device emulation and much glue code. This sometimes also has to be compiled multiple times, once for each target being built. Target-dependent files are included - in the `specific_ss` sourceset. + in the ``specific_ss`` sourceset. - Each emulator also includes sources for files in the `hw/` and `target/` + Each emulator also includes sources for files in the ``hw/`` and ``target/`` subdirectories. The subdirectory used for each emulator comes from the target's definition of ``TARGET_BASE_ARCH`` or (if missing) - ``TARGET_ARCH``, as found in `default-configs/targets/*.mak`. + ``TARGET_ARCH``, as found in ``default-configs/targets/*.mak``. - Each subdirectory in `hw/` adds one sourceset to the `hw_arch` dictionary, + Each subdirectory in ``hw/`` adds one sourceset to the ``hw_arch`` dictionary, for example:: arm_ss = ss.source_set() @@ -262,8 +210,8 @@ Target-dependent emulator sourcesets: The sourceset is only used for system emulators. 
- Each subdirectory in `target/` instead should add one sourceset to each - of the `target_arch` and `target_softmmu_arch`, which are used respectively + Each subdirectory in ``target/`` instead should add one sourceset to each + of the ``target_arch`` and ``target_softmmu_arch``, which are used respectively for all emulators and for system emulators only. For example:: arm_ss = ss.source_set() @@ -272,16 +220,33 @@ Target-dependent emulator sourcesets: target_arch += {'arm': arm_ss} target_softmmu_arch += {'arm': arm_softmmu_ss} +Module sourcesets: + There are two dictionaries for modules: ``modules`` is used for + target-independent modules and ``target_modules`` is used for + target-dependent modules. When modules are disabled the ``module`` + source sets are added to ``softmmu_ss`` and the ``target_modules`` + source sets are added to ``specific_ss``. + + Both dictionaries are nested. One dictionary is created per + subdirectory, and these per-subdirectory dictionaries are added to + the toplevel dictionaries. For example:: + + hw_display_modules = {} + qxl_ss = ss.source_set() + ... + hw_display_modules += { 'qxl': qxl_ss } + modules += { 'hw-display': hw_display_modules } + Utility sourcesets: - All binaries link with a static library `libqemuutil.a`. This library + All binaries link with a static library ``libqemuutil.a``. This library is built from several sourcesets; most of them however host generated - code, and the only two of general interest are `util_ss` and `stub_ss`. + code, and the only two of general interest are ``util_ss`` and ``stub_ss``. The separation between these two is purely for documentation purposes. - `util_ss` contains generic utility files. Even though this code is only + ``util_ss`` contains generic utility files. Even though this code is only linked in some binaries, sometimes it requires hooks only in some of these and depend on other functions that are not fully implemented by - all QEMU binaries. `stub_ss` links dummy stubs that will only be linked + all QEMU binaries. ``stub_ss`` links dummy stubs that will only be linked into the binary if the real implementation is not present. In a way, the stubs can be thought of as a portable implementation of the weak symbols concept. @@ -290,8 +255,8 @@ Utility sourcesets: The following files concur in the definition of which files are linked into each emulator: -`default-configs/devices/*.mak` - The files under `default-configs/devices/` control the boards and devices +``default-configs/devices/*.mak`` + The files under ``default-configs/devices/`` control the boards and devices that are built into each QEMU system emulation targets. They merely contain a list of config variable definitions such as:: @@ -299,18 +264,18 @@ into each emulator: CONFIG_XLNX_ZYNQMP_ARM=y CONFIG_XLNX_VERSAL=y -`*/Kconfig` - These files are processed together with `default-configs/devices/*.mak` and +``*/Kconfig`` + These files are processed together with ``default-configs/devices/*.mak`` and describe the dependencies between various features, subsystems and device models. They are described in :ref:`kconfig` -`default-configs/targets/*.mak` - These files mostly define symbols that appear in the `*-config-target.h` +``default-configs/targets/*.mak`` + These files mostly define symbols that appear in the ``*-config-target.h`` file for each emulator [#cfgtarget]_. However, the ``TARGET_ARCH`` - and ``TARGET_BASE_ARCH`` will also be used to select the `hw/` and - `target/` subdirectories that are compiled into each target. 
+ and ``TARGET_BASE_ARCH`` will also be used to select the ``hw/`` and + ``target/`` subdirectories that are compiled into each target. -.. [#cfgtarget] This header is included by `qemu/osdep.h` when +.. [#cfgtarget] This header is included by ``qemu/osdep.h`` when compiling files from the target-specific sourcesets. These files rarely need changing unless you are adding a completely @@ -318,23 +283,77 @@ new target, or enabling new devices or hardware for a particular system/userspace emulation target +Adding checks +------------- + +New checks should be added to Meson. Compiler checks can be as simple as +the following:: + + config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h')) + +A more complex task such as adding a new dependency usually +comprises the following tasks: + + - Add a Meson build option to meson_options.txt. + + - Add code to perform the actual feature check. + + - Add code to include the feature status in ``config-host.h`` + + - Add code to print out the feature status in the configure summary + upon completion. + +Taking the probe for SDL2_Image as an example, we have the following +in ``meson_options.txt``:: + + option('sdl_image', type : 'feature', value : 'auto', + description: 'SDL Image support for icons') + +Unless the option was given a non-``auto`` value (on the configure +command line), the detection code must be performed only if the +dependency will be used:: + + sdl_image = not_found + if not get_option('sdl_image').auto() or have_system + sdl_image = dependency('SDL2_image', required: get_option('sdl_image'), + method: 'pkg-config', + static: enable_static) + endif + +This avoids warnings on static builds of user-mode emulators, for example. +Most of the libraries used by system-mode emulators are not available for +static linking. + +The other supporting code is generally simple:: + + # Create config-host.h (if applicable) + config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) + + # Summary + summary_info += {'SDL image support': sdl_image.found()} + +For the configure script to parse the new option, the +``scripts/meson-buildoptions.sh`` file must be up-to-date; ``make +update-buildoptions`` (or just ``make``) will take care of updating it. + + Support scripts --------------- Meson has a special convention for invoking Python scripts: if their -first line is `#! /usr/bin/env python3` and the file is *not* executable, +first line is ``#! /usr/bin/env python3`` and the file is *not* executable, find_program() arranges to invoke the script under the same Python interpreter that was used to invoke Meson. This is the most common and preferred way to invoke support scripts from Meson build files, because it automatically uses the value of configure's --python= option. -In case the script is not written in Python, use a `#! /usr/bin/env ...` +In case the script is not written in Python, use a ``#! /usr/bin/env ...`` line and make the script executable. Scripts written in Python, where it is desirable to make the script executable (for example for test scripts that developers may want to invoke from the command line, such as tests/qapi-schema/test-qapi.py), -should be invoked through the `python` variable in meson.build. For +should be invoked through the ``python`` variable in meson.build. For example:: test('QAPI schema regression tests', python, @@ -358,10 +377,20 @@ rules and wraps them so that e.g. submodules are built before QEMU. 
The resulting build system is largely non-recursive in nature, in contrast to common practices seen with automake. -Tests are also ran by the Makefile with the traditional `make check` -phony target, while benchmarks are run with `make bench`. Meson test -suites such as `unit` can be ran with `make check-unit` too. It is also -possible to run tests defined in meson.build with `meson test`. +Tests are also ran by the Makefile with the traditional ``make check`` +phony target, while benchmarks are run with ``make bench``. Meson test +suites such as ``unit`` can be ran with ``make check-unit`` too. It is also +possible to run tests defined in meson.build with ``meson test``. + +Useful make targets +------------------- + +``help`` + Print a help message for the most common build targets. + +``print-VAR`` + Print the value of the variable VAR. Useful for debugging the build + system. Important files for the build system ==================================== @@ -373,28 +402,28 @@ The following key files are statically defined in the source tree, with the rules needed to build QEMU. Their behaviour is influenced by a number of dynamically created files listed later. -`Makefile` +``Makefile`` The main entry point used when invoking make to build all the components of QEMU. The default 'all' target will naturally result in the build of every component. Makefile takes care of recursively building submodules directly via a non-recursive set of rules. -`*/meson.build` +``*/meson.build`` The meson.build file in the root directory is the main entry point for the Meson build system, and it coordinates the configuration and build of all executables. Build rules for various subdirectories are included in other meson.build files spread throughout the QEMU source tree. -`tests/Makefile.include` +``tests/Makefile.include`` Rules for external test harnesses. These include the TCG tests, - `qemu-iotests` and the Avocado-based acceptance tests. + ``qemu-iotests`` and the Avocado-based integration tests. -`tests/docker/Makefile.include` +``tests/docker/Makefile.include`` Rules for Docker tests. Like tests/Makefile, this file is included directly by the top level Makefile, anything defined in this file will influence the entire build system. -`tests/vm/Makefile.include` +``tests/vm/Makefile.include`` Rules for VM-based tests. Like tests/Makefile, this file is included directly by the top level Makefile, anything defined in this file will influence the entire build system. @@ -410,11 +439,11 @@ Makefile. Built by configure: -`config-host.mak` +``config-host.mak`` When configure has determined the characteristics of the build host it will write a long list of variables to config-host.mak file. This provides the various install directories, compiler / linker flags and a - variety of `CONFIG_*` variables related to optionally enabled features. + variety of ``CONFIG_*`` variables related to optionally enabled features. This is imported by the top level Makefile and meson.build in order to tailor the build output. @@ -429,41 +458,29 @@ Built by configure: Built by Meson: -`${TARGET-NAME}-config-devices.mak` +``${TARGET-NAME}-config-devices.mak`` TARGET-NAME is again the name of a system or userspace emulator. The config-devices.mak file is automatically generated by make using the scripts/make_device_config.sh program, feeding it the default-configs/$TARGET-NAME file as input. 
-`config-host.h`, `$TARGET-NAME/config-target.h`, `$TARGET-NAME/config-devices.h` - These files are used by source code to determine what features - are enabled. They are generated from the contents of the corresponding - `*.h` files using the scripts/create_config program. This extracts - relevant variables and formats them as C preprocessor macros. +``config-host.h``, ``$TARGET_NAME-config-target.h``, ``$TARGET_NAME-config-devices.h`` + These files are used by source code to determine what features are + enabled. They are generated from the contents of the corresponding + ``*.mak`` files using Meson's ``configure_file()`` function. -`build.ninja` +``build.ninja`` The build rules. Built by Makefile: -`Makefile.ninja` +``Makefile.ninja`` A Makefile include that bridges to ninja for the actual build. The Makefile is mostly a list of targets that Meson included in build.ninja. -`Makefile.mtest` +``Makefile.mtest`` The Makefile definitions that let "make check" run tests defined in meson.build. The rules are produced from Meson's JSON description of tests (obtained with "meson introspect --tests") through the script scripts/mtest2make.py. - - -Useful make targets -------------------- - -`help` - Print a help message for the most common build targets. - -`print-VAR` - Print the value of the variable VAR. Useful for debugging the build - system. diff --git a/docs/devel/ci-definitions.rst.inc b/docs/devel/ci-definitions.rst.inc new file mode 100644 index 00000000000..6d5c6fd9f20 --- /dev/null +++ b/docs/devel/ci-definitions.rst.inc @@ -0,0 +1,121 @@ +Definition of terms +=================== + +This section defines the terms used in this document and correlates them with +what is currently used on QEMU. + +Automated tests +--------------- + +An automated test is written on a test framework using its generic test +functions/classes. The test framework can run the tests and report their +success or failure [1]_. + +An automated test has essentially three parts: + +1. The test initialization of the parameters, where the expected parameters, + like inputs and expected results, are set up; +2. The call to the code that should be tested; +3. An assertion, comparing the result from the previous call with the expected + result set during the initialization of the parameters. If the result + matches the expected result, the test has been successful; otherwise, it has + failed. + +Unit testing +------------ + +A unit test is responsible for exercising individual software components as a +unit, like interfaces, data structures, and functionality, uncovering errors +within the boundaries of a component. The verification effort is in the +smallest software unit and focuses on the internal processing logic and data +structures. A test case of unit tests should be designed to uncover errors due +to erroneous computations, incorrect comparisons, or improper control flow [2]_. + +On QEMU, unit testing is represented by the 'check-unit' target from 'make'. + +Functional testing +------------------ + +A functional test focuses on the functional requirement of the software. +Deriving sets of input conditions, the functional tests should fully exercise +all the functional requirements for a program. Functional testing is +complementary to other testing techniques, attempting to find errors like +incorrect or missing functions, interface errors, behavior errors, and +initialization and termination errors [3]_. + +On QEMU, functional testing is represented by the 'check-qtest' target from +'make'. 
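+As a quick illustration (the exact per-target qtest suite names depend on which
+emulation targets were configured), these two levels are typically invoked as::
+
+  make check-unit
+  make check-qtest-x86_64
+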
+ +System testing +-------------- + +System tests ensure all application elements mesh properly while the overall +functionality and performance are achieved [4]_. Some or all system components +are integrated to create a complete system to be tested as a whole. System +testing ensures that components are compatible, interact correctly, and +transfer the right data at the right time across their interfaces. As system +testing focuses on interactions, use case-based testing is a practical approach +to system testing [5]_. Note that, in some cases, system testing may require +interaction with third-party software, like operating system images, databases, +networks, and so on. + +On QEMU, system testing is represented by the 'check-avocado' target from +'make'. + +Flaky tests +----------- + +A flaky test is defined as a test that exhibits both a passing and a failing +result with the same code on different runs. Some usual reasons for an +intermittent/flaky test are async wait, concurrency, and test order dependency +[6]_. + +Gating +------ + +A gate restricts the move of code from one stage to another on a +test/deployment pipeline. The step move is granted with approval. The approval +can be a manual intervention or a set of tests succeeding [7]_. + +On QEMU, the gating process happens during the pull request. The approval is +done by the project leader running its own set of tests. The pull request gets +merged when the tests succeed. + +Continuous Integration (CI) +--------------------------- + +Continuous integration (CI) requires the builds of the entire application and +the execution of a comprehensive set of automated tests every time there is a +need to commit any set of changes [8]_. The automated tests can be composed of +the unit, functional, system, and other tests. + +Keynotes about continuous integration (CI) [9]_: + +1. System tests may depend on external software (operating system images, + firmware, database, network). +2. It may take a long time to build and test. It may be impractical to build + the system being developed several times per day. +3. If the development platform is different from the target platform, it may + not be possible to run system tests in the developer’s private workspace. + There may be differences in hardware, operating system, or installed + software. Therefore, more time is required for testing the system. + +References +---------- + +.. [1] Sommerville, Ian (2016). Software Engineering. p. 233. +.. [2] Pressman, Roger S. & Maxim, Bruce R. (2020). Software Engineering, + A Practitioner’s Approach. p. 48, 376, 378, 381. +.. [3] Pressman, Roger S. & Maxim, Bruce R. (2020). Software Engineering, + A Practitioner’s Approach. p. 388. +.. [4] Pressman, Roger S. & Maxim, Bruce R. (2020). Software Engineering, + A Practitioner’s Approach. Software Engineering, p. 377. +.. [5] Sommerville, Ian (2016). Software Engineering. p. 59, 232, 240. +.. [6] Luo, Qingzhou, et al. An empirical analysis of flaky tests. + Proceedings of the 22nd ACM SIGSOFT International Symposium on + Foundations of Software Engineering. 2014. +.. [7] Humble, Jez & Farley, David (2010). Continuous Delivery: + Reliable Software Releases Through Build, Test, and Deployment, p. 122. +.. [8] Humble, Jez & Farley, David (2010). Continuous Delivery: + Reliable Software Releases Through Build, Test, and Deployment, p. 55. +.. [9] Sommerville, Ian (2016). Software Engineering. p. 743. 
diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc new file mode 100644 index 00000000000..db3f571d5f3 --- /dev/null +++ b/docs/devel/ci-jobs.rst.inc @@ -0,0 +1,58 @@ +Custom CI/CD variables +====================== + +QEMU CI pipelines can be tuned by setting some CI environment variables. + +Set variable globally in the user's CI namespace +------------------------------------------------ + +Variables can be set globally in the user's CI namespace setting. + +For further information about how to set these variables, please refer to:: + + https://docs.gitlab.com/ee/ci/variables/#add-a-cicd-variable-to-a-project + +Set variable manually when pushing a branch or tag to the user's repository +--------------------------------------------------------------------------- + +Variables can be set manually when pushing a branch or tag, using +git-push command line arguments. + +Example setting the QEMU_CI_EXAMPLE_VAR variable: + +.. code:: + + git push -o ci.variable="QEMU_CI_EXAMPLE_VAR=value" myrepo mybranch + +For further information about how to set these variables, please refer to:: + + https://docs.gitlab.com/ee/user/project/push_options.html#push-options-for-gitlab-cicd + +Here is a list of the most used variables: + +QEMU_CI_AVOCADO_TESTING +~~~~~~~~~~~~~~~~~~~~~~~ +By default, tests using the Avocado framework are not run automatically in +the pipelines (because multiple artifacts have to be downloaded, and if +these artifacts are not already cached, downloading them make the jobs +reach the timeout limit). Set this variable to have the tests using the +Avocado framework run automatically. + +AARCH64_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to an aarch64 host that can be used as a gitlab-CI +runner, you can set this variable to enable the tests that require this +kind of host. The runner should be tagged with "aarch64". + +S390X_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to an IBM Z host that can be used as a gitlab-CI +runner, you can set this variable to enable the tests that require this +kind of host. The runner should be tagged with "s390x". + +CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to a CentOS Stream 8 x86_64 host that can be +used as a gitlab-CI runner, you can set this variable to enable the +tests that require this kind of host. The runner should be tagged with +both "centos_stream_8" and "x86_64". diff --git a/docs/devel/ci-runners.rst.inc b/docs/devel/ci-runners.rst.inc new file mode 100644 index 00000000000..7817001fb28 --- /dev/null +++ b/docs/devel/ci-runners.rst.inc @@ -0,0 +1,117 @@ +Jobs on Custom Runners +====================== + +Besides the jobs run under the various CI systems listed before, there +are a number additional jobs that will run before an actual merge. +These use the same GitLab CI's service/framework already used for all +other GitLab based CI jobs, but rely on additional systems, not the +ones provided by GitLab as "shared runners". + +The architecture of GitLab's CI service allows different machines to +be set up with GitLab's "agent", called gitlab-runner, which will take +care of running jobs created by events such as a push to a branch. +Here, the combination of a machine, properly configured with GitLab's +gitlab-runner, is called a "custom runner". + +The GitLab CI jobs definition for the custom runners are located under:: + + .gitlab-ci.d/custom-runners.yml + +Custom runners entail custom machines. 
To see a list of the machines +currently deployed in the QEMU GitLab CI and their maintainers, please +refer to the QEMU `wiki `__. + +Machine Setup Howto +------------------- + +For all Linux based systems, the setup can be mostly automated by the +execution of two Ansible playbooks. Create an ``inventory`` file +under ``scripts/ci/setup``, such as this:: + + fully.qualified.domain + other.machine.hostname + +You may need to set some variables in the inventory file itself. One +very common need is to tell Ansible to use a Python 3 interpreter on +those hosts. This would look like:: + + fully.qualified.domain ansible_python_interpreter=/usr/bin/python3 + other.machine.hostname ansible_python_interpreter=/usr/bin/python3 + +Build environment +~~~~~~~~~~~~~~~~~ + +The ``scripts/ci/setup/build-environment.yml`` Ansible playbook will +set up machines with the environment needed to perform builds and run +QEMU tests. This playbook consists on the installation of various +required packages (and a general package update while at it). It +currently covers a number of different Linux distributions, but it can +be expanded to cover other systems. + +The minimum required version of Ansible successfully tested in this +playbook is 2.8.0 (a version check is embedded within the playbook +itself). To run the playbook, execute:: + + cd scripts/ci/setup + ansible-playbook -i inventory build-environment.yml + +Please note that most of the tasks in the playbook require superuser +privileges, such as those from the ``root`` account or those obtained +by ``sudo``. If necessary, please refer to ``ansible-playbook`` +options such as ``--become``, ``--become-method``, ``--become-user`` +and ``--ask-become-pass``. + +gitlab-runner setup and registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The gitlab-runner agent needs to be installed on each machine that +will run jobs. The association between a machine and a GitLab project +happens with a registration token. To find the registration token for +your repository/project, navigate on GitLab's web UI to: + + * Settings (the gears-like icon at the bottom of the left hand side + vertical toolbar), then + * CI/CD, then + * Runners, and click on the "Expand" button, then + * Under "Set up a specific Runner manually", look for the value under + "And this registration token:" + +Copy the ``scripts/ci/setup/vars.yml.template`` file to +``scripts/ci/setup/vars.yml``. Then, set the +``gitlab_runner_registration_token`` variable to the value obtained +earlier. + +To run the playbook, execute:: + + cd scripts/ci/setup + ansible-playbook -i inventory gitlab-runner.yml + +Following the registration, it's necessary to configure the runner tags, +and optionally other configurations on the GitLab UI. Navigate to: + + * Settings (the gears like icon), then + * CI/CD, then + * Runners, and click on the "Expand" button, then + * "Runners activated for this project", then + * Click on the "Edit" icon (next to the "Lock" Icon) + +Tags are very important as they are used to route specific jobs to +specific types of runners, so it's a good idea to double check that +the automatically created tags are consistent with the OS and +architecture. 
For instance, an Ubuntu 20.04 aarch64 system should +have tags set as:: + + ubuntu_20.04,aarch64 + +Because the job definition at ``.gitlab-ci.d/custom-runners.yml`` +would contain:: + + ubuntu-20.04-aarch64-all: + tags: + - ubuntu_20.04 + - aarch64 + +It's also recommended to: + + * increase the "Maximum job timeout" to something like ``2h`` + * give it a better Description diff --git a/docs/devel/ci.rst b/docs/devel/ci.rst new file mode 100644 index 00000000000..d106610096e --- /dev/null +++ b/docs/devel/ci.rst @@ -0,0 +1,13 @@ +== +CI +== + +QEMU has configurations enabled for a number of different CI services. +The most up to date information about them and their status can be +found at:: + + https://wiki.qemu.org/Testing/CI + +.. include:: ci-definitions.rst.inc +.. include:: ci-jobs.rst.inc +.. include:: ci-runners.rst.inc diff --git a/docs/devel/clocks.rst b/docs/devel/clocks.rst index 956bd147ea0..675fbeb6abe 100644 --- a/docs/devel/clocks.rst +++ b/docs/devel/clocks.rst @@ -260,6 +260,29 @@ clocks get the new clock period value: *Clock 2*, *Clock 3* and *Clock 4*. It is not possible to disconnect a clock or to change the clock connection after it is connected. +Clock multiplier and divider settings +------------------------------------- + +By default, when clocks are connected together, the child +clocks run with the same period as their source (parent) clock. +The Clock API supports a built-in period multiplier/divider +mechanism so you can configure a clock to make its children +run at a different period from its own. If you call the +``clock_set_mul_div()`` function you can specify the clock's +multiplier and divider values. The children of that clock +will all run with a period of ``parent_period * multiplier / divider``. +For instance, if the clock has a frequency of 8MHz and you set its +multiplier to 2 and its divider to 3, the child clocks will run +at 12MHz. + +You can change the multiplier and divider of a clock at runtime, +so you can use this to model clock controller devices which +have guest-programmable frequency multipliers or dividers. + +Note that ``clock_set_mul_div()`` does not automatically call +``clock_propagate()``. If you make a runtime change to the +multiplier or divider you must call clock_propagate() yourself. + Unconnected input clocks ------------------------ diff --git a/docs/devel/code-of-conduct.rst b/docs/devel/code-of-conduct.rst index 277b5250d13..195444d1b48 100644 --- a/docs/devel/code-of-conduct.rst +++ b/docs/devel/code-of-conduct.rst @@ -55,6 +55,6 @@ Sources ------- This document is based on the `Fedora Code of Conduct -`__ and the -`Contributor Covenant version 1.3.0 +`__ +(as of April 2021) and the `Contributor Covenant version 1.3.0 `__. diff --git a/docs/devel/control-flow-integrity.rst b/docs/devel/control-flow-integrity.rst index d89d70733d8..e6b73a4fe1a 100644 --- a/docs/devel/control-flow-integrity.rst +++ b/docs/devel/control-flow-integrity.rst @@ -39,7 +39,7 @@ later). Given the use of LTO, a version of AR that supports LLVM IR is required. The easies way of doing this is by selecting the AR provided by LLVM:: - AR=llvm-ar-9 CC=clang-9 CXX=lang++-9 /path/to/configure --enable-cfi + AR=llvm-ar-9 CC=clang-9 CXX=clang++-9 /path/to/configure --enable-cfi CFI is enabled on every binary produced. @@ -131,7 +131,7 @@ lld with version 11+. 
In other words, to compile with fuzzing and CFI, clang 11+ is required, and lld needs to be used as a linker:: - AR=llvm-ar-11 CC=clang-11 CXX=lang++-11 /path/to/configure --enable-cfi \ + AR=llvm-ar-11 CC=clang-11 CXX=clang++-11 /path/to/configure --enable-cfi \ -enable-fuzzing --extra-ldflags="-fuse-ld=lld" and then, compile the fuzzers as usual. diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst index 33a576058ca..68f6065d642 100644 --- a/docs/devel/decodetree.rst +++ b/docs/devel/decodetree.rst @@ -40,9 +40,6 @@ and returns an integral value extracted from there. A field with no ``unnamed_fields`` and no ``!function`` is in error. -FIXME: the fields of the structure into which this result will be stored -is restricted to ``int``. Which means that we cannot expand 64-bit items. - Field examples: +---------------------------+---------------------------------------------+ @@ -66,9 +63,14 @@ Argument Sets Syntax:: args_def := '&' identifier ( args_elt )+ ( !extern )? - args_elt := identifier + args_elt := identifier (':' identifier)? Each *args_elt* defines an argument within the argument set. +If the form of the *args_elt* contains a colon, the first +identifier is the argument name and the second identifier is +the argument type. If the colon is missing, the argument +type will be ``int``. + Each argument set will be rendered as a C structure "arg_$name" with each of the fields being one of the member arguments. @@ -86,6 +88,7 @@ Argument set examples:: &reg3 ra rb rc &loadstore reg base offset + &longldst reg base offset:int64_t Formats diff --git a/docs/devel/ebpf_rss.rst b/docs/devel/ebpf_rss.rst new file mode 100644 index 00000000000..4a68682b31a --- /dev/null +++ b/docs/devel/ebpf_rss.rst @@ -0,0 +1,125 @@ +=========================== +eBPF RSS virtio-net support +=========================== + +RSS (Receive Side Scaling) is used to distribute network packets to guest virtqueues +by calculating a packet hash. Usually every queue is then processed by a specific guest CPU core. + +For now there are two RSS implementations in QEMU: +- 'in-qemu' RSS (works only if QEMU receives the network packets, i.e. vhost=off) +- eBPF RSS (can also function with vhost=on) + +eBPF support (CONFIG_EBPF) is enabled by the 'configure' script. +To enable eBPF RSS support use './configure --enable-bpf'. + +If no steering BPF is set for the kernel's TUN module, TUN selects the rx virtqueue automatically, +based on a lookup table built from the calculated symmetric hash +of transmitted packets. +If a steering BPF is set for TUN, the BPF code calculates the hash of the packet header and +returns the number of the virtqueue to place the packet in. + +Simplified decision formula: + +.. code:: C + + queue_index = indirection_table[hash(packet_data) % indirection_table_size] + + +The hash cannot (or should not) be calculated for every packet. + +Note: currently, eBPF RSS does not support hash reporting. + +eBPF RSS is turned on by different combinations of vhost-net, virtio-net and tap configurations: + +- eBPF is used: + + tap,vhost=off & virtio-net-pci,rss=on,hash=off + +- eBPF is used: + + tap,vhost=on & virtio-net-pci,rss=on,hash=off + +- 'in-qemu' RSS is used: + + tap,vhost=off & virtio-net-pci,rss=on,hash=on + +- eBPF is used, hash population feature is not reported to the guest: + + tap,vhost=on & virtio-net-pci,rss=on,hash=on + +If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported. +'in-qemu' RSS is also used as a fallback if the eBPF program failed to load or could not be set on TUN.
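To make the decision formula above concrete, here is a small illustrative sketch (the function and parameter names are made up; the real selection logic lives in the eBPF program under tools/ebpf/):

.. code:: C

    /* Illustrative only: pick an rx virtqueue from the packet hash
     * using the RSS indirection table, as in the formula above. */
    static uint16_t rss_select_queue(uint32_t packet_hash,
                                     const uint16_t *indirection_table,
                                     unsigned int table_len)
    {
        return indirection_table[packet_hash % table_len];
    }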
+ +RSS eBPF program +---------------- + +The RSS program is located in ebpf/rss.bpf.skeleton.h, which is generated by bpftool. +So the program is part of the QEMU binary. +Initially, the eBPF program was compiled by clang, and its source code is located at tools/ebpf/rss.bpf.c. +Prerequisites to recompile the eBPF program (regenerate ebpf/rss.bpf.skeleton.h): + + llvm, clang, kernel source tree, bpftool + Adjust Makefile.ebpf to reflect the location of the kernel source tree + + $ cd tools/ebpf + $ make -f Makefile.ebpf + +The current eBPF RSS implementation uses 'bounded loops' with 'backward jump instructions', which are only present in recent kernels. +Overall, eBPF RSS works on kernels 5.8+. + +eBPF RSS implementation +----------------------- + +The eBPF RSS loading functionality is located in ebpf/ebpf_rss.c and ebpf/ebpf_rss.h. + +The ``struct EBPFRSSContext`` structure holds the libbpf context and 4 file descriptors: + +- ctx - pointer to the libbpf context. +- program_fd - file descriptor of the eBPF RSS program. +- map_configuration - file descriptor of the 'configuration' map. This map contains one element of 'struct EBPFRSSConfig'. This configuration determines eBPF program behavior. +- map_toeplitz_key - file descriptor of the 'Toeplitz key' map. It contains one element, the 40-byte key prepared for the hashing algorithm. +- map_indirections_table - 128 elements of queue indexes. + +``struct EBPFRSSConfig`` fields: + +- redirect - "boolean" value; whether the hash should be calculated. If false, ``default_queue`` is used as the final decision. +- populate_hash - not used for now. eBPF RSS doesn't support hash reporting. +- hash_types - binary mask of different hash types. See the ``VIRTIO_NET_RSS_HASH_TYPE_*`` defines. If the hash should not be calculated for a packet, ``default_queue`` is used. +- indirections_len - length of the indirections table, maximum 128. +- default_queue - the queue index used for packets that shouldn't be hashed. For some packets the hash can't be calculated (e.g. ARP). + +Functions: + +- ``ebpf_rss_init()`` - sets ctx to NULL, which indicates that EBPFRSSContext is not loaded. +- ``ebpf_rss_load()`` - creates 3 maps and loads the eBPF program from rss.bpf.skeleton.h. Returns 'true' on success. After that, program_fd can be used to set steering for TAP. +- ``ebpf_rss_set_all()`` - sets values for the eBPF maps. The ``indirections_table`` length is in EBPFRSSConfig. ``toeplitz_key`` is a VIRTIO_NET_RSS_MAX_KEY_SIZE (40 bytes) array. +- ``ebpf_rss_unload()`` - closes all file descriptors and sets ctx to NULL. + +Simplified eBPF RSS workflow: + +.. code:: C + + struct EBPFRSSConfig config; + config.redirect = 1; + config.hash_types = VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4; + config.indirections_len = VIRTIO_NET_RSS_MAX_TABLE_LEN; + config.default_queue = 0; + + uint16_t table[VIRTIO_NET_RSS_MAX_TABLE_LEN] = {...}; + uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {...}; + + struct EBPFRSSContext ctx; + ebpf_rss_init(&ctx); + ebpf_rss_load(&ctx); + ebpf_rss_set_all(&ctx, &config, table, key); + if (net_client->info->set_steering_ebpf != NULL) { + net_client->info->set_steering_ebpf(net_client, ctx.program_fd); + } + ... + ebpf_rss_unload(&ctx); + + +NetClientState SetSteeringEBPF() +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For now, the ``set_steering_ebpf()`` method is supported by the Linux TAP NetClientState. The method requires an eBPF program file descriptor as an argument.
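As a rough sketch of what such a method boils down to for a TAP backend, assuming the Linux ``TUNSETSTEERINGEBPF`` ioctl (error handling and QEMU specifics omitted; the helper name is made up):

.. code:: C

    #include <stdbool.h>
    #include <sys/ioctl.h>
    #include <linux/if_tun.h>

    /* Attach (or, with prog_fd == -1, detach) a steering eBPF program
     * to an already-open TUN/TAP file descriptor. */
    static bool tap_like_set_steering_ebpf(int tun_fd, int prog_fd)
    {
        return ioctl(tun_fd, TUNSETSTEERINGEBPF, &prog_fd) == 0;
    }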
diff --git a/docs/devel/fuzzing.rst b/docs/devel/fuzzing.rst index 2749bb9bed3..784ecb99e66 100644 --- a/docs/devel/fuzzing.rst +++ b/docs/devel/fuzzing.rst @@ -182,10 +182,11 @@ The output should contain a complete list of matched MemoryRegions. OSS-Fuzz -------- -QEMU is continuously fuzzed on `OSS-Fuzz` __(https://github.com/google/oss-fuzz). -By default, the OSS-Fuzz build will try to fuzz every fuzz-target. Since the -generic-fuzz target requires additional information provided in environment -variables, we pre-define some generic-fuzz configs in +QEMU is continuously fuzzed on `OSS-Fuzz +`_. By default, the OSS-Fuzz build +will try to fuzz every fuzz-target. Since the generic-fuzz target +requires additional information provided in environment variables, we +pre-define some generic-fuzz configs in ``tests/qtest/fuzz/generic_fuzz_configs.h``. Each config must specify: - ``.name``: To identify the fuzzer config diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 6cf7e2d2330..afd937535e9 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -1,15 +1,11 @@ -.. This is the top level page for the 'devel' manual. - - +--------------------- Developer Information -===================== +--------------------- -This manual documents various parts of the internals of QEMU. +This section of the manual documents various parts of the internals of QEMU. You only need to read it if you are interested in reading or modifying QEMU's source code. -Contents: - .. toctree:: :maxdepth: 2 :includehidden: @@ -27,6 +23,7 @@ Contents: migration atomics stable-process + ci qtest decodetree secure-coding-practices @@ -36,9 +33,18 @@ Contents: multi-thread-tcg tcg-plugins bitops + ui reset s390-dasd-ipl clocks qom + modules block-coroutine-wrapper multi-process + ebpf_rss + vfio-migration + qapi-code-gen + writing-monitor-commands + trivial-patches + submitting-a-patch + submitting-a-pull-request diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst index cb2d7ffac0f..a1cdbec7512 100644 --- a/docs/devel/kconfig.rst +++ b/docs/devel/kconfig.rst @@ -303,5 +303,5 @@ variable:: host_kconfig = \ ('CONFIG_TPM' in config_host ? ['CONFIG_TPM=y'] : []) + \ ('CONFIG_SPICE' in config_host ? ['CONFIG_SPICE=y'] : []) + \ - ('CONFIG_IVSHMEM' in config_host ? ['CONFIG_IVSHMEM=y'] : []) + \ + (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ ... diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst index 568274baec0..8f0035c821b 100644 --- a/docs/devel/loads-stores.rst +++ b/docs/devel/loads-stores.rst @@ -68,15 +68,19 @@ Regexes for git grep - ``\`` - ``\`` -``cpu_{ld,st}*_mmuidx_ra`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ +``cpu_{ld,st}*_mmu`` +~~~~~~~~~~~~~~~~~~~~ -These functions operate on a guest virtual address plus a context, -known as a "mmu index" or ``mmuidx``, which controls how that virtual -address is translated. The meaning of the indexes are target specific, -but specifying a particular index might be necessary if, for instance, -the helper requires an "always as non-privileged" access rather that -the default access for the current state of the guest CPU. +These functions operate on a guest virtual address, plus a context +known as a "mmu index" which controls how that virtual address is +translated, plus a ``MemOp`` which contains alignment requirements +among other things. The ``MemOp`` and mmu index are combined into +a single argument of type ``MemOpIdx``. 
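For example, a minimal sketch, assuming a TCG helper context where ``GETPC()`` yields the return address into the generated code, of an aligned little-endian 32-bit load through this interface:

.. code:: c

    /* Sketch: combine a MemOp (size, endianness, alignment) and an mmu
     * index into a MemOpIdx, then perform the load. */
    MemOpIdx oi = make_memop_idx(MO_LE | MO_32 | MO_ALIGN, mmu_idx);
    uint32_t val = cpu_ldl_le_mmu(env, addr, oi, GETPC());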
+ +The meaning of the indexes are target specific, but specifying a +particular index might be necessary if, for instance, the helper +requires a "always as non-privileged" access rather than the +default access for the current state of the guest CPU. These functions may cause a guest CPU exception to be taken (e.g. for an alignment fault or MMU fault) which will result in @@ -99,6 +103,35 @@ function, which is a return address into the generated code [#gpc]_. Function names follow the pattern: +load: ``cpu_ld{size}{end}_mmu(env, ptr, oi, retaddr)`` + +store: ``cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr)`` + +``size`` + - ``b`` : 8 bits + - ``w`` : 16 bits + - ``l`` : 32 bits + - ``q`` : 64 bits + +``end`` + - (empty) : for target endian, or 8 bit sizes + - ``_be`` : big endian + - ``_le`` : little endian + +Regexes for git grep: + - ``\`` + - ``\`` + + +``cpu_{ld,st}*_mmuidx_ra`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions work like the ``cpu_{ld,st}_mmu`` functions except +that the ``mmuidx`` parameter is not combined with a ``MemOp``, +and therefore there is no required alignment supplied or enforced. + +Function names follow the pattern: + load: ``cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmuidx, retaddr)`` store: ``cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)`` @@ -132,7 +165,8 @@ of the guest CPU, as determined by ``cpu_mmu_index(env, false)``. These are generally the preferred way to do accesses by guest virtual address from helper functions, unless the access should -be performed with a context other than the default. +be performed with a context other than the default, or alignment +should be enforced for the access. Function names follow the pattern: diff --git a/docs/devel/lockcnt.txt b/docs/devel/lockcnt.txt index 2d85462fe37..a3fb3bc5d8d 100644 --- a/docs/devel/lockcnt.txt +++ b/docs/devel/lockcnt.txt @@ -145,7 +145,7 @@ can also be more efficient in two ways: - on some platforms, one can implement QemuLockCnt to hold the lock and the mutex in a single word, making the fast path no more expensive than simply managing a counter using atomic operations (see - docs/devel/atomics.txt). This can be very helpful if concurrent access to + docs/devel/atomics.rst). This can be very helpful if concurrent access to the data structure is expected to be rare. diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 19c3d4f3eac..24012534827 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -53,7 +53,7 @@ savevm/loadvm functionality. Debugging ========= -The migration stream can be analyzed thanks to `scripts/analyze-migration.py`. +The migration stream can be analyzed thanks to ``scripts/analyze-migration.py``. Example usage: @@ -75,8 +75,8 @@ Common infrastructure ===================== The files, sockets or fd's that carry the migration stream are abstracted by -the ``QEMUFile`` type (see `migration/qemu-file.h`). In most cases this -is connected to a subtype of ``QIOChannel`` (see `io/`). +the ``QEMUFile`` type (see ``migration/qemu-file.h``). In most cases this +is connected to a subtype of ``QIOChannel`` (see ``io/``). Saving the state of one device @@ -166,14 +166,14 @@ An example (from hw/input/pckbd.c) }; We are declaring the state with name "pckbd". -The `version_id` is 3, and the fields are 4 uint8_t in a KBDState structure. +The ``version_id`` is 3, and the fields are 4 uint8_t in a KBDState structure. We registered this with: .. 
code:: c vmstate_register(NULL, 0, &vmstate_kbd, s); -For devices that are `qdev` based, we can register the device in the class +For devices that are ``qdev`` based, we can register the device in the class init function: .. code:: c @@ -210,9 +210,9 @@ another to load the state back. SaveVMHandlers *ops, void *opaque); -Two functions in the ``ops`` structure are the `save_state` -and `load_state` functions. Notice that `load_state` receives a version_id -parameter to know what state format is receiving. `save_state` doesn't +Two functions in the ``ops`` structure are the ``save_state`` +and ``load_state`` functions. Notice that ``load_state`` receives a version_id +parameter to know what state format is receiving. ``save_state`` doesn't have a version_id parameter because it always uses the latest version. Note that because the VMState macros still save the data in a raw @@ -385,18 +385,18 @@ migration of a device, and using them breaks backward-migration compatibility; in general most changes can be made by adding Subsections (see above) or _TEST macros (see above) which won't break compatibility. -Each version is associated with a series of fields saved. The `save_state` always saves -the state as the newer version. But `load_state` sometimes is able to +Each version is associated with a series of fields saved. The ``save_state`` always saves +the state as the newer version. But ``load_state`` sometimes is able to load state from an older version. You can see that there are several version fields: -- `version_id`: the maximum version_id supported by VMState for that device. -- `minimum_version_id`: the minimum version_id that VMState is able to understand +- ``version_id``: the maximum version_id supported by VMState for that device. +- ``minimum_version_id``: the minimum version_id that VMState is able to understand for that device. -- `minimum_version_id_old`: For devices that were not able to port to vmstate, we can +- ``minimum_version_id_old``: For devices that were not able to port to vmstate, we can assign a function that knows how to read this old state. This field is - ignored if there is no `load_state_old` handler. + ignored if there is no ``load_state_old`` handler. VMState is able to read versions from minimum_version_id to version_id. And the function ``load_state_old()`` (if present) is able to @@ -454,7 +454,7 @@ data and then transferred to the main structure. If you use memory API functions that update memory layout outside initialization (i.e., in response to a guest action), this is a strong -indication that you need to call these functions in a `post_load` callback. +indication that you need to call these functions in a ``post_load`` callback. Examples of such memory API functions are: - memory_region_add_subregion() @@ -823,12 +823,12 @@ Postcopy migration with shared memory needs explicit support from the other processes that share memory and from QEMU. There are restrictions on the type of memory that userfault can support shared. -The Linux kernel userfault support works on `/dev/shm` memory and on `hugetlbfs` -(although the kernel doesn't provide an equivalent to `madvise(MADV_DONTNEED)` +The Linux kernel userfault support works on ``/dev/shm`` memory and on ``hugetlbfs`` +(although the kernel doesn't provide an equivalent to ``madvise(MADV_DONTNEED)`` for hugetlbfs which may be a problem in some configurations). 
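For background on what the client side involves, here is a minimal sketch (not QEMU code; error handling omitted) of opening a userfaultfd and registering a shared region for missing-page notifications:

.. code:: c

    #include <fcntl.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    static int register_uffd(void *addr, size_t len)
    {
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

        struct uffdio_api api = { .api = UFFD_API };
        ioctl(uffd, UFFDIO_API, &api);

        struct uffdio_register reg = {
            .range = { .start = (uintptr_t)addr, .len = len },
            .mode = UFFDIO_REGISTER_MODE_MISSING,
        };
        ioctl(uffd, UFFDIO_REGISTER, &reg);

        return uffd; /* read page-fault events from this fd */
    }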
The vhost-user code in QEMU supports clients that have Postcopy support, -and the `vhost-user-bridge` (in `tests/`) and the DPDK package have changes +and the ``vhost-user-bridge`` (in ``tests/``) and the DPDK package have changes to support postcopy. The client needs to open a userfaultfd and register the areas diff --git a/docs/devel/modules.rst b/docs/devel/modules.rst new file mode 100644 index 00000000000..8e999c4fa48 --- /dev/null +++ b/docs/devel/modules.rst @@ -0,0 +1,5 @@ +============ +QEMU modules +============ + +.. kernel-doc:: include/qemu/module.h diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst index 69699329d62..e4801751f23 100644 --- a/docs/devel/multi-process.rst +++ b/docs/devel/multi-process.rst @@ -1,15 +1,17 @@ -This is the design document for multi-process QEMU. It does not -necessarily reflect the status of the current implementation, which -may lack features or be considerably different from what is described -in this document. This document is still useful as a description of -the goals and general direction of this feature. - -Please refer to the following wiki for latest details: -https://wiki.qemu.org/Features/MultiProcessQEMU - Multi-process QEMU =================== +.. note:: + + This is the design document for multi-process QEMU. It does not + necessarily reflect the status of the current implementation, which + may lack features or be considerably different from what is described + in this document. This document is still useful as a description of + the goals and general direction of this feature. + + Please refer to the following wiki for latest details: + https://wiki.qemu.org/Features/MultiProcessQEMU + QEMU is often used as the hypervisor for virtual machines running in the Oracle cloud. Since one of the advantages of cloud computing is the ability to run many VMs from different tenants in the same cloud @@ -185,9 +187,9 @@ desired, in which the emulation application should only be allowed to access the files or devices the VM it's running on behalf of can access. #### qemu-io model -Qemu-io is a test harness used to test changes to the QEMU block backend -object code. (e.g., the code that implements disk images for disk driver -emulation) Qemu-io is not a device emulation application per se, but it +``qemu-io`` is a test harness used to test changes to the QEMU block backend +object code (e.g., the code that implements disk images for disk driver +emulation). ``qemu-io`` is not a device emulation application per se, but it does compile the QEMU block objects into a separate binary from the main QEMU one. This could be useful for disk device emulation, since its emulation applications will need to include the QEMU block objects. @@ -639,7 +641,7 @@ the CPU that issued the MMIO. +==========+========================+ | rid | range MMIO is within | +----------+------------------------+ -| offset | offset withing *rid* | +| offset | offset within *rid* | +----------+------------------------+ | type | e.g., load or store | +----------+------------------------+ diff --git a/docs/devel/multi-thread-tcg.rst b/docs/devel/multi-thread-tcg.rst index 92a9eba13c9..c9541a7b20a 100644 --- a/docs/devel/multi-thread-tcg.rst +++ b/docs/devel/multi-thread-tcg.rst @@ -4,8 +4,9 @@ This work is licensed under the terms of the GNU GPL, version 2 or later. See the COPYING file in the top-level directory. 
-Introduction -============ +================== +Multi-threaded TCG +================== This document outlines the design for multi-threaded TCG (a.k.a MTTCG) system-mode emulation. user-mode emulation has always mirrored the @@ -227,7 +228,7 @@ Emulated hardware state Currently thanks to KVM work any access to IO memory is automatically protected by the global iothread mutex, also known as the BQL (Big -Qemu Lock). Any IO region that doesn't use global mutex is expected to +QEMU Lock). Any IO region that doesn't use global mutex is expected to do its own locking. However IO memory isn't the only way emulated hardware state can be diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst new file mode 100644 index 00000000000..a3b54730894 --- /dev/null +++ b/docs/devel/qapi-code-gen.rst @@ -0,0 +1,1932 @@ +================================== +How to use the QAPI code generator +================================== + +.. + Copyright IBM Corp. 2011 + Copyright (C) 2012-2016 Red Hat, Inc. + + This work is licensed under the terms of the GNU GPL, version 2 or + later. See the COPYING file in the top-level directory. + + +Introduction +============ + +QAPI is a native C API within QEMU which provides management-level +functionality to internal and external users. For external +users/processes, this interface is made available by a JSON-based wire +format for the QEMU Monitor Protocol (QMP) for controlling qemu, as +well as the QEMU Guest Agent (QGA) for communicating with the guest. +The remainder of this document uses "Client JSON Protocol" when +referring to the wire contents of a QMP or QGA connection. + +To map between Client JSON Protocol interfaces and the native C API, +we generate C code from a QAPI schema. This document describes the +QAPI schema language, and how it gets mapped to the Client JSON +Protocol and to C. It additionally provides guidance on maintaining +Client JSON Protocol compatibility. + + +The QAPI schema language +======================== + +The QAPI schema defines the Client JSON Protocol's commands and +events, as well as types used by them. Forward references are +allowed. + +It is permissible for the schema to contain additional types not used +by any commands or events, for the side effect of generated C code +used internally. + +There are several kinds of types: simple types (a number of built-in +types, such as ``int`` and ``str``; as well as enumerations), arrays, +complex types (structs and two flavors of unions), and alternate types +(a choice between other types). + + +Schema syntax +------------- + +Syntax is loosely based on `JSON `_. +Differences: + +* Comments: start with a hash character (``#``) that is not part of a + string, and extend to the end of the line. + +* Strings are enclosed in ``'single quotes'``, not ``"double quotes"``. + +* Strings are restricted to printable ASCII, and escape sequences to + just ``\\``. + +* Numbers and ``null`` are not supported. + +A second layer of syntax defines the sequences of JSON texts that are +a correctly structured QAPI schema. 
We provide a grammar for this +syntax in an EBNF-like notation: + +* Production rules look like ``non-terminal = expression`` +* Concatenation: expression ``A B`` matches expression ``A``, then ``B`` +* Alternation: expression ``A | B`` matches expression ``A`` or ``B`` +* Repetition: expression ``A...`` matches zero or more occurrences of + expression ``A`` +* Repetition: expression ``A, ...`` matches zero or more occurrences of + expression ``A`` separated by ``,`` +* Grouping: expression ``( A )`` matches expression ``A`` +* JSON's structural characters are terminals: ``{ } [ ] : ,`` +* JSON's literal names are terminals: ``false true`` +* String literals enclosed in ``'single quotes'`` are terminal, and match + this JSON string, with a leading ``*`` stripped off +* When JSON object member's name starts with ``*``, the member is + optional. +* The symbol ``STRING`` is a terminal, and matches any JSON string +* The symbol ``BOOL`` is a terminal, and matches JSON ``false`` or ``true`` +* ALL-CAPS words other than ``STRING`` are non-terminals + +The order of members within JSON objects does not matter unless +explicitly noted. + +A QAPI schema consists of a series of top-level expressions:: + + SCHEMA = TOP-LEVEL-EXPR... + +The top-level expressions are all JSON objects. Code and +documentation is generated in schema definition order. Code order +should not matter. + +A top-level expressions is either a directive or a definition:: + + TOP-LEVEL-EXPR = DIRECTIVE | DEFINITION + +There are two kinds of directives and six kinds of definitions:: + + DIRECTIVE = INCLUDE | PRAGMA + DEFINITION = ENUM | STRUCT | UNION | ALTERNATE | COMMAND | EVENT + +These are discussed in detail below. + + +Built-in Types +-------------- + +The following types are predefined, and map to C as follows: + + ============= ============== ============================================ + Schema C JSON + ============= ============== ============================================ + ``str`` ``char *`` any JSON string, UTF-8 + ``number`` ``double`` any JSON number + ``int`` ``int64_t`` a JSON number without fractional part + that fits into the C integer type + ``int8`` ``int8_t`` likewise + ``int16`` ``int16_t`` likewise + ``int32`` ``int32_t`` likewise + ``int64`` ``int64_t`` likewise + ``uint8`` ``uint8_t`` likewise + ``uint16`` ``uint16_t`` likewise + ``uint32`` ``uint32_t`` likewise + ``uint64`` ``uint64_t`` likewise + ``size`` ``uint64_t`` like ``uint64_t``, except + ``StringInputVisitor`` accepts size suffixes + ``bool`` ``bool`` JSON ``true`` or ``false`` + ``null`` ``QNull *`` JSON ``null`` + ``any`` ``QObject *`` any JSON value + ``QType`` ``QType`` JSON string matching enum ``QType`` values + ============= ============== ============================================ + + +Include directives +------------------ + +Syntax:: + + INCLUDE = { 'include': STRING } + +The QAPI schema definitions can be modularized using the 'include' directive:: + + { 'include': 'path/to/file.json' } + +The directive is evaluated recursively, and include paths are relative +to the file using the directive. Multiple includes of the same file +are idempotent. + +As a matter of style, it is a good idea to have all files be +self-contained, but at the moment, nothing prevents an included file +from making a forward reference to a type that is only introduced by +an outer file. The parser may be made stricter in the future to +prevent incomplete include files. + +.. 
_pragma: + +Pragma directives +----------------- + +Syntax:: + + PRAGMA = { 'pragma': { + '*doc-required': BOOL, + '*command-name-exceptions': [ STRING, ... ], + '*command-returns-exceptions': [ STRING, ... ], + '*member-name-exceptions': [ STRING, ... ] } } + +The pragma directive lets you control optional generator behavior. + +Pragma's scope is currently the complete schema. Setting the same +pragma to different values in parts of the schema doesn't work. + +Pragma 'doc-required' takes a boolean value. If true, documentation +is required. Default is false. + +Pragma 'command-name-exceptions' takes a list of commands whose names +may contain ``"_"`` instead of ``"-"``. Default is none. + +Pragma 'command-returns-exceptions' takes a list of commands that may +violate the rules on permitted return types. Default is none. + +Pragma 'member-name-exceptions' takes a list of types whose member +names may contain uppercase letters, and ``"_"`` instead of ``"-"``. +Default is none. + +.. _ENUM-VALUE: + +Enumeration types +----------------- + +Syntax:: + + ENUM = { 'enum': STRING, + 'data': [ ENUM-VALUE, ... ], + '*prefix': STRING, + '*if': COND, + '*features': FEATURES } + ENUM-VALUE = STRING + | { 'name': STRING, + '*if': COND, + '*features': FEATURES } + +Member 'enum' names the enum type. + +Each member of the 'data' array defines a value of the enumeration +type. The form STRING is shorthand for :code:`{ 'name': STRING }`. The +'name' values must be be distinct. + +Example:: + + { 'enum': 'MyEnum', 'data': [ 'value1', 'value2', 'value3' ] } + +Nothing prevents an empty enumeration, although it is probably not +useful. + +On the wire, an enumeration type's value is represented by its +(string) name. In C, it's represented by an enumeration constant. +These are of the form PREFIX_NAME, where PREFIX is derived from the +enumeration type's name, and NAME from the value's name. For the +example above, the generator maps 'MyEnum' to MY_ENUM and 'value1' to +VALUE1, resulting in the enumeration constant MY_ENUM_VALUE1. The +optional 'prefix' member overrides PREFIX. + +The generated C enumeration constants have values 0, 1, ..., N-1 (in +QAPI schema order), where N is the number of values. There is an +additional enumeration constant PREFIX__MAX with value N. + +Do not use string or an integer type when an enumeration type can do +the job satisfactorily. + +The optional 'if' member specifies a conditional. See `Configuring the +schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +.. _TYPE-REF: + +Type references and array types +------------------------------- + +Syntax:: + + TYPE-REF = STRING | ARRAY-TYPE + ARRAY-TYPE = [ STRING ] + +A string denotes the type named by the string. + +A one-element array containing a string denotes an array of the type +named by the string. Example: ``['int']`` denotes an array of ``int``. + + +Struct types +------------ + +Syntax:: + + STRUCT = { 'struct': STRING, + 'data': MEMBERS, + '*base': STRING, + '*if': COND, + '*features': FEATURES } + MEMBERS = { MEMBER, ... } + MEMBER = STRING : TYPE-REF + | STRING : { 'type': TYPE-REF, + '*if': COND, + '*features': FEATURES } + +Member 'struct' names the struct type. + +Each MEMBER of the 'data' object defines a member of the struct type. + +.. _MEMBERS: + +The MEMBER's STRING name consists of an optional ``*`` prefix and the +struct member name. If ``*`` is present, the member is optional. 
+ +The MEMBER's value defines its properties, in particular its type. +The form TYPE-REF_ is shorthand for :code:`{ 'type': TYPE-REF }`. + +Example:: + + { 'struct': 'MyType', + 'data': { 'member1': 'str', 'member2': ['int'], '*member3': 'str' } } + +A struct type corresponds to a struct in C, and an object in JSON. +The C struct's members are generated in QAPI schema order. + +The optional 'base' member names a struct type whose members are to be +included in this type. They go first in the C struct. + +Example:: + + { 'struct': 'BlockdevOptionsGenericFormat', + 'data': { 'file': 'str' } } + { 'struct': 'BlockdevOptionsGenericCOWFormat', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { '*backing': 'str' } } + +An example BlockdevOptionsGenericCOWFormat object on the wire could use +both members like this:: + + { "file": "/some/place/my-image", + "backing": "/some/place/my-backing-file" } + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +Union types +----------- + +Syntax:: + + UNION = { 'union': STRING, + 'base': ( MEMBERS | STRING ), + 'discriminator': STRING, + 'data': BRANCHES, + '*if': COND, + '*features': FEATURES } + BRANCHES = { BRANCH, ... } + BRANCH = STRING : TYPE-REF + | STRING : { 'type': TYPE-REF, '*if': COND } + +Member 'union' names the union type. + +The 'base' member defines the common members. If it is a MEMBERS_ +object, it defines common members just like a struct type's 'data' +member defines struct type members. If it is a STRING, it names a +struct type whose members are the common members. + +Member 'discriminator' must name a non-optional enum-typed member of +the base struct. That member's value selects a branch by its name. +If no such branch exists, an empty branch is assumed. + +Each BRANCH of the 'data' object defines a branch of the union. A +union must have at least one branch. + +The BRANCH's STRING name is the branch name. It must be a value of +the discriminator enum type. + +The BRANCH's value defines the branch's properties, in particular its +type. The type must a struct type. The form TYPE-REF_ is shorthand +for :code:`{ 'type': TYPE-REF }`. + +In the Client JSON Protocol, a union is represented by an object with +the common members (from the base type) and the selected branch's +members. The two sets of member names must be disjoint. + +Example:: + + { 'enum': 'BlockdevDriver', 'data': [ 'file', 'qcow2' ] } + { 'union': 'BlockdevOptions', + 'base': { 'driver': 'BlockdevDriver', '*read-only': 'bool' }, + 'discriminator': 'driver', + 'data': { 'file': 'BlockdevOptionsFile', + 'qcow2': 'BlockdevOptionsQcow2' } } + +Resulting in these JSON objects:: + + { "driver": "file", "read-only": true, + "filename": "/some/place/my-image" } + { "driver": "qcow2", "read-only": false, + "backing": "/some/place/my-image", "lazy-refcounts": true } + +The order of branches need not match the order of the enum values. +The branches need not cover all possible enum values. In the +resulting generated C data types, a union is represented as a struct +with the base members in QAPI schema order, and then a union of +structures for each branch of the struct. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. 
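As a rough illustration of the C representation described above, the generated type for the BlockdevOptions example would look roughly like this (simplified; the real generated header also contains list types and other supporting declarations):

.. code:: c

    struct BlockdevOptions {
        /* Members of the base type, in QAPI schema order */
        BlockdevDriver driver;
        bool has_read_only;
        bool read_only;
        /* Variant members, selected by the value of 'driver' */
        union {
            BlockdevOptionsFile file;
            BlockdevOptionsQcow2 qcow2;
        } u;
    };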
+ + +Alternate types +--------------- + +Syntax:: + + ALTERNATE = { 'alternate': STRING, + 'data': ALTERNATIVES, + '*if': COND, + '*features': FEATURES } + ALTERNATIVES = { ALTERNATIVE, ... } + ALTERNATIVE = STRING : STRING + | STRING : { 'type': STRING, '*if': COND } + +Member 'alternate' names the alternate type. + +Each ALTERNATIVE of the 'data' object defines a branch of the +alternate. An alternate must have at least one branch. + +The ALTERNATIVE's STRING name is the branch name. + +The ALTERNATIVE's value defines the branch's properties, in particular +its type. The form STRING is shorthand for :code:`{ 'type': STRING }`. + +Example:: + + { 'alternate': 'BlockdevRef', + 'data': { 'definition': 'BlockdevOptions', + 'reference': 'str' } } + +An alternate type is like a union type, except there is no +discriminator on the wire. Instead, the branch to use is inferred +from the value. An alternate can only express a choice between types +represented differently on the wire. + +If a branch is typed as the 'bool' built-in, the alternate accepts +true and false; if it is typed as any of the various numeric +built-ins, it accepts a JSON number; if it is typed as a 'str' +built-in or named enum type, it accepts a JSON string; if it is typed +as the 'null' built-in, it accepts JSON null; and if it is typed as a +complex type (struct or union), it accepts a JSON object. + +The example alternate declaration above allows using both of the +following example objects:: + + { "file": "my_existing_block_device_id" } + { "file": { "driver": "file", + "read-only": false, + "filename": "/tmp/mydisk.qcow2" } } + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +Commands +-------- + +Syntax:: + + COMMAND = { 'command': STRING, + ( + '*data': ( MEMBERS | STRING ), + | + 'data': STRING, + 'boxed': true, + ) + '*returns': TYPE-REF, + '*success-response': false, + '*gen': false, + '*allow-oob': true, + '*allow-preconfig': true, + '*coroutine': true, + '*if': COND, + '*features': FEATURES } + +Member 'command' names the command. + +Member 'data' defines the arguments. It defaults to an empty MEMBERS_ +object. + +If 'data' is a MEMBERS_ object, then MEMBERS defines arguments just +like a struct type's 'data' defines struct type members. + +If 'data' is a STRING, then STRING names a complex type whose members +are the arguments. A union type requires ``'boxed': true``. + +Member 'returns' defines the command's return type. It defaults to an +empty struct type. It must normally be a complex type or an array of +a complex type. To return anything else, the command must be listed +in pragma 'commands-returns-exceptions'. If you do this, extending +the command to return additional information will be harder. Use of +the pragma for new commands is strongly discouraged. + +A command's error responses are not specified in the QAPI schema. +Error conditions should be documented in comments. + +In the Client JSON Protocol, the value of the "execute" or "exec-oob" +member is the command name. The value of the "arguments" member then +has to conform to the arguments, and the value of the success +response's "return" member will conform to the return type. 
+ +Some example commands:: + + { 'command': 'my-first-command', + 'data': { 'arg1': 'str', '*arg2': 'str' } } + { 'struct': 'MyType', 'data': { '*value': 'str' } } + { 'command': 'my-second-command', + 'returns': [ 'MyType' ] } + +which would validate this Client JSON Protocol transaction:: + + => { "execute": "my-first-command", + "arguments": { "arg1": "hello" } } + <= { "return": { } } + => { "execute": "my-second-command" } + <= { "return": [ { "value": "one" }, { } ] } + +The generator emits a prototype for the C function implementing the +command. The function itself needs to be written by hand. See +section `Code generated for commands`_ for examples. + +The function returns the return type. When member 'boxed' is absent, +it takes the command arguments as arguments one by one, in QAPI schema +order. Else it takes them wrapped in the C struct generated for the +complex argument type. It takes an additional ``Error **`` argument in +either case. + +The generator also emits a marshalling function that extracts +arguments for the user's function out of an input QDict, calls the +user's function, and if it succeeded, builds an output QObject from +its return value. This is for use by the QMP monitor core. + +In rare cases, QAPI cannot express a type-safe representation of a +corresponding Client JSON Protocol command. You then have to suppress +generation of a marshalling function by including a member 'gen' with +boolean value false, and instead write your own function. For +example:: + + { 'command': 'netdev_add', + 'data': {'type': 'str', 'id': 'str'}, + 'gen': false } + +Please try to avoid adding new commands that rely on this, and instead +use type-safe unions. + +Normally, the QAPI schema is used to describe synchronous exchanges, +where a response is expected. But in some cases, the action of a +command is expected to change state in a way that a successful +response is not possible (although the command will still return an +error object on failure). When a successful reply is not possible, +the command definition includes the optional member 'success-response' +with boolean value false. So far, only QGA makes use of this member. + +Member 'allow-oob' declares whether the command supports out-of-band +(OOB) execution. It defaults to false. For example:: + + { 'command': 'migrate_recover', + 'data': { 'uri': 'str' }, 'allow-oob': true } + +See qmp-spec.txt for out-of-band execution syntax and semantics. + +Commands supporting out-of-band execution can still be executed +in-band. + +When a command is executed in-band, its handler runs in the main +thread with the BQL held. + +When a command is executed out-of-band, its handler runs in a +dedicated monitor I/O thread with the BQL *not* held. + +An OOB-capable command handler must satisfy the following conditions: + +- It terminates quickly. +- It does not invoke system calls that may block. +- It does not access guest RAM that may block when userfaultfd is + enabled for postcopy live migration. +- It takes only "fast" locks, i.e. all critical sections protected by + any lock it takes also satisfy the conditions for OOB command + handler code. + +The restrictions on locking limit access to shared state. Such access +requires synchronization, but OOB commands can't take the BQL or any +other "slow" lock. + +When in doubt, do not implement OOB execution support. + +Member 'allow-preconfig' declares whether the command is available +before the machine is built. It defaults to false. 
For example:: + + { 'enum': 'QMPCapability', + 'data': [ 'oob' ] } + { 'command': 'qmp_capabilities', + 'data': { '*enable': [ 'QMPCapability' ] }, + 'allow-preconfig': true } + +QMP is available before the machine is built only when QEMU was +started with --preconfig. + +Member 'coroutine' tells the QMP dispatcher whether the command handler +is safe to be run in a coroutine. It defaults to false. If it is true, +the command handler is called from coroutine context and may yield while +waiting for an external event (such as I/O completion) in order to avoid +blocking the guest and other background operations. + +Coroutine safety can be hard to prove, similar to thread safety. Common +pitfalls are: + +- The global mutex isn't held across ``qemu_coroutine_yield()``, so + operations that used to assume that they execute atomically may have + to be more careful to protect against changes in the global state. + +- Nested event loops (``AIO_WAIT_WHILE()`` etc.) are problematic in + coroutine context and can easily lead to deadlocks. They should be + replaced by yielding and reentering the coroutine when the condition + becomes false. + +Since the command handler may assume coroutine context, any callers +other than the QMP dispatcher must also call it in coroutine context. +In particular, HMP commands calling such a QMP command handler must be +marked ``.coroutine = true`` in hmp-commands.hx. + +It is an error to specify both ``'coroutine': true`` and ``'allow-oob': true`` +for a command. We don't currently have a use case for both together and +without a use case, it's not entirely clear what the semantics should +be. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +Events +------ + +Syntax:: + + EVENT = { 'event': STRING, + ( + '*data': ( MEMBERS | STRING ), + | + 'data': STRING, + 'boxed': true, + ) + '*if': COND, + '*features': FEATURES } + +Member 'event' names the event. This is the event name used in the +Client JSON Protocol. + +Member 'data' defines the event-specific data. It defaults to an +empty MEMBERS object. + +If 'data' is a MEMBERS object, then MEMBERS defines event-specific +data just like a struct type's 'data' defines struct type members. + +If 'data' is a STRING, then STRING names a complex type whose members +are the event-specific data. A union type requires ``'boxed': true``. + +An example event is:: + + { 'event': 'EVENT_C', + 'data': { '*a': 'int', 'b': 'str' } } + +Resulting in this JSON object:: + + { "event": "EVENT_C", + "data": { "b": "test string" }, + "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } + +The generator emits a function to send the event. When member 'boxed' +is absent, it takes event-specific data one by one, in QAPI schema +order. Else it takes them wrapped in the C struct generated for the +complex type. See section `Code generated for events`_ for examples. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +The optional 'features' member specifies features. See Features_ +below for more on this. + + +.. _FEATURE: + +Features +-------- + +Syntax:: + + FEATURES = [ FEATURE, ... ] + FEATURE = STRING + | { 'name': STRING, '*if': COND } + +Sometimes, the behaviour of QEMU changes compatibly, but without a +change in the QMP syntax (usually by allowing values or operations +that previously resulted in an error). 
QMP clients may still need to +know whether the extension is available. + +For this purpose, a list of features can be specified for a command or +struct type. Each list member can either be ``{ 'name': STRING, '*if': +COND }``, or STRING, which is shorthand for ``{ 'name': STRING }``. + +The optional 'if' member specifies a conditional. See `Configuring +the schema`_ below for more on this. + +Example:: + + { 'struct': 'TestType', + 'data': { 'number': 'int' }, + 'features': [ 'allow-negative-numbers' ] } + +The feature strings are exposed to clients in introspection, as +explained in section `Client JSON Protocol introspection`_. + +Intended use is to have each feature string signal that this build of +QEMU shows a certain behaviour. + + +Special features +~~~~~~~~~~~~~~~~ + +Feature "deprecated" marks a command, event, enum value, or struct +member as deprecated. It is not supported elsewhere so far. +Interfaces so marked may be withdrawn in future releases in accordance +with QEMU's deprecation policy. + +Feature "unstable" marks a command, event, enum value, or struct +member as unstable. It is not supported elsewhere so far. Interfaces +so marked may be withdrawn or changed incompatibly in future releases. + + +Naming rules and reserved names +------------------------------- + +All names must begin with a letter, and contain only ASCII letters, +digits, hyphen, and underscore. There are two exceptions: enum values +may start with a digit, and names that are downstream extensions (see +section `Downstream extensions`_) start with underscore. + +Names beginning with ``q_`` are reserved for the generator, which uses +them for munging QMP names that resemble C keywords or other +problematic strings. For example, a member named ``default`` in qapi +becomes ``q_default`` in the generated C code. + +Types, commands, and events share a common namespace. Therefore, +generally speaking, type definitions should always use CamelCase for +user-defined type names, while built-in types are lowercase. + +Type names ending with ``Kind`` or ``List`` are reserved for the +generator, which uses them for implicit union enums and array types, +respectively. + +Command names, and member names within a type, should be all lower +case with words separated by a hyphen. However, some existing older +commands and complex types use underscore; when extending them, +consistency is preferred over blindly avoiding underscore. + +Event names should be ALL_CAPS with words separated by underscore. + +Member name ``u`` and names starting with ``has-`` or ``has_`` are reserved +for the generator, which uses them for unions and for tracking +optional members. + +Names beginning with ``x-`` used to signify "experimental". This +convention has been replaced by special feature "unstable". + +Pragmas ``command-name-exceptions`` and ``member-name-exceptions`` let +you violate naming rules. Use for new code is strongly discouraged. See +`Pragma directives`_ for details. + + +Downstream extensions +--------------------- + +QAPI schema names that are externally visible, say in the Client JSON +Protocol, need to be managed with care. Names starting with a +downstream prefix of the form __RFQDN_ are reserved for the downstream +who controls the valid, reverse fully qualified domain name RFQDN. +RFQDN may only contain ASCII letters, digits, hyphen and period. + +Example: Red Hat, Inc. controls redhat.com, and may therefore add a +downstream command ``__com.redhat_drive-mirror``. 
+ + +Configuring the schema +---------------------- + +Syntax:: + + COND = STRING + | { 'all': [ COND, ... ] } + | { 'any': [ COND, ... ] } + | { 'not': COND } + +All definitions take an optional 'if' member. Its value must be a +string, or an object with a single member 'all', 'any' or 'not'. + +The C code generated for the definition will then be guarded by an #if +preprocessing directive with an operand generated from that condition: + + * STRING will generate defined(STRING) + * { 'all': [COND, ...] } will generate (COND && ...) + * { 'any': [COND, ...] } will generate (COND || ...) + * { 'not': COND } will generate !COND + +Example: a conditional struct :: + + { 'struct': 'IfStruct', 'data': { 'foo': 'int' }, + 'if': { 'all': [ 'CONFIG_FOO', 'HAVE_BAR' ] } } + +gets its generated code guarded like this:: + + #if defined(CONFIG_FOO) && defined(HAVE_BAR) + ... generated code ... + #endif /* defined(HAVE_BAR) && defined(CONFIG_FOO) */ + +Individual members of complex types, command arguments, and +event-specific data can also be made conditional. This requires the +longhand form of MEMBER. + +Example: a struct type with unconditional member 'foo' and conditional +member 'bar' :: + + { 'struct': 'IfStruct', + 'data': { 'foo': 'int', + 'bar': { 'type': 'int', 'if': 'IFCOND'} } } + +A union's discriminator may not be conditional. + +Likewise, individual enumeration values can be conditional. This requires +the longhand form of ENUM-VALUE_. + +Example: an enum type with unconditional value 'foo' and conditional +value 'bar' :: + + { 'enum': 'IfEnum', + 'data': [ 'foo', + { 'name' : 'bar', 'if': 'IFCOND' } ] } + +Likewise, features can be conditional. This requires the longhand +form of FEATURE_. + +Example: a struct with conditional feature 'allow-negative-numbers' :: + + { 'struct': 'TestType', + 'data': { 'number': 'int' }, + 'features': [ { 'name': 'allow-negative-numbers', + 'if': 'IFCOND' } ] } + +Please note that you are responsible for ensuring that the C code will +compile with an arbitrary combination of conditions, since the +generator is unable to check it at this point. + +The conditions apply to introspection as well, i.e. introspection +shows a conditional entity only when the condition is satisfied in +this particular build. + + +Documentation comments +---------------------- + +A multi-line comment that starts and ends with a ``##`` line is a +documentation comment. + +If the documentation comment starts like :: + + ## + # @SYMBOL: + +it documents the definition of SYMBOL, else it's free-form +documentation. + +See below for more on `Definition documentation`_. + +Free-form documentation may be used to provide additional text and +structuring content. + + +Headings and subheadings +~~~~~~~~~~~~~~~~~~~~~~~~ + +A free-form documentation comment containing a line which starts with +some ``=`` symbols and then a space defines a section heading:: + + ## + # = This is a top level heading + # + # This is a free-form comment which will go under the + # top level heading. + ## + + ## + # == This is a second level heading + ## + +A heading line must be the first line of the documentation +comment block. + +Section headings must always be correctly nested, so you can only +define a third-level heading inside a second-level heading, and so on. + + +Documentation markup +~~~~~~~~~~~~~~~~~~~~ + +Documentation comments can use most rST markup.
In particular, +a ``::`` literal block can be used for examples:: + + # :: + # + # Text of the example, may span + # multiple lines + +``*`` starts an itemized list:: + + # * First item, may span + # multiple lines + # * Second item + +You can also use ``-`` instead of ``*``. + +A decimal number followed by ``.`` starts a numbered list:: + + # 1. First item, may span + # multiple lines + # 2. Second item + +The actual number doesn't matter. + +Lists of either kind must be preceded and followed by a blank line. +If a list item's text spans multiple lines, then the second and +subsequent lines must be correctly indented to line up with the +first character of the first line. + +The usual ****strong****, *\*emphasized\** and ````literal```` markup +should be used. If you need a single literal ``*``, you will need to +backslash-escape it. As an extension beyond the usual rST syntax, you +can also use ``@foo`` to reference a name in the schema; this is rendered +the same way as ````foo````. + +Example:: + + ## + # Some text foo with **bold** and *emphasis* + # 1. with a list + # 2. like that + # + # And some code: + # + # :: + # + # $ echo foo + # -> do this + # <- get that + ## + + +Definition documentation +~~~~~~~~~~~~~~~~~~~~~~~~ + +Definition documentation, if present, must immediately precede the +definition it documents. + +When documentation is required (see pragma_ 'doc-required'), every +definition must have documentation. + +Definition documentation starts with a line naming the definition, +followed by an optional overview, a description of each argument (for +commands and events), member (for structs and unions), branch (for +alternates), or value (for enums), a description of each feature (if +any), and finally optional tagged sections. + +The description of an argument or feature 'name' starts with +'\@name:'. The description text can start on the line following the +'\@name:', in which case it must not be indented at all. It can also +start on the same line as the '\@name:'. In this case if it spans +multiple lines then second and subsequent lines must be indented to +line up with the first character of the first line of the +description:: + + # @argone: + # This is a two line description + # in the first style. + # + # @argtwo: This is a two line description + # in the second style. + +The number of spaces between the ':' and the text is not significant. + +.. admonition:: FIXME + + The parser accepts these things in almost any order. + +.. admonition:: FIXME + + union branches should be described, too. + +Extensions added after the definition was first released carry a +'(since x.y.z)' comment. + +The feature descriptions must be preceded by a line "Features:", like +this:: + + # Features: + # @feature: Description text + +A tagged section starts with one of the following words: +"Note:"/"Notes:", "Since:", "Example"/"Examples", "Returns:", "TODO:". +The section ends with the start of a new section. + +The text of a section can start on a new line, in +which case it must not be indented at all. It can also start +on the same line as the 'Note:', 'Returns:', etc tag. In this +case if it spans multiple lines then second and subsequent +lines must be indented to match the first, in the same way as +multiline argument descriptions. + +A 'Since: x.y.z' tagged section lists the release that introduced the +definition. + +An 'Example' or 'Examples' section is automatically rendered +entirely as literal fixed-width text. 
In other sections, +the text is formatted, and rST markup can be used. + +For example:: + + ## + # @BlockStats: + # + # Statistics of a virtual block device or a block backing device. + # + # @device: If the stats are for a virtual block device, the name + # corresponding to the virtual block device. + # + # @node-name: The node name of the device. (since 2.3) + # + # ... more members ... + # + # Since: 0.14.0 + ## + { 'struct': 'BlockStats', + 'data': {'*device': 'str', '*node-name': 'str', + ... more members ... } } + + ## + # @query-blockstats: + # + # Query the @BlockStats for all virtual block devices. + # + # @query-nodes: If true, the command will query all the + # block nodes ... explain, explain ... (since 2.3) + # + # Returns: A list of @BlockStats for each virtual block devices. + # + # Since: 0.14.0 + # + # Example: + # + # -> { "execute": "query-blockstats" } + # <- { + # ... lots of output ... + # } + # + ## + { 'command': 'query-blockstats', + 'data': { '*query-nodes': 'bool' }, + 'returns': ['BlockStats'] } + + +Client JSON Protocol introspection +================================== + +Clients of a Client JSON Protocol commonly need to figure out what +exactly the server (QEMU) supports. + +For this purpose, QMP provides introspection via command +query-qmp-schema. QGA currently doesn't support introspection. + +While Client JSON Protocol wire compatibility should be maintained +between qemu versions, we cannot make the same guarantees for +introspection stability. For example, one version of qemu may provide +a non-variant optional member of a struct, and a later version rework +the member to instead be non-optional and associated with a variant. +Likewise, one version of qemu may list a member with open-ended type +'str', and a later version could convert it to a finite set of strings +via an enum type; or a member may be converted from a specific type to +an alternate that represents a choice between the original type and +something else. + +query-qmp-schema returns a JSON array of SchemaInfo objects. These +objects together describe the wire ABI, as defined in the QAPI schema. +There is no specified order to the SchemaInfo objects returned; a +client must search for a particular name throughout the entire array +to learn more about that name, but is at least guaranteed that there +will be no collisions between type, command, and event names. + +However, the SchemaInfo can't reflect all the rules and restrictions +that apply to QMP. It's interface introspection (figuring out what's +there), not interface specification. The specification is in the QAPI +schema. To understand how QMP is to be used, you need to study the +QAPI schema. + +Like any other command, query-qmp-schema is itself defined in the QAPI +schema, along with the SchemaInfo type. This text attempts to give an +overview how things work. For details you need to consult the QAPI +schema. + +SchemaInfo objects have common members "name", "meta-type", +"features", and additional variant members depending on the value of +meta-type. + +Each SchemaInfo object describes a wire ABI entity of a certain +meta-type: a command, event or one of several kinds of type. + +SchemaInfo for commands and events have the same name as in the QAPI +schema. + +Command and event names are part of the wire ABI, but type names are +not. Therefore, the SchemaInfo for types have auto-generated +meaningless names. For readability, the examples in this section use +meaningful type names instead. 
+ +Optional member "features" exposes the entity's feature strings as a +JSON array of strings. + +To examine a type, start with a command or event using it, then follow +references by name. + +QAPI schema definitions not reachable that way are omitted. + +The SchemaInfo for a command has meta-type "command", and variant +members "arg-type", "ret-type" and "allow-oob". On the wire, the +"arguments" member of a client's "execute" command must conform to the +object type named by "arg-type". The "return" member that the server +passes in a success response conforms to the type named by "ret-type". +When "allow-oob" is true, it means the command supports out-of-band +execution. It defaults to false. + +If the command takes no arguments, "arg-type" names an object type +without members. Likewise, if the command returns nothing, "ret-type" +names an object type without members. + +Example: the SchemaInfo for command query-qmp-schema :: + + { "name": "query-qmp-schema", "meta-type": "command", + "arg-type": "q_empty", "ret-type": "SchemaInfoList" } + + Type "q_empty" is an automatic object type without members, and type + "SchemaInfoList" is the array of SchemaInfo type. + +The SchemaInfo for an event has meta-type "event", and variant member +"arg-type". On the wire, a "data" member that the server passes in an +event conforms to the object type named by "arg-type". + +If the event carries no additional information, "arg-type" names an +object type without members. The event may not have a data member on +the wire then. + +Each command or event defined with 'data' as MEMBERS object in the +QAPI schema implicitly defines an object type. + +Example: the SchemaInfo for EVENT_C from section Events_ :: + + { "name": "EVENT_C", "meta-type": "event", + "arg-type": "q_obj-EVENT_C-arg" } + + Type "q_obj-EVENT_C-arg" is an implicitly defined object type with + the two members from the event's definition. + +The SchemaInfo for struct and union types has meta-type "object". + +The SchemaInfo for a struct type has variant member "members". + +The SchemaInfo for a union type additionally has variant members "tag" +and "variants". + +"members" is a JSON array describing the object's common members, if +any. Each element is a JSON object with members "name" (the member's +name), "type" (the name of its type), "features" (a JSON array of +feature strings), and "default". The latter two are optional. The +member is optional if "default" is present. Currently, "default" can +only have value null. Other values are reserved for future +extensions. The "members" array is in no particular order; clients +must search the entire object when learning whether a particular +member is supported. + +Example: the SchemaInfo for MyType from section `Struct types`_ :: + + { "name": "MyType", "meta-type": "object", + "members": [ + { "name": "member1", "type": "str" }, + { "name": "member2", "type": "int" }, + { "name": "member3", "type": "str", "default": null } ] } + +"features" exposes the command's feature strings as a JSON array of +strings. + +Example: the SchemaInfo for TestType from section Features_:: + + { "name": "TestType", "meta-type": "object", + "members": [ + { "name": "number", "type": "int" } ], + "features": ["allow-negative-numbers"] } + +"tag" is the name of the common member serving as type tag. +"variants" is a JSON array describing the object's variant members. 
+Each element is a JSON object with members "case" (the value of type +tag this element applies to) and "type" (the name of an object type +that provides the variant members for this type tag value). The +"variants" array is in no particular order, and is not guaranteed to +list cases in the same order as the corresponding "tag" enum type. + +Example: the SchemaInfo for union BlockdevOptions from section +`Union types`_ :: + + { "name": "BlockdevOptions", "meta-type": "object", + "members": [ + { "name": "driver", "type": "BlockdevDriver" }, + { "name": "read-only", "type": "bool", "default": null } ], + "tag": "driver", + "variants": [ + { "case": "file", "type": "BlockdevOptionsFile" }, + { "case": "qcow2", "type": "BlockdevOptionsQcow2" } ] } + +Note that base types are "flattened": its members are included in the +"members" array. + +The SchemaInfo for an alternate type has meta-type "alternate", and +variant member "members". "members" is a JSON array. Each element is +a JSON object with member "type", which names a type. Values of the +alternate type conform to exactly one of its member types. There is +no guarantee on the order in which "members" will be listed. + +Example: the SchemaInfo for BlockdevRef from section `Alternate types`_ :: + + { "name": "BlockdevRef", "meta-type": "alternate", + "members": [ + { "type": "BlockdevOptions" }, + { "type": "str" } ] } + +The SchemaInfo for an array type has meta-type "array", and variant +member "element-type", which names the array's element type. Array +types are implicitly defined. For convenience, the array's name may +resemble the element type; however, clients should examine member +"element-type" instead of making assumptions based on parsing member +"name". + +Example: the SchemaInfo for ['str'] :: + + { "name": "[str]", "meta-type": "array", + "element-type": "str" } + +The SchemaInfo for an enumeration type has meta-type "enum" and +variant member "members". + +"members" is a JSON array describing the enumeration values. Each +element is a JSON object with member "name" (the member's name), and +optionally "features" (a JSON array of feature strings). The +"members" array is in no particular order; clients must search the +entire array when learning whether a particular value is supported. + +Example: the SchemaInfo for MyEnum from section `Enumeration types`_ :: + + { "name": "MyEnum", "meta-type": "enum", + "members": [ + { "name": "value1" }, + { "name": "value2" }, + { "name": "value3" } + ] } + +The SchemaInfo for a built-in type has the same name as the type in +the QAPI schema (see section `Built-in Types`_), with one exception +detailed below. It has variant member "json-type" that shows how +values of this type are encoded on the wire. + +Example: the SchemaInfo for str :: + + { "name": "str", "meta-type": "builtin", "json-type": "string" } + +The QAPI schema supports a number of integer types that only differ in +how they map to C. They are identical as far as SchemaInfo is +concerned. Therefore, they get all mapped to a single type "int" in +SchemaInfo. + +As explained above, type names are not part of the wire ABI. Not even +the names of built-in types. Clients should examine member +"json-type" instead of hard-coding names of built-in types. + + +Compatibility considerations +============================ + +Maintaining backward compatibility at the Client JSON Protocol level +while evolving the schema requires some care. 
This section is about +syntactic compatibility, which is necessary, but not sufficient, for +actual compatibility. + +Clients send commands with argument data, and receive command +responses with return data and events with event data. + +Adding opt-in functionality to the send direction is backwards +compatible: adding commands, optional arguments, enumeration values, +union and alternate branches; turning an argument type into an +alternate of that type; making mandatory arguments optional. Clients +oblivious of the new functionality continue to work. + +Incompatible changes include removing commands, command arguments, +enumeration values, union and alternate branches, adding mandatory +command arguments, and making optional arguments mandatory. + +The specified behavior of an absent optional argument should remain +the same. With proper documentation, this policy still allows some +flexibility; for example, when an optional 'buffer-size' argument is +specified to default to a sensible buffer size, the actual default +value can still be changed. The specified default behavior is not the +exact size of the buffer, only that the default size is sensible. + +Adding functionality to the receive direction is generally backwards +compatible: adding events, adding return and event data members. +Clients are expected to ignore the ones they don't know. + +Removing "unreachable" stuff like events that can't be triggered +anymore, optional return or event data members that can't be sent +anymore, and return or event data member (enumeration) values that +can't be sent anymore makes no difference to clients, except for +introspection. The latter can conceivably confuse clients, so tread +carefully. + +Incompatible changes include removing return and event data members. + +Any change to a command definition's 'data' or one of the types used +there (recursively) needs to consider send direction compatibility. + +Any change to a command definition's 'return', an event definition's +'data', or one of the types used there (recursively) needs to consider +receive direction compatibility. + +Any change to types used in both contexts need to consider both. + +Enumeration type values and complex and alternate type members may be +reordered freely. For enumerations and alternate types, this doesn't +affect the wire encoding. For complex types, this might make the +implementation emit JSON object members in a different order, which +the Client JSON Protocol permits. + +Since type names are not visible in the Client JSON Protocol, types +may be freely renamed. Even certain refactorings are invisible, such +as splitting members from one type into a common base type. + + +Code generation +=============== + +The QAPI code generator qapi-gen.py generates code and documentation +from the schema. Together with the core QAPI libraries, this code +provides everything required to take JSON commands read in by a Client +JSON Protocol server, unmarshal the arguments into the underlying C +types, call into the corresponding C function, map the response back +to a Client JSON Protocol response to be returned to the user, and +introspect the commands. + +As an example, we'll use the following schema, which describes a +single complex user-defined type, along with command which takes a +list of that type as a parameter, and returns a single element of that +type. The user is responsible for writing the implementation of +qmp_my_command(); everything else is produced by the generator. 
:: + + $ cat example-schema.json + { 'struct': 'UserDefOne', + 'data': { 'integer': 'int', '*string': 'str' } } + + { 'command': 'my-command', + 'data': { 'arg1': ['UserDefOne'] }, + 'returns': 'UserDefOne' } + + { 'event': 'MY_EVENT' } + +We run qapi-gen.py like this:: + + $ python scripts/qapi-gen.py --output-dir="qapi-generated" \ + --prefix="example-" example-schema.json + +For a more thorough look at generated code, the testsuite includes +tests/qapi-schema/qapi-schema-tests.json that covers more examples of +what the generator will accept, and compiles the resulting C code as +part of 'make check-unit'. + + +Code generated for QAPI types +----------------------------- + +The following files are created: + + ``$(prefix)qapi-types.h`` + C types corresponding to types defined in the schema + + ``$(prefix)qapi-types.c`` + Cleanup functions for the above C types + +The $(prefix) is an optional parameter used as a namespace to keep the +generated code from one schema/code-generation separated from others so code +can be generated/used from multiple schemas without clobbering previously +created code. + +Example:: + + $ cat qapi-generated/example-qapi-types.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_TYPES_H + #define EXAMPLE_QAPI_TYPES_H + + #include "qapi/qapi-builtin-types.h" + + typedef struct UserDefOne UserDefOne; + + typedef struct UserDefOneList UserDefOneList; + + typedef struct q_obj_my_command_arg q_obj_my_command_arg; + + struct UserDefOne { + int64_t integer; + bool has_string; + char *string; + }; + + void qapi_free_UserDefOne(UserDefOne *obj); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) + + struct UserDefOneList { + UserDefOneList *next; + UserDefOne *value; + }; + + void qapi_free_UserDefOneList(UserDefOneList *obj); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) + + struct q_obj_my_command_arg { + UserDefOneList *arg1; + }; + + #endif /* EXAMPLE_QAPI_TYPES_H */ + $ cat qapi-generated/example-qapi-types.c + [Uninteresting stuff omitted...] + + void qapi_free_UserDefOne(UserDefOne *obj) + { + Visitor *v; + + if (!obj) { + return; + } + + v = qapi_dealloc_visitor_new(); + visit_type_UserDefOne(v, NULL, &obj, NULL); + visit_free(v); + } + + void qapi_free_UserDefOneList(UserDefOneList *obj) + { + Visitor *v; + + if (!obj) { + return; + } + + v = qapi_dealloc_visitor_new(); + visit_type_UserDefOneList(v, NULL, &obj, NULL); + visit_free(v); + } + + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into :: + + SUBDIR/$(prefix)qapi-types-SUBMODULE.h + SUBDIR/$(prefix)qapi-types-SUBMODULE.c + +If qapi-gen.py is run with option --builtins, additional files are +created: + + ``qapi-builtin-types.h`` + C types corresponding to built-in types + + ``qapi-builtin-types.c`` + Cleanup functions for the above C types + + +Code generated for visiting QAPI types +-------------------------------------- + +These are the visitor functions used to walk through and convert +between a native QAPI C data structure and some other format (such as +QObject); the generated functions are named visit_type_FOO() and +visit_type_FOO_members(). 
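+
+As a hedged illustration (hand-written code, not part of the generated
+output), the visitors generated for the example schema above can be
+combined with the core input visitor to build a UserDefOne from a
+QDict.  The variable ``args`` is an assumed QDict obtained elsewhere;
+qobject_input_visitor_new() comes from the core QAPI visitor API, not
+from the generated files described below::
+
+    /* assumes "qapi/qobject-input-visitor.h" and the generated
+     * "example-qapi-visit.h" have been included */
+    Visitor *v;
+    UserDefOne *obj = NULL;
+    Error *err = NULL;
+
+    /* walk the QDict and fill in a newly allocated UserDefOne */
+    v = qobject_input_visitor_new(QOBJECT(args));
+    visit_type_UserDefOne(v, NULL, &obj, &err);
+    visit_free(v);
+
+    if (err) {
+        /* conversion failed; report or propagate the error */
+    } else {
+        /* use obj, then release it with the generated cleanup helper */
+        qapi_free_UserDefOne(obj);
+    }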
+ +The following files are generated: + + ``$(prefix)qapi-visit.c`` + Visitor function for a particular C type, used to automagically + convert QObjects into the corresponding C type and vice-versa, as + well as for deallocating memory for an existing C type + + ``$(prefix)qapi-visit.h`` + Declarations for previously mentioned visitor functions + +Example:: + + $ cat qapi-generated/example-qapi-visit.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_VISIT_H + #define EXAMPLE_QAPI_VISIT_H + + #include "qapi/qapi-builtin-visit.h" + #include "example-qapi-types.h" + + + bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp); + + bool visit_type_UserDefOne(Visitor *v, const char *name, + UserDefOne **obj, Error **errp); + + bool visit_type_UserDefOneList(Visitor *v, const char *name, + UserDefOneList **obj, Error **errp); + + bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp); + + #endif /* EXAMPLE_QAPI_VISIT_H */ + $ cat qapi-generated/example-qapi-visit.c + [Uninteresting stuff omitted...] + + bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp) + { + if (!visit_type_int(v, "integer", &obj->integer, errp)) { + return false; + } + if (visit_optional(v, "string", &obj->has_string)) { + if (!visit_type_str(v, "string", &obj->string, errp)) { + return false; + } + } + return true; + } + + bool visit_type_UserDefOne(Visitor *v, const char *name, + UserDefOne **obj, Error **errp) + { + bool ok = false; + + if (!visit_start_struct(v, name, (void **)obj, sizeof(UserDefOne), errp)) { + return false; + } + if (!*obj) { + /* incomplete */ + assert(visit_is_dealloc(v)); + ok = true; + goto out_obj; + } + if (!visit_type_UserDefOne_members(v, *obj, errp)) { + goto out_obj; + } + ok = visit_check_struct(v, errp); + out_obj: + visit_end_struct(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_UserDefOne(*obj); + *obj = NULL; + } + return ok; + } + + bool visit_type_UserDefOneList(Visitor *v, const char *name, + UserDefOneList **obj, Error **errp) + { + bool ok = false; + UserDefOneList *tail; + size_t size = sizeof(**obj); + + if (!visit_start_list(v, name, (GenericList **)obj, size, errp)) { + return false; + } + + for (tail = *obj; tail; + tail = (UserDefOneList *)visit_next_list(v, (GenericList *)tail, size)) { + if (!visit_type_UserDefOne(v, NULL, &tail->value, errp)) { + goto out_obj; + } + } + + ok = visit_check_list(v, errp); + out_obj: + visit_end_list(v, (void **)obj); + if (!ok && visit_is_input(v)) { + qapi_free_UserDefOneList(*obj); + *obj = NULL; + } + return ok; + } + + bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp) + { + if (!visit_type_UserDefOneList(v, "arg1", &obj->arg1, errp)) { + return false; + } + return true; + } + + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into :: + + SUBDIR/$(prefix)qapi-visit-SUBMODULE.h + SUBDIR/$(prefix)qapi-visit-SUBMODULE.c + +If qapi-gen.py is run with option --builtins, additional files are +created: + + ``qapi-builtin-visit.h`` + Visitor functions for built-in types + + ``qapi-builtin-visit.c`` + Declarations for these visitor functions + + +Code generated for commands +--------------------------- + +These are the marshaling/dispatch functions for the commands defined +in the schema. 
The generated code provides qmp_marshal_COMMAND(), and +declares qmp_COMMAND() that the user must implement. + +The following files are generated: + + ``$(prefix)qapi-commands.c`` + Command marshal/dispatch functions for each QMP command defined in + the schema + + ``$(prefix)qapi-commands.h`` + Function prototypes for the QMP commands specified in the schema + + ``$(prefix)qapi-init-commands.h`` + Command initialization prototype + + ``$(prefix)qapi-init-commands.c`` + Command initialization code + +Example:: + + $ cat qapi-generated/example-qapi-commands.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_COMMANDS_H + #define EXAMPLE_QAPI_COMMANDS_H + + #include "example-qapi-types.h" + + UserDefOne *qmp_my_command(UserDefOneList *arg1, Error **errp); + void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp); + + #endif /* EXAMPLE_QAPI_COMMANDS_H */ + $ cat qapi-generated/example-qapi-commands.c + [Uninteresting stuff omitted...] + + + static void qmp_marshal_output_UserDefOne(UserDefOne *ret_in, + QObject **ret_out, Error **errp) + { + Visitor *v; + + v = qobject_output_visitor_new_qmp(ret_out); + if (visit_type_UserDefOne(v, "unused", &ret_in, errp)) { + visit_complete(v, ret_out); + } + visit_free(v); + v = qapi_dealloc_visitor_new(); + visit_type_UserDefOne(v, "unused", &ret_in, NULL); + visit_free(v); + } + + void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp) + { + Error *err = NULL; + bool ok = false; + Visitor *v; + UserDefOne *retval; + q_obj_my_command_arg arg = {0}; + + v = qobject_input_visitor_new_qmp(QOBJECT(args)); + if (!visit_start_struct(v, NULL, NULL, 0, errp)) { + goto out; + } + if (visit_type_q_obj_my_command_arg_members(v, &arg, errp)) { + ok = visit_check_struct(v, errp); + } + visit_end_struct(v, NULL); + if (!ok) { + goto out; + } + + retval = qmp_my_command(arg.arg1, &err); + error_propagate(errp, err); + if (err) { + goto out; + } + + qmp_marshal_output_UserDefOne(retval, ret, errp); + + out: + visit_free(v); + v = qapi_dealloc_visitor_new(); + visit_start_struct(v, NULL, NULL, 0, NULL); + visit_type_q_obj_my_command_arg_members(v, &arg, NULL); + visit_end_struct(v, NULL); + visit_free(v); + } + + [Uninteresting stuff omitted...] + $ cat qapi-generated/example-qapi-init-commands.h + [Uninteresting stuff omitted...] + #ifndef EXAMPLE_QAPI_INIT_COMMANDS_H + #define EXAMPLE_QAPI_INIT_COMMANDS_H + + #include "qapi/qmp/dispatch.h" + + void example_qmp_init_marshal(QmpCommandList *cmds); + + #endif /* EXAMPLE_QAPI_INIT_COMMANDS_H */ + $ cat qapi-generated/example-qapi-init-commands.c + [Uninteresting stuff omitted...] + void example_qmp_init_marshal(QmpCommandList *cmds) + { + QTAILQ_INIT(cmds); + + qmp_register_command(cmds, "my-command", + qmp_marshal_my_command, QCO_NO_OPTIONS); + } + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into:: + + SUBDIR/$(prefix)qapi-commands-SUBMODULE.h + SUBDIR/$(prefix)qapi-commands-SUBMODULE.c + + +Code generated for events +------------------------- + +This is the code related to events defined in the schema, providing +qapi_event_send_EVENT(). 
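+
+A minimal, hedged sketch of hand-written code emitting the example
+event; events whose 'data' defines members instead take those members
+as arguments, in QAPI schema order (the generated sender is shown in
+the example header below)::
+
+    /* e.g. from a device model or other hand-written QEMU code */
+    qapi_event_send_my_event();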
+ +The following files are created: + + ``$(prefix)qapi-events.h`` + Function prototypes for each event type + + ``$(prefix)qapi-events.c`` + Implementation of functions to send an event + + ``$(prefix)qapi-emit-events.h`` + Enumeration of all event names, and common event code declarations + + ``$(prefix)qapi-emit-events.c`` + Common event code definitions + +Example:: + + $ cat qapi-generated/example-qapi-events.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_EVENTS_H + #define EXAMPLE_QAPI_EVENTS_H + + #include "qapi/util.h" + #include "example-qapi-types.h" + + void qapi_event_send_my_event(void); + + #endif /* EXAMPLE_QAPI_EVENTS_H */ + $ cat qapi-generated/example-qapi-events.c + [Uninteresting stuff omitted...] + + void qapi_event_send_my_event(void) + { + QDict *qmp; + + qmp = qmp_event_build_dict("MY_EVENT"); + + example_qapi_event_emit(EXAMPLE_QAPI_EVENT_MY_EVENT, qmp); + + qobject_unref(qmp); + } + + [Uninteresting stuff omitted...] + $ cat qapi-generated/example-qapi-emit-events.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_EMIT_EVENTS_H + #define EXAMPLE_QAPI_EMIT_EVENTS_H + + #include "qapi/util.h" + + typedef enum example_QAPIEvent { + EXAMPLE_QAPI_EVENT_MY_EVENT, + EXAMPLE_QAPI_EVENT__MAX, + } example_QAPIEvent; + + #define example_QAPIEvent_str(val) \ + qapi_enum_lookup(&example_QAPIEvent_lookup, (val)) + + extern const QEnumLookup example_QAPIEvent_lookup; + + void example_qapi_event_emit(example_QAPIEvent event, QDict *qdict); + + #endif /* EXAMPLE_QAPI_EMIT_EVENTS_H */ + $ cat qapi-generated/example-qapi-emit-events.c + [Uninteresting stuff omitted...] + + const QEnumLookup example_QAPIEvent_lookup = { + .array = (const char *const[]) { + [EXAMPLE_QAPI_EVENT_MY_EVENT] = "MY_EVENT", + }, + .size = EXAMPLE_QAPI_EVENT__MAX + }; + + [Uninteresting stuff omitted...] + +For a modular QAPI schema (see section `Include directives`_), code for +each sub-module SUBDIR/SUBMODULE.json is actually generated into :: + + SUBDIR/$(prefix)qapi-events-SUBMODULE.h + SUBDIR/$(prefix)qapi-events-SUBMODULE.c + + +Code generated for introspection +-------------------------------- + +The following files are created: + + ``$(prefix)qapi-introspect.c`` + Defines a string holding a JSON description of the schema + + ``$(prefix)qapi-introspect.h`` + Declares the above string + +Example:: + + $ cat qapi-generated/example-qapi-introspect.h + [Uninteresting stuff omitted...] + + #ifndef EXAMPLE_QAPI_INTROSPECT_H + #define EXAMPLE_QAPI_INTROSPECT_H + + #include "qapi/qmp/qlit.h" + + extern const QLitObject example_qmp_schema_qlit; + + #endif /* EXAMPLE_QAPI_INTROSPECT_H */ + $ cat qapi-generated/example-qapi-introspect.c + [Uninteresting stuff omitted...] 
+ + const QLitObject example_qmp_schema_qlit = QLIT_QLIST(((QLitObject[]) { + QLIT_QDICT(((QLitDictEntry[]) { + { "arg-type", QLIT_QSTR("0"), }, + { "meta-type", QLIT_QSTR("command"), }, + { "name", QLIT_QSTR("my-command"), }, + { "ret-type", QLIT_QSTR("1"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "arg-type", QLIT_QSTR("2"), }, + { "meta-type", QLIT_QSTR("event"), }, + { "name", QLIT_QSTR("MY_EVENT"), }, + {} + })), + /* "0" = q_obj_my-command-arg */ + QLIT_QDICT(((QLitDictEntry[]) { + { "members", QLIT_QLIST(((QLitObject[]) { + QLIT_QDICT(((QLitDictEntry[]) { + { "name", QLIT_QSTR("arg1"), }, + { "type", QLIT_QSTR("[1]"), }, + {} + })), + {} + })), }, + { "meta-type", QLIT_QSTR("object"), }, + { "name", QLIT_QSTR("0"), }, + {} + })), + /* "1" = UserDefOne */ + QLIT_QDICT(((QLitDictEntry[]) { + { "members", QLIT_QLIST(((QLitObject[]) { + QLIT_QDICT(((QLitDictEntry[]) { + { "name", QLIT_QSTR("integer"), }, + { "type", QLIT_QSTR("int"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "default", QLIT_QNULL, }, + { "name", QLIT_QSTR("string"), }, + { "type", QLIT_QSTR("str"), }, + {} + })), + {} + })), }, + { "meta-type", QLIT_QSTR("object"), }, + { "name", QLIT_QSTR("1"), }, + {} + })), + /* "2" = q_empty */ + QLIT_QDICT(((QLitDictEntry[]) { + { "members", QLIT_QLIST(((QLitObject[]) { + {} + })), }, + { "meta-type", QLIT_QSTR("object"), }, + { "name", QLIT_QSTR("2"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "element-type", QLIT_QSTR("1"), }, + { "meta-type", QLIT_QSTR("array"), }, + { "name", QLIT_QSTR("[1]"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "json-type", QLIT_QSTR("int"), }, + { "meta-type", QLIT_QSTR("builtin"), }, + { "name", QLIT_QSTR("int"), }, + {} + })), + QLIT_QDICT(((QLitDictEntry[]) { + { "json-type", QLIT_QSTR("string"), }, + { "meta-type", QLIT_QSTR("builtin"), }, + { "name", QLIT_QSTR("str"), }, + {} + })), + {} + })); + + [Uninteresting stuff omitted...] diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt deleted file mode 100644 index c1cb6f987de..00000000000 --- a/docs/devel/qapi-code-gen.txt +++ /dev/null @@ -1,1897 +0,0 @@ -= How to use the QAPI code generator = - -Copyright IBM Corp. 2011 -Copyright (C) 2012-2016 Red Hat, Inc. - -This work is licensed under the terms of the GNU GPL, version 2 or -later. See the COPYING file in the top-level directory. - -== Introduction == - -QAPI is a native C API within QEMU which provides management-level -functionality to internal and external users. For external -users/processes, this interface is made available by a JSON-based wire -format for the QEMU Monitor Protocol (QMP) for controlling qemu, as -well as the QEMU Guest Agent (QGA) for communicating with the guest. -The remainder of this document uses "Client JSON Protocol" when -referring to the wire contents of a QMP or QGA connection. - -To map between Client JSON Protocol interfaces and the native C API, -we generate C code from a QAPI schema. This document describes the -QAPI schema language, and how it gets mapped to the Client JSON -Protocol and to C. It additionally provides guidance on maintaining -Client JSON Protocol compatibility. - - -== The QAPI schema language == - -The QAPI schema defines the Client JSON Protocol's commands and -events, as well as types used by them. Forward references are -allowed. - -It is permissible for the schema to contain additional types not used -by any commands or events, for the side effect of generated C code -used internally. 
- -There are several kinds of types: simple types (a number of built-in -types, such as 'int' and 'str'; as well as enumerations), arrays, -complex types (structs and two flavors of unions), and alternate types -(a choice between other types). - - -=== Schema syntax === - -Syntax is loosely based on JSON (http://www.ietf.org/rfc/rfc8259.txt). -Differences: - -* Comments: start with a hash character (#) that is not part of a - string, and extend to the end of the line. - -* Strings are enclosed in 'single quotes', not "double quotes". - -* Strings are restricted to printable ASCII, and escape sequences to - just '\\'. - -* Numbers and null are not supported. - -A second layer of syntax defines the sequences of JSON texts that are -a correctly structured QAPI schema. We provide a grammar for this -syntax in an EBNF-like notation: - -* Production rules look like non-terminal = expression -* Concatenation: expression A B matches expression A, then B -* Alternation: expression A | B matches expression A or B -* Repetition: expression A... matches zero or more occurrences of - expression A -* Repetition: expression A, ... matches zero or more occurrences of - expression A separated by , -* Grouping: expression ( A ) matches expression A -* JSON's structural characters are terminals: { } [ ] : , -* JSON's literal names are terminals: false true -* String literals enclosed in 'single quotes' are terminal, and match - this JSON string, with a leading '*' stripped off -* When JSON object member's name starts with '*', the member is - optional. -* The symbol STRING is a terminal, and matches any JSON string -* The symbol BOOL is a terminal, and matches JSON false or true -* ALL-CAPS words other than STRING are non-terminals - -The order of members within JSON objects does not matter unless -explicitly noted. - -A QAPI schema consists of a series of top-level expressions: - - SCHEMA = TOP-LEVEL-EXPR... - -The top-level expressions are all JSON objects. Code and -documentation is generated in schema definition order. Code order -should not matter. - -A top-level expressions is either a directive or a definition: - - TOP-LEVEL-EXPR = DIRECTIVE | DEFINITION - -There are two kinds of directives and six kinds of definitions: - - DIRECTIVE = INCLUDE | PRAGMA - DEFINITION = ENUM | STRUCT | UNION | ALTERNATE | COMMAND | EVENT - -These are discussed in detail below. - - -=== Built-in Types === - -The following types are predefined, and map to C as follows: - - Schema C JSON - str char * any JSON string, UTF-8 - number double any JSON number - int int64_t a JSON number without fractional part - that fits into the C integer type - int8 int8_t likewise - int16 int16_t likewise - int32 int32_t likewise - int64 int64_t likewise - uint8 uint8_t likewise - uint16 uint16_t likewise - uint32 uint32_t likewise - uint64 uint64_t likewise - size uint64_t like uint64_t, except StringInputVisitor - accepts size suffixes - bool bool JSON true or false - null QNull * JSON null - any QObject * any JSON value - QType QType JSON string matching enum QType values - - -=== Include directives === - -Syntax: - INCLUDE = { 'include': STRING } - -The QAPI schema definitions can be modularized using the 'include' directive: - - { 'include': 'path/to/file.json' } - -The directive is evaluated recursively, and include paths are relative -to the file using the directive. Multiple includes of the same file -are idempotent. 
- -As a matter of style, it is a good idea to have all files be -self-contained, but at the moment, nothing prevents an included file -from making a forward reference to a type that is only introduced by -an outer file. The parser may be made stricter in the future to -prevent incomplete include files. - - -=== Pragma directives === - -Syntax: - PRAGMA = { 'pragma': { - '*doc-required': BOOL, - '*command-name-exceptions': [ STRING, ... ], - '*command-returns-exceptions': [ STRING, ... ], - '*member-name-exceptions': [ STRING, ... ] } } - -The pragma directive lets you control optional generator behavior. - -Pragma's scope is currently the complete schema. Setting the same -pragma to different values in parts of the schema doesn't work. - -Pragma 'doc-required' takes a boolean value. If true, documentation -is required. Default is false. - -Pragma 'command-name-exceptions' takes a list of commands whose names -may contain '_' instead of '-'. Default is none. - -Pragma 'command-returns-exceptions' takes a list of commands that may -violate the rules on permitted return types. Default is none. - -Pragma 'member-name-exceptions' takes a list of types whose member -names may contain uppercase letters, and '_' instead of '-'. Default -is none. - - -=== Enumeration types === - -Syntax: - ENUM = { 'enum': STRING, - 'data': [ ENUM-VALUE, ... ], - '*prefix': STRING, - '*if': COND, - '*features': FEATURES } - ENUM-VALUE = STRING - | { 'name': STRING, '*if': COND } - -Member 'enum' names the enum type. - -Each member of the 'data' array defines a value of the enumeration -type. The form STRING is shorthand for { 'name': STRING }. The -'name' values must be be distinct. - -Example: - - { 'enum': 'MyEnum', 'data': [ 'value1', 'value2', 'value3' ] } - -Nothing prevents an empty enumeration, although it is probably not -useful. - -On the wire, an enumeration type's value is represented by its -(string) name. In C, it's represented by an enumeration constant. -These are of the form PREFIX_NAME, where PREFIX is derived from the -enumeration type's name, and NAME from the value's name. For the -example above, the generator maps 'MyEnum' to MY_ENUM and 'value1' to -VALUE1, resulting in the enumeration constant MY_ENUM_VALUE1. The -optional 'prefix' member overrides PREFIX. - -The generated C enumeration constants have values 0, 1, ..., N-1 (in -QAPI schema order), where N is the number of values. There is an -additional enumeration constant PREFIX__MAX with value N. - -Do not use string or an integer type when an enumeration type can do -the job satisfactorily. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Type references and array types === - -Syntax: - TYPE-REF = STRING | ARRAY-TYPE - ARRAY-TYPE = [ STRING ] - -A string denotes the type named by the string. - -A one-element array containing a string denotes an array of the type -named by the string. Example: ['int'] denotes an array of 'int'. - - -=== Struct types === - -Syntax: - STRUCT = { 'struct': STRING, - 'data': MEMBERS, - '*base': STRING, - '*if': COND, - '*features': FEATURES } - MEMBERS = { MEMBER, ... } - MEMBER = STRING : TYPE-REF - | STRING : { 'type': TYPE-REF, - '*if': COND, - '*features': FEATURES } - -Member 'struct' names the struct type. - -Each MEMBER of the 'data' object defines a member of the struct type. 
- -The MEMBER's STRING name consists of an optional '*' prefix and the -struct member name. If '*' is present, the member is optional. - -The MEMBER's value defines its properties, in particular its type. -The form TYPE-REF is shorthand for { 'type': TYPE-REF }. - -Example: - - { 'struct': 'MyType', - 'data': { 'member1': 'str', 'member2': ['int'], '*member3': 'str' } } - -A struct type corresponds to a struct in C, and an object in JSON. -The C struct's members are generated in QAPI schema order. - -The optional 'base' member names a struct type whose members are to be -included in this type. They go first in the C struct. - -Example: - - { 'struct': 'BlockdevOptionsGenericFormat', - 'data': { 'file': 'str' } } - { 'struct': 'BlockdevOptionsGenericCOWFormat', - 'base': 'BlockdevOptionsGenericFormat', - 'data': { '*backing': 'str' } } - -An example BlockdevOptionsGenericCOWFormat object on the wire could use -both members like this: - - { "file": "/some/place/my-image", - "backing": "/some/place/my-backing-file" } - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Union types === - -Syntax: - UNION = { 'union': STRING, - 'data': BRANCHES, - '*if': COND, - '*features': FEATURES } - | { 'union': STRING, - 'data': BRANCHES, - 'base': ( MEMBERS | STRING ), - 'discriminator': STRING, - '*if': COND, - '*features': FEATURES } - BRANCHES = { BRANCH, ... } - BRANCH = STRING : TYPE-REF - | STRING : { 'type': TYPE-REF, '*if': COND } - -Member 'union' names the union type. - -There are two flavors of union types: simple (no discriminator or -base), and flat (both discriminator and base). - -Each BRANCH of the 'data' object defines a branch of the union. A -union must have at least one branch. - -The BRANCH's STRING name is the branch name. - -The BRANCH's value defines the branch's properties, in particular its -type. The form TYPE-REF is shorthand for { 'type': TYPE-REF }. - -A simple union type defines a mapping from automatic discriminator -values to data types like in this example: - - { 'struct': 'BlockdevOptionsFile', 'data': { 'filename': 'str' } } - { 'struct': 'BlockdevOptionsQcow2', - 'data': { 'backing': 'str', '*lazy-refcounts': 'bool' } } - - { 'union': 'BlockdevOptionsSimple', - 'data': { 'file': 'BlockdevOptionsFile', - 'qcow2': 'BlockdevOptionsQcow2' } } - -In the Client JSON Protocol, a simple union is represented by an -object that contains the 'type' member as a discriminator, and a -'data' member that is of the specified data type corresponding to the -discriminator value, as in these examples: - - { "type": "file", "data": { "filename": "/some/place/my-image" } } - { "type": "qcow2", "data": { "backing": "/some/place/my-image", - "lazy-refcounts": true } } - -The generated C code uses a struct containing a union. Additionally, -an implicit C enum 'NameKind' is created, corresponding to the union -'Name', for accessing the various branches of the union. The value -for each branch can be of any type. - -Flat unions permit arbitrary common members that occur in all variants -of the union, not just a discriminator. Their discriminators need not -be named 'type'. They also avoid nesting on the wire. - -The 'base' member defines the common members. If it is a MEMBERS -object, it defines common members just like a struct type's 'data' -member defines struct type members. 
If it is a STRING, it names a -struct type whose members are the common members. - -All flat union branches must be of struct type. - -In the Client JSON Protocol, a flat union is represented by an object -with the common members (from the base type) and the selected branch's -members. The two sets of member names must be disjoint. Member -'discriminator' must name a non-optional enum-typed member of the base -struct. - -The following example enhances the above simple union example by -adding an optional common member 'read-only', renaming the -discriminator to something more applicable than the simple union's -default of 'type', and reducing the number of {} required on the wire: - - { 'enum': 'BlockdevDriver', 'data': [ 'file', 'qcow2' ] } - { 'union': 'BlockdevOptions', - 'base': { 'driver': 'BlockdevDriver', '*read-only': 'bool' }, - 'discriminator': 'driver', - 'data': { 'file': 'BlockdevOptionsFile', - 'qcow2': 'BlockdevOptionsQcow2' } } - -Resulting in these JSON objects: - - { "driver": "file", "read-only": true, - "filename": "/some/place/my-image" } - { "driver": "qcow2", "read-only": false, - "backing": "/some/place/my-image", "lazy-refcounts": true } - -Notice that in a flat union, the discriminator name is controlled by -the user, but because it must map to a base member with enum type, the -code generator ensures that branches match the existing values of the -enum. The order of branches need not match the order of the enum -values. The branches need not cover all possible enum values. -Omitted enum values are still valid branches that add no additional -members to the data type. In the resulting generated C data types, a -flat union is represented as a struct with the base members in QAPI -schema order, and then a union of structures for each branch of the -struct. - -A simple union can always be re-written as a flat union where the base -class has a single member named 'type', and where each branch of the -union has a struct with a single member named 'data'. That is, - - { 'union': 'Simple', 'data': { 'one': 'str', 'two': 'int' } } - -is identical on the wire to: - - { 'enum': 'Enum', 'data': ['one', 'two'] } - { 'struct': 'Branch1', 'data': { 'data': 'str' } } - { 'struct': 'Branch2', 'data': { 'data': 'int' } } - { 'union': 'Flat', 'base': { 'type': 'Enum' }, 'discriminator': 'type', - 'data': { 'one': 'Branch1', 'two': 'Branch2' } } - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Alternate types === - -Syntax: - ALTERNATE = { 'alternate': STRING, - 'data': ALTERNATIVES, - '*if': COND, - '*features': FEATURES } - ALTERNATIVES = { ALTERNATIVE, ... } - ALTERNATIVE = STRING : STRING - | STRING : { 'type': STRING, '*if': COND } - -Member 'alternate' names the alternate type. - -Each ALTERNATIVE of the 'data' object defines a branch of the -alternate. An alternate must have at least one branch. - -The ALTERNATIVE's STRING name is the branch name. - -The ALTERNATIVE's value defines the branch's properties, in particular -its type. The form STRING is shorthand for { 'type': STRING }. - -Example: - - { 'alternate': 'BlockdevRef', - 'data': { 'definition': 'BlockdevOptions', - 'reference': 'str' } } - -An alternate type is like a union type, except there is no -discriminator on the wire. Instead, the branch to use is inferred -from the value. 
An alternate can only express a choice between types -represented differently on the wire. - -If a branch is typed as the 'bool' built-in, the alternate accepts -true and false; if it is typed as any of the various numeric -built-ins, it accepts a JSON number; if it is typed as a 'str' -built-in or named enum type, it accepts a JSON string; if it is typed -as the 'null' built-in, it accepts JSON null; and if it is typed as a -complex type (struct or union), it accepts a JSON object. - -The example alternate declaration above allows using both of the -following example objects: - - { "file": "my_existing_block_device_id" } - { "file": { "driver": "file", - "read-only": false, - "filename": "/tmp/mydisk.qcow2" } } - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Commands === - -Syntax: - COMMAND = { 'command': STRING, - ( - '*data': ( MEMBERS | STRING ), - | - 'data': STRING, - 'boxed': true, - ) - '*returns': TYPE-REF, - '*success-response': false, - '*gen': false, - '*allow-oob': true, - '*allow-preconfig': true, - '*coroutine': true, - '*if': COND, - '*features': FEATURES } - -Member 'command' names the command. - -Member 'data' defines the arguments. It defaults to an empty MEMBERS -object. - -If 'data' is a MEMBERS object, then MEMBERS defines arguments just -like a struct type's 'data' defines struct type members. - -If 'data' is a STRING, then STRING names a complex type whose members -are the arguments. A union type requires 'boxed': true. - -Member 'returns' defines the command's return type. It defaults to an -empty struct type. It must normally be a complex type or an array of -a complex type. To return anything else, the command must be listed -in pragma 'commands-returns-exceptions'. If you do this, extending -the command to return additional information will be harder. Use of -the pragma for new commands is strongly discouraged. - -A command's error responses are not specified in the QAPI schema. -Error conditions should be documented in comments. - -In the Client JSON Protocol, the value of the "execute" or "exec-oob" -member is the command name. The value of the "arguments" member then -has to conform to the arguments, and the value of the success -response's "return" member will conform to the return type. - -Some example commands: - - { 'command': 'my-first-command', - 'data': { 'arg1': 'str', '*arg2': 'str' } } - { 'struct': 'MyType', 'data': { '*value': 'str' } } - { 'command': 'my-second-command', - 'returns': [ 'MyType' ] } - -which would validate this Client JSON Protocol transaction: - - => { "execute": "my-first-command", - "arguments": { "arg1": "hello" } } - <= { "return": { } } - => { "execute": "my-second-command" } - <= { "return": [ { "value": "one" }, { } ] } - -The generator emits a prototype for the C function implementing the -command. The function itself needs to be written by hand. See -section "Code generated for commands" for examples. - -The function returns the return type. When member 'boxed' is absent, -it takes the command arguments as arguments one by one, in QAPI schema -order. Else it takes them wrapped in the C struct generated for the -complex argument type. It takes an additional Error ** argument in -either case. 
- -The generator also emits a marshalling function that extracts -arguments for the user's function out of an input QDict, calls the -user's function, and if it succeeded, builds an output QObject from -its return value. This is for use by the QMP monitor core. - -In rare cases, QAPI cannot express a type-safe representation of a -corresponding Client JSON Protocol command. You then have to suppress -generation of a marshalling function by including a member 'gen' with -boolean value false, and instead write your own function. For -example: - - { 'command': 'netdev_add', - 'data': {'type': 'str', 'id': 'str'}, - 'gen': false } - -Please try to avoid adding new commands that rely on this, and instead -use type-safe unions. - -Normally, the QAPI schema is used to describe synchronous exchanges, -where a response is expected. But in some cases, the action of a -command is expected to change state in a way that a successful -response is not possible (although the command will still return an -error object on failure). When a successful reply is not possible, -the command definition includes the optional member 'success-response' -with boolean value false. So far, only QGA makes use of this member. - -Member 'allow-oob' declares whether the command supports out-of-band -(OOB) execution. It defaults to false. For example: - - { 'command': 'migrate_recover', - 'data': { 'uri': 'str' }, 'allow-oob': true } - -See qmp-spec.txt for out-of-band execution syntax and semantics. - -Commands supporting out-of-band execution can still be executed -in-band. - -When a command is executed in-band, its handler runs in the main -thread with the BQL held. - -When a command is executed out-of-band, its handler runs in a -dedicated monitor I/O thread with the BQL *not* held. - -An OOB-capable command handler must satisfy the following conditions: - -- It terminates quickly. -- It does not invoke system calls that may block. -- It does not access guest RAM that may block when userfaultfd is - enabled for postcopy live migration. -- It takes only "fast" locks, i.e. all critical sections protected by - any lock it takes also satisfy the conditions for OOB command - handler code. - -The restrictions on locking limit access to shared state. Such access -requires synchronization, but OOB commands can't take the BQL or any -other "slow" lock. - -When in doubt, do not implement OOB execution support. - -Member 'allow-preconfig' declares whether the command is available -before the machine is built. It defaults to false. For example: - - { 'enum': 'QMPCapability', - 'data': [ 'oob' ] } - { 'command': 'qmp_capabilities', - 'data': { '*enable': [ 'QMPCapability' ] }, - 'allow-preconfig': true } - -QMP is available before the machine is built only when QEMU was -started with --preconfig. - -Member 'coroutine' tells the QMP dispatcher whether the command handler -is safe to be run in a coroutine. It defaults to false. If it is true, -the command handler is called from coroutine context and may yield while -waiting for an external event (such as I/O completion) in order to avoid -blocking the guest and other background operations. - -Coroutine safety can be hard to prove, similar to thread safety. Common -pitfalls are: - -- The global mutex isn't held across qemu_coroutine_yield(), so - operations that used to assume that they execute atomically may have - to be more careful to protect against changes in the global state. - -- Nested event loops (AIO_WAIT_WHILE() etc.) 
are problematic in - coroutine context and can easily lead to deadlocks. They should be - replaced by yielding and reentering the coroutine when the condition - becomes false. - -Since the command handler may assume coroutine context, any callers -other than the QMP dispatcher must also call it in coroutine context. -In particular, HMP commands calling such a QMP command handler must be -marked .coroutine = true in hmp-commands.hx. - -It is an error to specify both 'coroutine': true and 'allow-oob': true -for a command. We don't currently have a use case for both together and -without a use case, it's not entirely clear what the semantics should -be. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Events === - -Syntax: - EVENT = { 'event': STRING, - ( - '*data': ( MEMBERS | STRING ), - | - 'data': STRING, - 'boxed': true, - ) - '*if': COND, - '*features': FEATURES } - -Member 'event' names the event. This is the event name used in the -Client JSON Protocol. - -Member 'data' defines the event-specific data. It defaults to an -empty MEMBERS object. - -If 'data' is a MEMBERS object, then MEMBERS defines event-specific -data just like a struct type's 'data' defines struct type members. - -If 'data' is a STRING, then STRING names a complex type whose members -are the event-specific data. A union type requires 'boxed': true. - -An example event is: - -{ 'event': 'EVENT_C', - 'data': { '*a': 'int', 'b': 'str' } } - -Resulting in this JSON object: - -{ "event": "EVENT_C", - "data": { "b": "test string" }, - "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } - -The generator emits a function to send the event. When member 'boxed' -is absent, it takes event-specific data one by one, in QAPI schema -order. Else it takes them wrapped in the C struct generated for the -complex type. See section "Code generated for events" for examples. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -The optional 'features' member specifies features. See "Features" -below for more on this. - - -=== Features === - -Syntax: - FEATURES = [ FEATURE, ... ] - FEATURE = STRING - | { 'name': STRING, '*if': COND } - -Sometimes, the behaviour of QEMU changes compatibly, but without a -change in the QMP syntax (usually by allowing values or operations -that previously resulted in an error). QMP clients may still need to -know whether the extension is available. - -For this purpose, a list of features can be specified for a command or -struct type. Each list member can either be { 'name': STRING, '*if': -COND }, or STRING, which is shorthand for { 'name': STRING }. - -The optional 'if' member specifies a conditional. See "Configuring -the schema" below for more on this. - -Example: - -{ 'struct': 'TestType', - 'data': { 'number': 'int' }, - 'features': [ 'allow-negative-numbers' ] } - -The feature strings are exposed to clients in introspection, as -explained in section "Client JSON Protocol introspection". - -Intended use is to have each feature string signal that this build of -QEMU shows a certain behaviour. - - -==== Special features ==== - -Feature "deprecated" marks a command, event, or struct member as -deprecated. It is not supported elsewhere so far. 
- - -=== Naming rules and reserved names === - -All names must begin with a letter, and contain only ASCII letters, -digits, hyphen, and underscore. There are two exceptions: enum values -may start with a digit, and names that are downstream extensions (see -section Downstream extensions) start with underscore. - -Names beginning with 'q_' are reserved for the generator, which uses -them for munging QMP names that resemble C keywords or other -problematic strings. For example, a member named "default" in qapi -becomes "q_default" in the generated C code. - -Types, commands, and events share a common namespace. Therefore, -generally speaking, type definitions should always use CamelCase for -user-defined type names, while built-in types are lowercase. - -Type names ending with 'Kind' or 'List' are reserved for the -generator, which uses them for implicit union enums and array types, -respectively. - -Command names, and member names within a type, should be all lower -case with words separated by a hyphen. However, some existing older -commands and complex types use underscore; when extending them, -consistency is preferred over blindly avoiding underscore. - -Event names should be ALL_CAPS with words separated by underscore. - -Member name 'u' and names starting with 'has-' or 'has_' are reserved -for the generator, which uses them for unions and for tracking -optional members. - -Any name (command, event, type, member, or enum value) beginning with -"x-" is marked experimental, and may be withdrawn or changed -incompatibly in a future release. - -Pragmas 'command-name-exceptions' and 'member-name-exceptions' let you -violate naming rules. Use for new code is strongly discouraged. - - -=== Downstream extensions === - -QAPI schema names that are externally visible, say in the Client JSON -Protocol, need to be managed with care. Names starting with a -downstream prefix of the form __RFQDN_ are reserved for the downstream -who controls the valid, reverse fully qualified domain name RFQDN. -RFQDN may only contain ASCII letters, digits, hyphen and period. - -Example: Red Hat, Inc. controls redhat.com, and may therefore add a -downstream command __com.redhat_drive-mirror. - - -=== Configuring the schema === - -Syntax: - COND = STRING - | [ STRING, ... ] - -All definitions take an optional 'if' member. Its value must be a -string or a list of strings. A string is shorthand for a list -containing just that string. The code generated for the definition -will then be guarded by #if STRING for each STRING in the COND list. - -Example: a conditional struct - - { 'struct': 'IfStruct', 'data': { 'foo': 'int' }, - 'if': ['defined(CONFIG_FOO)', 'defined(HAVE_BAR)'] } - -gets its generated code guarded like this: - - #if defined(CONFIG_FOO) - #if defined(HAVE_BAR) - ... generated code ... - #endif /* defined(HAVE_BAR) */ - #endif /* defined(CONFIG_FOO) */ - -Individual members of complex types, commands arguments, and -event-specific data can also be made conditional. This requires the -longhand form of MEMBER. - -Example: a struct type with unconditional member 'foo' and conditional -member 'bar' - -{ 'struct': 'IfStruct', 'data': - { 'foo': 'int', - 'bar': { 'type': 'int', 'if': 'defined(IFCOND)'} } } - -A union's discriminator may not be conditional. - -Likewise, individual enumeration values be conditional. This requires -the longhand form of ENUM-VALUE. 
- -Example: an enum type with unconditional value 'foo' and conditional -value 'bar' - -{ 'enum': 'IfEnum', 'data': - [ 'foo', - { 'name' : 'bar', 'if': 'defined(IFCOND)' } ] } - -Likewise, features can be conditional. This requires the longhand -form of FEATURE. - -Example: a struct with conditional feature 'allow-negative-numbers' - -{ 'struct': 'TestType', - 'data': { 'number': 'int' }, - 'features': [ { 'name': 'allow-negative-numbers', - 'if': 'defined(IFCOND)' } ] } - -Please note that you are responsible to ensure that the C code will -compile with an arbitrary combination of conditions, since the -generator is unable to check it at this point. - -The conditions apply to introspection as well, i.e. introspection -shows a conditional entity only when the condition is satisfied in -this particular build. - - -=== Documentation comments === - -A multi-line comment that starts and ends with a '##' line is a -documentation comment. - -If the documentation comment starts like - - ## - # @SYMBOL: - -it documents the definition if SYMBOL, else it's free-form -documentation. - -See below for more on definition documentation. - -Free-form documentation may be used to provide additional text and -structuring content. - -==== Headings and subheadings ==== - -A free-form documentation comment containing a line which starts with -some '=' symbols and then a space defines a section heading: - - ## - # = This is a top level heading - # - # This is a free-form comment which will go under the - # top level heading. - ## - - ## - # == This is a second level heading - ## - -A heading line must be the first line of the documentation -comment block. - -Section headings must always be correctly nested, so you can only -define a third-level heading inside a second-level heading, and so on. - -==== Documentation markup ==== - -Documentation comments can use most rST markup. In particular, -a '::' literal block can be used for examples: - - # :: - # - # Text of the example, may span - # multiple lines - -'*' starts an itemized list: - - # * First item, may span - # multiple lines - # * Second item - -You can also use '-' instead of '*'. - -A decimal number followed by '.' starts a numbered list: - - # 1. First item, may span - # multiple lines - # 2. Second item - -The actual number doesn't matter. - -Lists of either kind must be preceded and followed by a blank line. -If a list item's text spans multiple lines, then the second and -subsequent lines must be correctly indented to line up with the -first character of the first line. - -The usual '**strong**', '*emphasised*' and '``literal``' markup should -be used. If you need a single literal '*' you will need to -backslash-escape it. As an extension beyond the usual rST syntax, you -can also use '@foo' to reference a name in the schema; this is -rendered the same way as '``foo``'. - -Example: - -## -# Some text foo with **bold** and *emphasis* -# 1. with a list -# 2. like that -# -# And some code: -# -# :: -# -# $ echo foo -# -> do this -# <- get that -## - - -==== Definition documentation ==== - -Definition documentation, if present, must immediately precede the -definition it documents. - -When documentation is required (see pragma 'doc-required'), every -definition must have documentation. 
- -Definition documentation starts with a line naming the definition, -followed by an optional overview, a description of each argument (for -commands and events), member (for structs and unions), branch (for -alternates), or value (for enums), and finally optional tagged -sections. - -Descriptions of arguments can span multiple lines. The description -text can start on the line following the '@argname:', in which case it -must not be indented at all. It can also start on the same line as -the '@argname:'. In this case if it spans multiple lines then second -and subsequent lines must be indented to line up with the first -character of the first line of the description: - -# @argone: -# This is a two line description -# in the first style. -# -# @argtwo: This is a two line description -# in the second style. - -The number of spaces between the ':' and the text is not significant. - -FIXME: the parser accepts these things in almost any order. -FIXME: union branches should be described, too. - -Extensions added after the definition was first released carry a -'(since x.y.z)' comment. - -A tagged section starts with one of the following words: -"Note:"/"Notes:", "Since:", "Example"/"Examples", "Returns:", "TODO:". -The section ends with the start of a new section. - -The text of a section can start on a new line, in -which case it must not be indented at all. It can also start -on the same line as the 'Note:', 'Returns:', etc tag. In this -case if it spans multiple lines then second and subsequent -lines must be indented to match the first, in the same way as -multiline argument descriptions. - -A 'Since: x.y.z' tagged section lists the release that introduced the -definition. - -The text of a section can start on a new line, in -which case it must not be indented at all. It can also start -on the same line as the 'Note:', 'Returns:', etc tag. In this -case if it spans multiple lines then second and subsequent -lines must be indented to match the first. - -An 'Example' or 'Examples' section is automatically rendered -entirely as literal fixed-width text. In other sections, -the text is formatted, and rST markup can be used. - -For example: - -## -# @BlockStats: -# -# Statistics of a virtual block device or a block backing device. -# -# @device: If the stats are for a virtual block device, the name -# corresponding to the virtual block device. -# -# @node-name: The node name of the device. (since 2.3) -# -# ... more members ... -# -# Since: 0.14.0 -## -{ 'struct': 'BlockStats', - 'data': {'*device': 'str', '*node-name': 'str', - ... more members ... } } - -## -# @query-blockstats: -# -# Query the @BlockStats for all virtual block devices. -# -# @query-nodes: If true, the command will query all the -# block nodes ... explain, explain ... (since 2.3) -# -# Returns: A list of @BlockStats for each virtual block devices. -# -# Since: 0.14.0 -# -# Example: -# -# -> { "execute": "query-blockstats" } -# <- { -# ... lots of output ... -# } -# -## -{ 'command': 'query-blockstats', - 'data': { '*query-nodes': 'bool' }, - 'returns': ['BlockStats'] } - - -== Client JSON Protocol introspection == - -Clients of a Client JSON Protocol commonly need to figure out what -exactly the server (QEMU) supports. - -For this purpose, QMP provides introspection via command -query-qmp-schema. QGA currently doesn't support introspection. - -While Client JSON Protocol wire compatibility should be maintained -between qemu versions, we cannot make the same guarantees for -introspection stability. 
For example, one version of qemu may provide -a non-variant optional member of a struct, and a later version rework -the member to instead be non-optional and associated with a variant. -Likewise, one version of qemu may list a member with open-ended type -'str', and a later version could convert it to a finite set of strings -via an enum type; or a member may be converted from a specific type to -an alternate that represents a choice between the original type and -something else. - -query-qmp-schema returns a JSON array of SchemaInfo objects. These -objects together describe the wire ABI, as defined in the QAPI schema. -There is no specified order to the SchemaInfo objects returned; a -client must search for a particular name throughout the entire array -to learn more about that name, but is at least guaranteed that there -will be no collisions between type, command, and event names. - -However, the SchemaInfo can't reflect all the rules and restrictions -that apply to QMP. It's interface introspection (figuring out what's -there), not interface specification. The specification is in the QAPI -schema. To understand how QMP is to be used, you need to study the -QAPI schema. - -Like any other command, query-qmp-schema is itself defined in the QAPI -schema, along with the SchemaInfo type. This text attempts to give an -overview how things work. For details you need to consult the QAPI -schema. - -SchemaInfo objects have common members "name", "meta-type", -"features", and additional variant members depending on the value of -meta-type. - -Each SchemaInfo object describes a wire ABI entity of a certain -meta-type: a command, event or one of several kinds of type. - -SchemaInfo for commands and events have the same name as in the QAPI -schema. - -Command and event names are part of the wire ABI, but type names are -not. Therefore, the SchemaInfo for types have auto-generated -meaningless names. For readability, the examples in this section use -meaningful type names instead. - -Optional member "features" exposes the entity's feature strings as a -JSON array of strings. - -To examine a type, start with a command or event using it, then follow -references by name. - -QAPI schema definitions not reachable that way are omitted. - -The SchemaInfo for a command has meta-type "command", and variant -members "arg-type", "ret-type" and "allow-oob". On the wire, the -"arguments" member of a client's "execute" command must conform to the -object type named by "arg-type". The "return" member that the server -passes in a success response conforms to the type named by "ret-type". -When "allow-oob" is true, it means the command supports out-of-band -execution. It defaults to false. - -If the command takes no arguments, "arg-type" names an object type -without members. Likewise, if the command returns nothing, "ret-type" -names an object type without members. - -Example: the SchemaInfo for command query-qmp-schema - - { "name": "query-qmp-schema", "meta-type": "command", - "arg-type": "q_empty", "ret-type": "SchemaInfoList" } - - Type "q_empty" is an automatic object type without members, and type - "SchemaInfoList" is the array of SchemaInfo type. - -The SchemaInfo for an event has meta-type "event", and variant member -"arg-type". On the wire, a "data" member that the server passes in an -event conforms to the object type named by "arg-type". - -If the event carries no additional information, "arg-type" names an -object type without members. The event may not have a data member on -the wire then. 
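For illustration, the SchemaInfo for an event declared without 'data'
(using a meaningful name for the empty argument type, as elsewhere in
this section) could look like:

    { "name": "MY_EVENT", "meta-type": "event",
      "arg-type": "q_empty" }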
- -Each command or event defined with 'data' as MEMBERS object in the -QAPI schema implicitly defines an object type. - -Example: the SchemaInfo for EVENT_C from section Events - - { "name": "EVENT_C", "meta-type": "event", - "arg-type": "q_obj-EVENT_C-arg" } - - Type "q_obj-EVENT_C-arg" is an implicitly defined object type with - the two members from the event's definition. - -The SchemaInfo for struct and union types has meta-type "object". - -The SchemaInfo for a struct type has variant member "members". - -The SchemaInfo for a union type additionally has variant members "tag" -and "variants". - -"members" is a JSON array describing the object's common members, if -any. Each element is a JSON object with members "name" (the member's -name), "type" (the name of its type), and optionally "default". The -member is optional if "default" is present. Currently, "default" can -only have value null. Other values are reserved for future -extensions. The "members" array is in no particular order; clients -must search the entire object when learning whether a particular -member is supported. - -Example: the SchemaInfo for MyType from section Struct types - - { "name": "MyType", "meta-type": "object", - "members": [ - { "name": "member1", "type": "str" }, - { "name": "member2", "type": "int" }, - { "name": "member3", "type": "str", "default": null } ] } - -"features" exposes the command's feature strings as a JSON array of -strings. - -Example: the SchemaInfo for TestType from section Features: - - { "name": "TestType", "meta-type": "object", - "members": [ - { "name": "number", "type": "int" } ], - "features": ["allow-negative-numbers"] } - -"tag" is the name of the common member serving as type tag. -"variants" is a JSON array describing the object's variant members. -Each element is a JSON object with members "case" (the value of type -tag this element applies to) and "type" (the name of an object type -that provides the variant members for this type tag value). The -"variants" array is in no particular order, and is not guaranteed to -list cases in the same order as the corresponding "tag" enum type. - -Example: the SchemaInfo for flat union BlockdevOptions from section -Union types - - { "name": "BlockdevOptions", "meta-type": "object", - "members": [ - { "name": "driver", "type": "BlockdevDriver" }, - { "name": "read-only", "type": "bool", "default": null } ], - "tag": "driver", - "variants": [ - { "case": "file", "type": "BlockdevOptionsFile" }, - { "case": "qcow2", "type": "BlockdevOptionsQcow2" } ] } - -Note that base types are "flattened": its members are included in the -"members" array. - -A simple union implicitly defines an enumeration type for its implicit -discriminator (called "type" on the wire, see section Union types). - -A simple union implicitly defines an object type for each of its -variants. - -Example: the SchemaInfo for simple union BlockdevOptionsSimple from section -Union types - - { "name": "BlockdevOptionsSimple", "meta-type": "object", - "members": [ - { "name": "type", "type": "BlockdevOptionsSimpleKind" } ], - "tag": "type", - "variants": [ - { "case": "file", "type": "q_obj-BlockdevOptionsFile-wrapper" }, - { "case": "qcow2", "type": "q_obj-BlockdevOptionsQcow2-wrapper" } ] } - - Enumeration type "BlockdevOptionsSimpleKind" and the object types - "q_obj-BlockdevOptionsFile-wrapper", "q_obj-BlockdevOptionsQcow2-wrapper" - are implicitly defined. - -The SchemaInfo for an alternate type has meta-type "alternate", and -variant member "members". 
"members" is a JSON array. Each element is -a JSON object with member "type", which names a type. Values of the -alternate type conform to exactly one of its member types. There is -no guarantee on the order in which "members" will be listed. - -Example: the SchemaInfo for BlockdevRef from section Alternate types - - { "name": "BlockdevRef", "meta-type": "alternate", - "members": [ - { "type": "BlockdevOptions" }, - { "type": "str" } ] } - -The SchemaInfo for an array type has meta-type "array", and variant -member "element-type", which names the array's element type. Array -types are implicitly defined. For convenience, the array's name may -resemble the element type; however, clients should examine member -"element-type" instead of making assumptions based on parsing member -"name". - -Example: the SchemaInfo for ['str'] - - { "name": "[str]", "meta-type": "array", - "element-type": "str" } - -The SchemaInfo for an enumeration type has meta-type "enum" and -variant member "values". The values are listed in no particular -order; clients must search the entire enum when learning whether a -particular value is supported. - -Example: the SchemaInfo for MyEnum from section Enumeration types - - { "name": "MyEnum", "meta-type": "enum", - "values": [ "value1", "value2", "value3" ] } - -The SchemaInfo for a built-in type has the same name as the type in -the QAPI schema (see section Built-in Types), with one exception -detailed below. It has variant member "json-type" that shows how -values of this type are encoded on the wire. - -Example: the SchemaInfo for str - - { "name": "str", "meta-type": "builtin", "json-type": "string" } - -The QAPI schema supports a number of integer types that only differ in -how they map to C. They are identical as far as SchemaInfo is -concerned. Therefore, they get all mapped to a single type "int" in -SchemaInfo. - -As explained above, type names are not part of the wire ABI. Not even -the names of built-in types. Clients should examine member -"json-type" instead of hard-coding names of built-in types. - - -== Compatibility considerations == - -Maintaining backward compatibility at the Client JSON Protocol level -while evolving the schema requires some care. This section is about -syntactic compatibility, which is necessary, but not sufficient, for -actual compatibility. - -Clients send commands with argument data, and receive command -responses with return data and events with event data. - -Adding opt-in functionality to the send direction is backwards -compatible: adding commands, optional arguments, enumeration values, -union and alternate branches; turning an argument type into an -alternate of that type; making mandatory arguments optional. Clients -oblivious of the new functionality continue to work. - -Incompatible changes include removing commands, command arguments, -enumeration values, union and alternate branches, adding mandatory -command arguments, and making optional arguments mandatory. - -The specified behavior of an absent optional argument should remain -the same. With proper documentation, this policy still allows some -flexibility; for example, when an optional 'buffer-size' argument is -specified to default to a sensible buffer size, the actual default -value can still be changed. The specified default behavior is not the -exact size of the buffer, only that the default size is sensible. - -Adding functionality to the receive direction is generally backwards -compatible: adding events, adding return and event data members. 
-Clients are expected to ignore the ones they don't know. - -Removing "unreachable" stuff like events that can't be triggered -anymore, optional return or event data members that can't be sent -anymore, and return or event data member (enumeration) values that -can't be sent anymore makes no difference to clients, except for -introspection. The latter can conceivably confuse clients, so tread -carefully. - -Incompatible changes include removing return and event data members. - -Any change to a command definition's 'data' or one of the types used -there (recursively) needs to consider send direction compatibility. - -Any change to a command definition's 'return', an event definition's -'data', or one of the types used there (recursively) needs to consider -receive direction compatibility. - -Any change to types used in both contexts need to consider both. - -Enumeration type values and complex and alternate type members may be -reordered freely. For enumerations and alternate types, this doesn't -affect the wire encoding. For complex types, this might make the -implementation emit JSON object members in a different order, which -the Client JSON Protocol permits. - -Since type names are not visible in the Client JSON Protocol, types -may be freely renamed. Even certain refactorings are invisible, such -as splitting members from one type into a common base type. - - -== Code generation == - -The QAPI code generator qapi-gen.py generates code and documentation -from the schema. Together with the core QAPI libraries, this code -provides everything required to take JSON commands read in by a Client -JSON Protocol server, unmarshal the arguments into the underlying C -types, call into the corresponding C function, map the response back -to a Client JSON Protocol response to be returned to the user, and -introspect the commands. - -As an example, we'll use the following schema, which describes a -single complex user-defined type, along with command which takes a -list of that type as a parameter, and returns a single element of that -type. The user is responsible for writing the implementation of -qmp_my_command(); everything else is produced by the generator. - - $ cat example-schema.json - { 'struct': 'UserDefOne', - 'data': { 'integer': 'int', '*string': 'str' } } - - { 'command': 'my-command', - 'data': { 'arg1': ['UserDefOne'] }, - 'returns': 'UserDefOne' } - - { 'event': 'MY_EVENT' } - -We run qapi-gen.py like this: - - $ python scripts/qapi-gen.py --output-dir="qapi-generated" \ - --prefix="example-" example-schema.json - -For a more thorough look at generated code, the testsuite includes -tests/qapi-schema/qapi-schema-tests.json that covers more examples of -what the generator will accept, and compiles the resulting C code as -part of 'make check-unit'. - -=== Code generated for QAPI types === - -The following files are created: - -$(prefix)qapi-types.h - C types corresponding to types defined in - the schema - -$(prefix)qapi-types.c - Cleanup functions for the above C types - -The $(prefix) is an optional parameter used as a namespace to keep the -generated code from one schema/code-generation separated from others so code -can be generated/used from multiple schemas without clobbering previously -created code. - -Example: - - $ cat qapi-generated/example-qapi-types.h -[Uninteresting stuff omitted...] 
- - #ifndef EXAMPLE_QAPI_TYPES_H - #define EXAMPLE_QAPI_TYPES_H - - #include "qapi/qapi-builtin-types.h" - - typedef struct UserDefOne UserDefOne; - - typedef struct UserDefOneList UserDefOneList; - - typedef struct q_obj_my_command_arg q_obj_my_command_arg; - - struct UserDefOne { - int64_t integer; - bool has_string; - char *string; - }; - - void qapi_free_UserDefOne(UserDefOne *obj); - G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) - - struct UserDefOneList { - UserDefOneList *next; - UserDefOne *value; - }; - - void qapi_free_UserDefOneList(UserDefOneList *obj); - G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) - - struct q_obj_my_command_arg { - UserDefOneList *arg1; - }; - - #endif /* EXAMPLE_QAPI_TYPES_H */ - $ cat qapi-generated/example-qapi-types.c -[Uninteresting stuff omitted...] - - void qapi_free_UserDefOne(UserDefOne *obj) - { - Visitor *v; - - if (!obj) { - return; - } - - v = qapi_dealloc_visitor_new(); - visit_type_UserDefOne(v, NULL, &obj, NULL); - visit_free(v); - } - - void qapi_free_UserDefOneList(UserDefOneList *obj) - { - Visitor *v; - - if (!obj) { - return; - } - - v = qapi_dealloc_visitor_new(); - visit_type_UserDefOneList(v, NULL, &obj, NULL); - visit_free(v); - } - -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-types-SUBMODULE.h -SUBDIR/$(prefix)qapi-types-SUBMODULE.c - -If qapi-gen.py is run with option --builtins, additional files are -created: - -qapi-builtin-types.h - C types corresponding to built-in types - -qapi-builtin-types.c - Cleanup functions for the above C types - -=== Code generated for visiting QAPI types === - -These are the visitor functions used to walk through and convert -between a native QAPI C data structure and some other format (such as -QObject); the generated functions are named visit_type_FOO() and -visit_type_FOO_members(). - -The following files are generated: - -$(prefix)qapi-visit.c: Visitor function for a particular C type, used - to automagically convert QObjects into the - corresponding C type and vice-versa, as well - as for deallocating memory for an existing C - type - -$(prefix)qapi-visit.h: Declarations for previously mentioned visitor - functions - -Example: - - $ cat qapi-generated/example-qapi-visit.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_VISIT_H - #define EXAMPLE_QAPI_VISIT_H - - #include "qapi/qapi-builtin-visit.h" - #include "example-qapi-types.h" - - - bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp); - bool visit_type_UserDefOne(Visitor *v, const char *name, UserDefOne **obj, Error **errp); - bool visit_type_UserDefOneList(Visitor *v, const char *name, UserDefOneList **obj, Error **errp); - - bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp); - - #endif /* EXAMPLE_QAPI_VISIT_H */ - $ cat qapi-generated/example-qapi-visit.c -[Uninteresting stuff omitted...] 
- - bool visit_type_UserDefOne_members(Visitor *v, UserDefOne *obj, Error **errp) - { - if (!visit_type_int(v, "integer", &obj->integer, errp)) { - return false; - } - if (visit_optional(v, "string", &obj->has_string)) { - if (!visit_type_str(v, "string", &obj->string, errp)) { - return false; - } - } - return true; - } - - bool visit_type_UserDefOne(Visitor *v, const char *name, UserDefOne **obj, Error **errp) - { - bool ok = false; - - if (!visit_start_struct(v, name, (void **)obj, sizeof(UserDefOne), errp)) { - return false; - } - if (!*obj) { - /* incomplete */ - assert(visit_is_dealloc(v)); - goto out_obj; - } - if (!visit_type_UserDefOne_members(v, *obj, errp)) { - goto out_obj; - } - ok = visit_check_struct(v, errp); - out_obj: - visit_end_struct(v, (void **)obj); - if (!ok && visit_is_input(v)) { - qapi_free_UserDefOne(*obj); - *obj = NULL; - } - return ok; - } - - bool visit_type_UserDefOneList(Visitor *v, const char *name, UserDefOneList **obj, Error **errp) - { - bool ok = false; - UserDefOneList *tail; - size_t size = sizeof(**obj); - - if (!visit_start_list(v, name, (GenericList **)obj, size, errp)) { - return false; - } - - for (tail = *obj; tail; - tail = (UserDefOneList *)visit_next_list(v, (GenericList *)tail, size)) { - if (!visit_type_UserDefOne(v, NULL, &tail->value, errp)) { - goto out_obj; - } - } - - ok = visit_check_list(v, errp); - out_obj: - visit_end_list(v, (void **)obj); - if (!ok && visit_is_input(v)) { - qapi_free_UserDefOneList(*obj); - *obj = NULL; - } - return ok; - } - - bool visit_type_q_obj_my_command_arg_members(Visitor *v, q_obj_my_command_arg *obj, Error **errp) - { - if (!visit_type_UserDefOneList(v, "arg1", &obj->arg1, errp)) { - return false; - } - return true; - } - -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-visit-SUBMODULE.h -SUBDIR/$(prefix)qapi-visit-SUBMODULE.c - -If qapi-gen.py is run with option --builtins, additional files are -created: - -qapi-builtin-visit.h - Visitor functions for built-in types - -qapi-builtin-visit.c - Declarations for these visitor functions - -=== Code generated for commands === - -These are the marshaling/dispatch functions for the commands defined -in the schema. The generated code provides qmp_marshal_COMMAND(), and -declares qmp_COMMAND() that the user must implement. - -The following files are generated: - -$(prefix)qapi-commands.c: Command marshal/dispatch functions for each - QMP command defined in the schema - -$(prefix)qapi-commands.h: Function prototypes for the QMP commands - specified in the schema - -$(prefix)qapi-init-commands.h - Command initialization prototype - -$(prefix)qapi-init-commands.c - Command initialization code - -Example: - - $ cat qapi-generated/example-qapi-commands.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_COMMANDS_H - #define EXAMPLE_QAPI_COMMANDS_H - - #include "example-qapi-types.h" - - UserDefOne *qmp_my_command(UserDefOneList *arg1, Error **errp); - void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp); - - #endif /* EXAMPLE_QAPI_COMMANDS_H */ - $ cat qapi-generated/example-qapi-commands.c -[Uninteresting stuff omitted...] 
- - static void qmp_marshal_output_UserDefOne(UserDefOne *ret_in, QObject **ret_out, Error **errp) - { - Visitor *v; - - v = qobject_output_visitor_new(ret_out); - if (visit_type_UserDefOne(v, "unused", &ret_in, errp)) { - visit_complete(v, ret_out); - } - visit_free(v); - v = qapi_dealloc_visitor_new(); - visit_type_UserDefOne(v, "unused", &ret_in, NULL); - visit_free(v); - } - - void qmp_marshal_my_command(QDict *args, QObject **ret, Error **errp) - { - Error *err = NULL; - bool ok = false; - Visitor *v; - UserDefOne *retval; - q_obj_my_command_arg arg = {0}; - - v = qobject_input_visitor_new(QOBJECT(args)); - if (!visit_start_struct(v, NULL, NULL, 0, errp)) { - goto out; - } - if (visit_type_q_obj_my_command_arg_members(v, &arg, errp)) { - ok = visit_check_struct(v, errp); - } - visit_end_struct(v, NULL); - if (!ok) { - goto out; - } - - retval = qmp_my_command(arg.arg1, &err); - error_propagate(errp, err); - if (err) { - goto out; - } - - qmp_marshal_output_UserDefOne(retval, ret, errp); - - out: - visit_free(v); - v = qapi_dealloc_visitor_new(); - visit_start_struct(v, NULL, NULL, 0, NULL); - visit_type_q_obj_my_command_arg_members(v, &arg, NULL); - visit_end_struct(v, NULL); - visit_free(v); - } - -[Uninteresting stuff omitted...] - $ cat qapi-generated/example-qapi-init-commands.h -[Uninteresting stuff omitted...] - #ifndef EXAMPLE_QAPI_INIT_COMMANDS_H - #define EXAMPLE_QAPI_INIT_COMMANDS_H - - #include "qapi/qmp/dispatch.h" - - void example_qmp_init_marshal(QmpCommandList *cmds); - - #endif /* EXAMPLE_QAPI_INIT_COMMANDS_H */ - $ cat qapi-generated/example-qapi-init-commands.c -[Uninteresting stuff omitted...] - void example_qmp_init_marshal(QmpCommandList *cmds) - { - QTAILQ_INIT(cmds); - - qmp_register_command(cmds, "my-command", - qmp_marshal_my_command, QCO_NO_OPTIONS); - } -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-commands-SUBMODULE.h -SUBDIR/$(prefix)qapi-commands-SUBMODULE.c - -=== Code generated for events === - -This is the code related to events defined in the schema, providing -qapi_event_send_EVENT(). - -The following files are created: - -$(prefix)qapi-events.h - Function prototypes for each event type - -$(prefix)qapi-events.c - Implementation of functions to send an event - -$(prefix)qapi-emit-events.h - Enumeration of all event names, and - common event code declarations - -$(prefix)qapi-emit-events.c - Common event code definitions - -Example: - - $ cat qapi-generated/example-qapi-events.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_EVENTS_H - #define EXAMPLE_QAPI_EVENTS_H - - #include "qapi/util.h" - #include "example-qapi-types.h" - - void qapi_event_send_my_event(void); - - #endif /* EXAMPLE_QAPI_EVENTS_H */ - $ cat qapi-generated/example-qapi-events.c -[Uninteresting stuff omitted...] - - void qapi_event_send_my_event(void) - { - QDict *qmp; - - qmp = qmp_event_build_dict("MY_EVENT"); - - example_qapi_event_emit(EXAMPLE_QAPI_EVENT_MY_EVENT, qmp); - - qobject_unref(qmp); - } - -[Uninteresting stuff omitted...] - $ cat qapi-generated/example-qapi-emit-events.h -[Uninteresting stuff omitted...] 
- - #ifndef EXAMPLE_QAPI_EMIT_EVENTS_H - #define EXAMPLE_QAPI_EMIT_EVENTS_H - - #include "qapi/util.h" - - typedef enum example_QAPIEvent { - EXAMPLE_QAPI_EVENT_MY_EVENT, - EXAMPLE_QAPI_EVENT__MAX, - } example_QAPIEvent; - - #define example_QAPIEvent_str(val) \ - qapi_enum_lookup(&example_QAPIEvent_lookup, (val)) - - extern const QEnumLookup example_QAPIEvent_lookup; - - void example_qapi_event_emit(example_QAPIEvent event, QDict *qdict); - - #endif /* EXAMPLE_QAPI_EMIT_EVENTS_H */ - $ cat qapi-generated/example-qapi-emit-events.c -[Uninteresting stuff omitted...] - - const QEnumLookup example_QAPIEvent_lookup = { - .array = (const char *const[]) { - [EXAMPLE_QAPI_EVENT_MY_EVENT] = "MY_EVENT", - }, - .size = EXAMPLE_QAPI_EVENT__MAX - }; - -[Uninteresting stuff omitted...] - -For a modular QAPI schema (see section Include directives), code for -each sub-module SUBDIR/SUBMODULE.json is actually generated into - -SUBDIR/$(prefix)qapi-events-SUBMODULE.h -SUBDIR/$(prefix)qapi-events-SUBMODULE.c - -=== Code generated for introspection === - -The following files are created: - -$(prefix)qapi-introspect.c - Defines a string holding a JSON - description of the schema - -$(prefix)qapi-introspect.h - Declares the above string - -Example: - - $ cat qapi-generated/example-qapi-introspect.h -[Uninteresting stuff omitted...] - - #ifndef EXAMPLE_QAPI_INTROSPECT_H - #define EXAMPLE_QAPI_INTROSPECT_H - - #include "qapi/qmp/qlit.h" - - extern const QLitObject example_qmp_schema_qlit; - - #endif /* EXAMPLE_QAPI_INTROSPECT_H */ - $ cat qapi-generated/example-qapi-introspect.c -[Uninteresting stuff omitted...] - - const QLitObject example_qmp_schema_qlit = QLIT_QLIST(((QLitObject[]) { - QLIT_QDICT(((QLitDictEntry[]) { - { "arg-type", QLIT_QSTR("0"), }, - { "meta-type", QLIT_QSTR("command"), }, - { "name", QLIT_QSTR("my-command"), }, - { "ret-type", QLIT_QSTR("1"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "arg-type", QLIT_QSTR("2"), }, - { "meta-type", QLIT_QSTR("event"), }, - { "name", QLIT_QSTR("MY_EVENT"), }, - {} - })), - /* "0" = q_obj_my-command-arg */ - QLIT_QDICT(((QLitDictEntry[]) { - { "members", QLIT_QLIST(((QLitObject[]) { - QLIT_QDICT(((QLitDictEntry[]) { - { "name", QLIT_QSTR("arg1"), }, - { "type", QLIT_QSTR("[1]"), }, - {} - })), - {} - })), }, - { "meta-type", QLIT_QSTR("object"), }, - { "name", QLIT_QSTR("0"), }, - {} - })), - /* "1" = UserDefOne */ - QLIT_QDICT(((QLitDictEntry[]) { - { "members", QLIT_QLIST(((QLitObject[]) { - QLIT_QDICT(((QLitDictEntry[]) { - { "name", QLIT_QSTR("integer"), }, - { "type", QLIT_QSTR("int"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "default", QLIT_QNULL, }, - { "name", QLIT_QSTR("string"), }, - { "type", QLIT_QSTR("str"), }, - {} - })), - {} - })), }, - { "meta-type", QLIT_QSTR("object"), }, - { "name", QLIT_QSTR("1"), }, - {} - })), - /* "2" = q_empty */ - QLIT_QDICT(((QLitDictEntry[]) { - { "members", QLIT_QLIST(((QLitObject[]) { - {} - })), }, - { "meta-type", QLIT_QSTR("object"), }, - { "name", QLIT_QSTR("2"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "element-type", QLIT_QSTR("1"), }, - { "meta-type", QLIT_QSTR("array"), }, - { "name", QLIT_QSTR("[1]"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "json-type", QLIT_QSTR("int"), }, - { "meta-type", QLIT_QSTR("builtin"), }, - { "name", QLIT_QSTR("int"), }, - {} - })), - QLIT_QDICT(((QLitDictEntry[]) { - { "json-type", QLIT_QSTR("string"), }, - { "meta-type", QLIT_QSTR("builtin"), }, - { "name", QLIT_QSTR("str"), }, - {} - })), - {} - })); - -[Uninteresting stuff 
omitted...] diff --git a/docs/devel/qgraph.rst b/docs/devel/qgraph.rst index a9aff167ada..43342d9d650 100644 --- a/docs/devel/qgraph.rst +++ b/docs/devel/qgraph.rst @@ -1,8 +1,7 @@ .. _qgraph: -======================================== Qtest Driver Framework -======================================== +====================== In order to test a specific driver, plain libqos tests need to take care of booting QEMU with the right machine and devices. @@ -15,7 +14,7 @@ support that device. Using only libqos APIs, the test has to manually take care of covering all the setups, and build the correct command line. -This also introduces backward compability issues: if a device/driver command +This also introduces backward compatibility issues: if a device/driver command line name is changed, all tests that use that will not work properly anymore and need to be adjusted. @@ -31,17 +30,19 @@ so the sdhci-test should only care of linking its qgraph node with that interface. In this way, if the command line of a sdhci driver is changed, only the respective qgraph driver node has to be adjusted. +QGraph concepts +--------------- + The graph is composed by nodes that represent machines, drivers, tests and edges that define the relationships between them (``CONSUMES``, ``PRODUCES``, and ``CONTAINS``). - Nodes -^^^^^^ +~~~~~ A node can be of four types: -- **QNODE_MACHINE**: for example ``arm/raspi2`` +- **QNODE_MACHINE**: for example ``arm/raspi2b`` - **QNODE_DRIVER**: for example ``generic-sdhci`` - **QNODE_INTERFACE**: for example ``sdhci`` (interface for all ``-sdhci`` drivers). @@ -64,16 +65,16 @@ Notes for the nodes: drivers name, otherwise they won't be discovered Edges -^^^^^^ +~~~~~ -An edge relation between two nodes (drivers or machines) `X` and `Y` can be: +An edge relation between two nodes (drivers or machines) ``X`` and ``Y`` can be: -- ``X CONSUMES Y``: `Y` can be plugged into `X` -- ``X PRODUCES Y``: `X` provides the interface `Y` -- ``X CONTAINS Y``: `Y` is part of `X` component +- ``X CONSUMES Y``: ``Y`` can be plugged into ``X`` +- ``X PRODUCES Y``: ``X`` provides the interface ``Y`` +- ``X CONTAINS Y``: ``Y`` is part of ``X`` component Execution steps -^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~ The basic framework steps are the following: @@ -92,15 +93,129 @@ The basic framework steps are the following: Depending on the QEMU binary used, only some drivers/machines will be available and only test that are reached by them will be executed. +Command line +~~~~~~~~~~~~ + +Command line is built by using node names and optional arguments +passed by the user when building the edges. + +There are three types of command line arguments: + +- ``in node`` : created from the node name. For example, machines will + have ``-M `` to its command line, while devices + ``-device ``. It is automatically done by the framework. +- ``after node`` : added as additional argument to the node name. + This argument is added optionally when creating edges, + by setting the parameter ``after_cmd_line`` and + ``extra_edge_opts`` in ``QOSGraphEdgeOptions``. + The framework automatically adds + a comma before ``extra_edge_opts``, + because it is going to add attributes + after the destination node pointed by + the edge containing these options, and automatically + adds a space before ``after_cmd_line``, because it + adds an additional device, not an attribute. +- ``before node`` : added as additional argument to the node name. 
+ This argument is added optionally when creating edges, + by setting the parameter ``before_cmd_line`` in + ``QOSGraphEdgeOptions``. This attribute + is going to add attributes before the destination node + pointed by the edge containing these options. It is + helpful to commands that are not node-representable, + such as ``-fdsev`` or ``-netdev``. + +While adding command line in edges is always used, not all nodes names are +used in every path walk: this is because the contained or produced ones +are already added by QEMU, so only nodes that "consumes" will be used to +build the command line. Also, nodes that will have ``{ "abstract" : true }`` +as QMP attribute will loose their command line, since they are not proper +devices to be added in QEMU. + +Example:: + + QOSGraphEdgeOptions opts = { + .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," + "file.read-zeroes=on,format=raw", + .after_cmd_line = "-device scsi-hd,bus=vs0.0,drive=drv0", + + opts.extra_device_opts = "id=vs0"; + }; + + qos_node_create_driver("virtio-scsi-device", + virtio_scsi_device_create); + qos_node_consumes("virtio-scsi-device", "virtio-bus", &opts); + +Will produce the following command line: +``-drive id=drv0,if=none,file=null-co://, -device virtio-scsi-device,id=vs0 -device scsi-hd,bus=vs0.0,drive=drv0`` + +Troubleshooting unavailable tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If there is no path from an available machine to a test then that test will be +unavailable and won't execute. This can happen if a test or driver did not set +up its qgraph node correctly. It can also happen if the necessary machine type +or device is missing from the QEMU binary because it was compiled out or +otherwise. + +It is possible to troubleshoot unavailable tests by running:: + + $ QTEST_QEMU_BINARY=build/qemu-system-x86_64 build/tests/qtest/qos-test --verbose + # ALL QGRAPH EDGES: { + # src='virtio-net' + # |-> dest='virtio-net-tests/vhost-user/multiqueue' type=2 (node=0x559142109e30) + # |-> dest='virtio-net-tests/vhost-user/migrate' type=2 (node=0x559142109d00) + # src='virtio-net-pci' + # |-> dest='virtio-net' type=1 (node=0x55914210d740) + # src='pci-bus' + # |-> dest='virtio-net-pci' type=2 (node=0x55914210d880) + # src='pci-bus-pc' + # |-> dest='pci-bus' type=1 (node=0x559142103f40) + # src='i440FX-pcihost' + # |-> dest='pci-bus-pc' type=0 (node=0x55914210ac70) + # src='x86_64/pc' + # |-> dest='i440FX-pcihost' type=0 (node=0x5591421117f0) + # src='' + # |-> dest='x86_64/pc' type=0 (node=0x559142111600) + # |-> dest='arm/raspi2b' type=0 (node=0x559142110740) + ... + # } + # ALL QGRAPH NODES: { + # name='virtio-net-tests/announce-self' type=3 cmd_line='(null)' [available] + # name='arm/raspi2b' type=0 cmd_line='-M raspi2b ' [UNAVAILABLE] + ... + # } + +The ``virtio-net-tests/announce-self`` test is listed as "available" in the +"ALL QGRAPH NODES" output. This means the test will execute. We can follow the +qgraph path in the "ALL QGRAPH EDGES" output as follows: '' -> 'x86_64/pc' -> +'i440FX-pcihost' -> 'pci-bus-pc' -> 'pci-bus' -> 'virtio-net-pci' -> +'virtio-net'. The root of the qgraph is '' and the depth first search begins +there. + +The ``arm/raspi2b`` machine node is listed as "UNAVAILABLE". Although it is +reachable from the root via '' -> 'arm/raspi2b' the node is unavailable because +the QEMU binary did not list it when queried by the framework. This is expected +because we used the ``qemu-system-x86_64`` binary which does not support ARM +machine types. 
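Putting these two outputs together, the full test path that ``qos-test``
walks for this example should look something like this (the exact path
depends on the QEMU binary used)::

    /x86_64/pc/i440FX-pcihost/pci-bus-pc/pci-bus/virtio-net-pci/virtio-net/virtio-net-tests/announce-self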
+ +If a test is unexpectedly listed as "UNAVAILABLE", first check that the "ALL +QGRAPH EDGES" output reports edge connectivity from the root ('') to the test. +If there is no connectivity then the qgraph nodes were not set up correctly and +the driver or test code is incorrect. If there is connectivity, check the +availability of each node in the path in the "ALL QGRAPH NODES" output. The +first unavailable node in the path is the reason why the test is unavailable. +Typically this is because the QEMU binary lacks support for the necessary +machine type or device. + Creating a new driver and its interface -""""""""""""""""""""""""""""""""""""""""" +--------------------------------------- Here we continue the ``sdhci`` use case, with the following scenario: - ``sdhci-test`` aims to test the ``read[q,w], writeq`` functions offered by the ``sdhci`` drivers. - The current ``sdhci`` device is supported by both ``x86_64/pc`` and ``ARM`` - (in this example we focus on the ``arm-raspi2``) machines. + (in this example we focus on the ``arm-raspi2b``) machines. - QEMU offers 2 types of drivers: ``QSDHCI_MemoryMapped`` for ``ARM`` and ``QSDHCI_PCI`` for ``x86_64/pc``. Both implement the ``read[q,w], writeq`` functions. @@ -122,11 +237,11 @@ In order to implement such scenario in qgraph, the test developer needs to: all the pci drivers available) ``sdhci-pci --consumes--> pci-bus`` -- Create an ``arm/raspi2`` machine node. This machine ``contains`` +- Create an ``arm/raspi2b`` machine node. This machine ``contains`` a ``generic-sdhci`` memory mapped ``sdhci`` driver node, representing ``QSDHCI_MemoryMapped``. - ``arm/raspi2 --contains--> generic-sdhci`` + ``arm/raspi2b --contains--> generic-sdhci`` - Create the ``sdhci`` interface node. This interface offers the functions that are shared by all ``sdhci`` devices. 
The interface is produced by ``sdhci-pci`` and ``generic-sdhci``, @@ -141,7 +256,7 @@ In order to implement such scenario in qgraph, the test developer needs to: ``sdhci-test --consumes--> sdhci`` -``arm-raspi2`` machine, simplified from +``arm-raspi2b`` machine, simplified from ``tests/qtest/libqos/arm-raspi2-machine.c``:: #include "qgraph.h" @@ -159,7 +274,7 @@ In order to implement such scenario in qgraph, the test developer needs to: return &machine->alloc; } - fprintf(stderr, "%s not present in arm/raspi2\n", interface); + fprintf(stderr, "%s not present in arm/raspi2b\n", interface); g_assert_not_reached(); } @@ -171,7 +286,7 @@ In order to implement such scenario in qgraph, the test developer needs to: return &machine->sdhci.obj; } - fprintf(stderr, "%s not present in arm/raspi2\n", device); + fprintf(stderr, "%s not present in arm/raspi2b\n", device); g_assert_not_reached(); } @@ -195,10 +310,10 @@ In order to implement such scenario in qgraph, the test developer needs to: static void raspi2_register_nodes(void) { - /* arm/raspi2 --contains--> generic-sdhci */ - qos_node_create_machine("arm/raspi2", + /* arm/raspi2b --contains--> generic-sdhci */ + qos_node_create_machine("arm/raspi2b", qos_create_machine_arm_raspi2); - qos_node_contains("arm/raspi2", "generic-sdhci", NULL); + qos_node_contains("arm/raspi2b", "generic-sdhci", NULL); } libqos_init(raspi2_register_nodes); @@ -412,7 +527,7 @@ In the above example, all possible types of relations are created:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci or inverting the consumes edge in consumed_by:: @@ -428,10 +543,10 @@ or inverting the consumes edge in consumed_by:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci Adding a new test -""""""""""""""""" +----------------- Given the above setup, adding a new test is very simple. ``sdhci-test``, taken from ``tests/qtest/sdhci-test.c``:: @@ -478,7 +593,7 @@ Final graph will be like this:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci or inverting the consumes edge in consumed_by:: @@ -494,7 +609,7 @@ or inverting the consumes edge in consumed_by:: | +--produces-- + | - arm/raspi2 --contains--> generic-sdhci + arm/raspi2b --contains--> generic-sdhci Assuming there the binary is ``QTEST_QEMU_BINARY=./qemu-system-x86_64`` @@ -503,66 +618,11 @@ a valid test path will be: and for the binary ``QTEST_QEMU_BINARY=./qemu-system-arm``: -``/arm/raspi2/generic-sdhci/sdhci/sdhci-test`` +``/arm/raspi2b/generic-sdhci/sdhci/sdhci-test`` Additional examples are also in ``test-qgraph.c`` -Command line: -"""""""""""""" - -Command line is built by using node names and optional arguments -passed by the user when building the edges. - -There are three types of command line arguments: - -- ``in node`` : created from the node name. For example, machines will - have ``-M `` to its command line, while devices - ``-device ``. It is automatically done by the framework. -- ``after node`` : added as additional argument to the node name. - This argument is added optionally when creating edges, - by setting the parameter ``after_cmd_line`` and - ``extra_edge_opts`` in ``QOSGraphEdgeOptions``. 
- The framework automatically adds - a comma before ``extra_edge_opts``, - because it is going to add attributes - after the destination node pointed by - the edge containing these options, and automatically - adds a space before ``after_cmd_line``, because it - adds an additional device, not an attribute. -- ``before node`` : added as additional argument to the node name. - This argument is added optionally when creating edges, - by setting the parameter ``before_cmd_line`` in - ``QOSGraphEdgeOptions``. This attribute - is going to add attributes before the destination node - pointed by the edge containing these options. It is - helpful to commands that are not node-representable, - such as ``-fdsev`` or ``-netdev``. - -While adding command line in edges is always used, not all nodes names are -used in every path walk: this is because the contained or produced ones -are already added by QEMU, so only nodes that "consumes" will be used to -build the command line. Also, nodes that will have ``{ "abstract" : true }`` -as QMP attribute will loose their command line, since they are not proper -devices to be added in QEMU. - -Example:: - - QOSGraphEdgeOptions opts = { - .before_cmd_line = "-drive id=drv0,if=none,file=null-co://," - "file.read-zeroes=on,format=raw", - .after_cmd_line = "-device scsi-hd,bus=vs0.0,drive=drv0", - - opts.extra_device_opts = "id=vs0"; - }; - - qos_node_create_driver("virtio-scsi-device", - virtio_scsi_device_create); - qos_node_consumes("virtio-scsi-device", "virtio-bus", &opts); - -Will produce the following command line: -``-drive id=drv0,if=none,file=null-co://, -device virtio-scsi-device,id=vs0 -device scsi-hd,bus=vs0.0,drive=drv0`` - Qgraph API reference -^^^^^^^^^^^^^^^^^^^^ +-------------------- .. kernel-doc:: tests/qtest/libqos/qgraph.h diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst index 42d0dc4f4da..e5fe3597cd8 100644 --- a/docs/devel/qom.rst +++ b/docs/devel/qom.rst @@ -87,6 +87,14 @@ specific type: #define MY_DEVICE(obj) \ OBJECT_CHECK(MyDevice, obj, TYPE_MY_DEVICE) +In case the ObjectClass implementation can be built as module a +module_obj() line must be added to make sure qemu loads the module +when the object is needed. + +.. code-block:: c + + module_obj(TYPE_MY_DEVICE); + Class Initialization ==================== diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst index cbfc8af67e6..0454cc527e1 100644 --- a/docs/devel/secure-coding-practices.rst +++ b/docs/devel/secure-coding-practices.rst @@ -104,3 +104,12 @@ structures and only process the local copy. This prevents time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to crash when a vCPU thread modifies guest RAM while device emulation is processing it. + +Use of null-co block drivers +---------------------------- + +The ``null-co`` block driver is designed for performance: its read accesses are +not initialized by default. In case this driver has to be used for security +research, it must be used with the ``read-zeroes=on`` option which fills read +buffers with zeroes. Security issues reported with the default +(``read-zeroes=off``) will be discarded. diff --git a/docs/devel/stable-process.rst b/docs/devel/stable-process.rst index e541b983fac..c21fb86645a 100644 --- a/docs/devel/stable-process.rst +++ b/docs/devel/stable-process.rst @@ -1,3 +1,5 @@ +.. 
_stable-process: + QEMU and the stable process =========================== diff --git a/docs/devel/style.rst b/docs/devel/style.rst index 260e3263fa0..9c5c0fffd98 100644 --- a/docs/devel/style.rst +++ b/docs/devel/style.rst @@ -1,3 +1,5 @@ +.. _coding-style: + ================= QEMU Coding Style ================= @@ -686,7 +688,7 @@ Rationale: hex numbers are hard to read in logs when there is no 0x prefix, especially when (occasionally) the representation doesn't contain any letters and especially in one line with other decimal numbers. Number groups are allowed to not use '0x' because for some things notations like %x.%x.%x are used not -only in Qemu. Also dumping raw data bytes with '0x' is less readable. +only in QEMU. Also dumping raw data bytes with '0x' is less readable. '#' printf flag --------------- diff --git a/docs/devel/submitting-a-patch.rst b/docs/devel/submitting-a-patch.rst new file mode 100644 index 00000000000..e51259eb9ca --- /dev/null +++ b/docs/devel/submitting-a-patch.rst @@ -0,0 +1,562 @@ +.. _submitting-a-patch: + +Submitting a Patch +================== + +QEMU welcomes contributions of code (either fixing bugs or adding new +functionality). However, we get a lot of patches, and so we have some +guidelines about submitting patches. If you follow these, you'll help +make our task of code review easier and your patch is likely to be +committed faster. + +This page seems very long, so if you are only trying to post a quick +one-shot fix, the bare minimum we ask is that: + +- You **must** provide a Signed-off-by: line (this is a hard + requirement because it's how you say "I'm legally okay to contribute + this and happy for it to go into QEMU", modeled after the `Linux kernel + `__ + policy.) ``git commit -s`` or ``git format-patch -s`` will add one. +- All contributions to QEMU must be **sent as patches** to the + qemu-devel `mailing list `__. Patch contributions + should not be posted on the bug tracker, posted on forums, or + externally hosted and linked to. (We have other mailing lists too, + but all patches must go to qemu-devel, possibly with a Cc: to another + list.) ``git send-email`` (`step-by-step setup + guide `__ and `hints and + tips `__) + works best for delivering the patch without mangling it, but + attachments can be used as a last resort on a first-time submission. +- You must read replies to your message, and be willing to act on them. + Note, however, that maintainers are often willing to manually fix up + first-time contributions, since there is a learning curve involved in + making an ideal patch submission. + +You do not have to subscribe to post (list policy is to reply-to-all to +preserve CCs and keep non-subscribers in the loop on the threads they +start), although you may find it easier as a subscriber to pick up good +ideas from other posts. If you do subscribe, be prepared for a high +volume of email, often over one thousand messages in a week. The list is +moderated; first-time posts from an email address (whether or not you +subscribed) may be subject to some delay while waiting for a moderator +to whitelist your address. + +The larger your contribution is, or if you plan on becoming a long-term +contributor, then the more important the rest of this page becomes. +Reading the table of contents below should already give you an idea of +the basic requirements. Use the table of contents as a reference, and +read the parts that you have doubts about. + +.. contents:: Table of Contents + +.. 
_writing_your_patches: + +Writing your Patches +-------------------- + +.. _use_the_qemu_coding_style: + +Use the QEMU coding style +~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can run run *scripts/checkpatch.pl * before submitting to +check that you are in compliance with our coding standards. Be aware +that ``checkpatch.pl`` is not infallible, though, especially where C +preprocessor macros are involved; use some common sense too. See also: + +- :ref:`coding-style` +- `Automate a checkpatch run on + commit `__ + +.. _base_patches_against_current_git_master: + +Base patches against current git master +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's no point submitting a patch which is based on a released version +of QEMU because development will have moved on from then and it probably +won't even apply to master. We only apply selected bugfixes to release +branches and then only as backports once the code has gone into master. + +It is also okay to base patches on top of other on-going work that is +not yet part of the git master branch. To aid continuous integration +tools, such as `patchew `__, you should `add a +tag `__ +line ``Based-on: $MESSAGE_ID`` to your cover letter to make the series +dependency obvious. + +.. _split_up_long_patches: + +Split up long patches +~~~~~~~~~~~~~~~~~~~~~ + +Split up longer patches into a patch series of logical code changes. +Each change should compile and execute successfully. For instance, don't +add a file to the makefile in patch one and then add the file itself in +patch two. (This rule is here so that people can later use tools like +`git bisect `__ without hitting +points in the commit history where QEMU doesn't work for reasons +unrelated to the bug they're chasing.) Put documentation first, not +last, so that someone reading the series can do a clean-room evaluation +of the documentation, then validate that the code matched the +documentation. A commit message that mentions "Also, ..." is often a +good candidate for splitting into multiple patches. For more thoughts on +properly splitting patches and writing good commit messages, see `this +advice from +OpenStack `__. + +.. _make_code_motion_patches_easy_to_review: + +Make code motion patches easy to review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a series requires large blocks of code motion, there are tricks for +making the refactoring easier to review. Split up the series so that +semantic changes (or even function renames) are done in a separate patch +from the raw code motion. Use a one-time setup of ``git config +diff.renames true;`` ``git config diff.algorithm patience`` (refer to +`git-config `__). The 'diff.renames' +property ensures file rename patches will be given in a more compact +representation that focuses only on the differences across the file +rename, instead of showing the entire old file as a deletion and the new +file as an insertion. Meanwhile, the 'diff.algorithm' property ensures +that extracting a non-contiguous subset of one file into a new file, but +where all extracted parts occur in the same order both before and after +the patch, will reduce churn in trying to treat unrelated ``}`` lines in +the original file as separating hunks of changes. + +Ideally, a code motion patch can be reviewed by doing:: + + git format-patch --stdout -1 > patch; + diff -u <(sed -n 's/^-//p' patch) <(sed -n 's/^\+//p' patch) + +to focus on the few changes that weren't wholesale code motion. + +.. 
_dont_include_irrelevant_changes: + +Don't include irrelevant changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In particular, don't include formatting, coding style or whitespace +changes to bits of code that would otherwise not be touched by the +patch. (It's OK to fix coding style issues in the immediate area (few +lines) of the lines you're changing.) If you think a section of code +really does need a reindent or other large-scale style fix, submit this +as a separate patch which makes no semantic changes; don't put it in the +same patch as your bug fix. + +For smaller patches in less frequently changed areas of QEMU, consider +using the :ref:`trivial-patches` process. + +.. _write_a_meaningful_commit_message: + +Write a meaningful commit message +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Commit messages should be meaningful and should stand on their own as a +historical record of why the changes you applied were necessary or +useful. + +QEMU follows the usual standard for git commit messages: the first line +(which becomes the email subject line) is "subsystem: single line +summary of change". Whether the "single line summary of change" starts +with a capital is a matter of taste, but we prefer that the summary does +not end in a dot. Look at ``git shortlog -30`` for an idea of sample +subject lines. Then there is a blank line and a more detailed +description of the patch, another blank and your Signed-off-by: line. +Please do not use lines that are longer than 76 characters in your +commit message (so that the text still shows up nicely with "git show" +in a 80-columns terminal window). + +The body of the commit message is a good place to document why your +change is important. Don't include comments like "This is a suggestion +for fixing this bug" (they can go below the ``---`` line in the email so +they don't go into the final commit message). Make sure the body of the +commit message can be read in isolation even if the reader's mailer +displays the subject line some distance apart (that is, a body that +starts with "... so that" as a continuation of the subject line is +harder to follow). + +If your patch fixes a commit that is already in the repository, please +add an additional line with "Fixes: +("Fixed commit subject")" below the patch description / before your +"Signed-off-by:" line in the commit message. + +If your patch fixes a bug in the gitlab bug tracker, please add a line +with "Resolves: " to the commit message, too. Gitlab can +close bugs automatically once commits with the "Resolved:" keyword get +merged into the master branch of the project. And if your patch addresses +a bug in another public bug tracker, you can also use a line with +"Buglink: " for reference here, too. + +Example:: + + Fixes: 14055ce53c2d ("s390x/tcg: avoid overflows in time2tod/tod2time") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/42 + Buglink: https://bugs.launchpad.net/qemu/+bug/1804323`` + +Some other tags that are used in commit messages include "Message-Id:" +"Tested-by:", "Acked-by:", "Reported-by:", "Suggested-by:". See ``git +log`` for these keywords for example usage. + +.. _test_your_patches: + +Test your patches +~~~~~~~~~~~~~~~~~ + +Although QEMU has `continuous integration +services `__ that attempt to test +patches submitted to the list, it still saves everyone time if you have +already tested that your patch compiles and works. 
Because QEMU is such +a large project, it's okay to use configure arguments to limit what is +built for faster turnaround during your development time; but it is +still wise to also check that your patches work with a full build before +submitting a series, especially if your changes might have an unintended +effect on other areas of the code you don't normally experiment with. +See `Testing `__ for more details on what tests are available. +Also, it is a wise idea to include a testsuite addition as part of your +patches - either to ensure that future changes won't regress your new +feature, or to add a test which exposes the bug that the rest of your +series fixes. Keeping separate commits for the test and the fix allows +reviewers to rebase the test to occur first to prove it catches the +problem, then again to place it last in the series so that bisection +doesn't land on a known-broken state. + +.. _submitting_your_patches: + +Submitting your Patches +----------------------- + +.. _if_you_cannot_send_patch_emails: + +If you cannot send patch emails +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In rare cases it may not be possible to send properly formatted patch +emails. You can use `sourcehut `__ to send your +patches to the QEMU mailing list by following these steps: + +#. Register or sign in to your account +#. Add your SSH public key in `meta \| + keys `__. +#. Publish your git branch using **git push git@git.sr.ht:~USERNAME/qemu + HEAD** +#. Send your patches to the QEMU mailing list using the web-based + ``git-send-email`` UI at https://git.sr.ht/~USERNAME/qemu/send-email + +`This video +`__ +shows the web-based ``git-send-email`` workflow. Documentation is +available `here +`__. + +.. _cc_the_relevant_maintainer: + +CC the relevant maintainer +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches both to the mailing list and CC the maintainer(s) of the +files you are modifying. look in the MAINTAINERS file to find out who +that is. Also try using scripts/get_maintainer.pl from the repository +for learning the most common committers for the files you touched. + +Example:: + + ~/src/qemu/scripts/get_maintainer.pl -f hw/ide/core.c + +In fact, you can automate this, via a one-time setup of ``git config +sendemail.cccmd 'scripts/get_maintainer.pl --nogit-fallback'`` (Refer to +`git-config `__.) + +.. _do_not_send_as_an_attachment: + +Do not send as an attachment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches inline so they are easy to reply to with review comments. +Do not put patches in attachments. + +.. _use_git_format_patch: + +Use ``git format-patch`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the right diff format. +`git format-patch `__ will +produce patch emails in the right format (check the documentation to +find out how to drive it). You can then edit the cover letter before +using ``git send-email`` to mail the files to the mailing list. (We +recommend `git send-email `__ +because mail clients often mangle patches by wrapping long lines or +messing up whitespace. Some distributions do not include send-email in a +default install of git; you may need to download additional packages, +such as 'git-email' on Fedora-based systems.) Patch series need a cover +letter, with shallow threading (all patches in the series are +in-reply-to the cover letter, but not to each other); single unrelated +patches do not need a cover letter (but if you do send a cover letter, +use ``--numbered`` so the cover and the patch have distinct subject lines). 
Patches are easier to find if they start a new top-level thread, rather
than being buried in-reply-to another existing thread.

.. _avoid_posting_large_binary_blob:

Avoid posting large binary blob
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If you added binaries to the repository, consider producing the patch
emails using ``git format-patch --no-binary`` and include a link to a
git repository to fetch the original commit.

.. _patch_emails_must_include_a_signed_off_by_line:

Patch emails must include a ``Signed-off-by:`` line
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For more information see `SubmittingPatches 1.12
`__.
This is vital or we will not be able to apply your patch! Please use
your real name to sign a patch (not an alias or acronym).

If you wrote the patch, make sure your "From:" and "Signed-off-by:"
lines use the same spelling. It's okay if you subscribe or contribute to
the list via more than one address, but using multiple addresses in one
commit just confuses things. If someone else wrote the patch, git will
include a "From:" line in the body of the email (different from your
envelope From:) that will give credit to the correct author; but again,
that author's Signed-off-by: line is mandatory, with the same spelling.

.. _include_a_meaningful_cover_letter:

Include a meaningful cover letter
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This is a requirement for any series with multiple patches (as it aids
continuous integration), but optional for an isolated patch. The cover
letter explains the overall goal of such a series, and also provides a
convenient 0/N email for others to reply to the series as a whole. A
one-time setup of ``git config format.coverletter auto`` (refer to
`git-config `__) will generate the
cover letter as needed.

When reviewers don't know your goal at the start of their review, they
may object to early changes that don't make sense until the end of the
series, because they do not have enough context yet at that point of
their review. A series where the goal is unclear also risks a higher
number of review-fix cycles because the reviewers haven't bought into
the idea yet. If the cover letter can explain these points to the
reviewer, the process will be smoother and patches will get merged faster.
Make sure your cover letter includes a diffstat of changes made over the
entire series; potential reviewers know what files they are interested
in, and they need an easy way to determine if your series touches them.

.. _use_the_rfc_tag_if_needed:

Use the RFC tag if needed
~~~~~~~~~~~~~~~~~~~~~~~~~

For example, "[PATCH RFC v2]". ``git format-patch --subject-prefix=RFC``
can help.

"RFC" means "Request For Comments" and is a statement that you don't
intend for your patchset to be applied to master, but would like some
review on it anyway.
Reasons for doing this include: + +- the patch depends on some pending kernel changes which haven't yet + been accepted, so the QEMU patch series is blocked until that + dependency has been dealt with, but is worth reviewing anyway +- the patch set is not finished yet (perhaps it doesn't cover all use + cases or work with all targets) but you want early review of a major + API change or design structure before continuing + +In general, since it's asking other people to do review work on a +patchset that the submitter themselves is saying shouldn't be applied, +it's best to: + +- use it sparingly +- in the cover letter, be clear about why a patch is an RFC, what areas + of the patchset you're looking for review on, and why reviewers + should care + +.. _consider_whether_your_patch_is_applicable_for_stable: + +Consider whether your patch is applicable for stable +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patch fixes a severe issue or a regression, it may be applicable +for stable. In that case, consider adding ``Cc: qemu-stable@nongnu.org`` +to your patch to notify the stable maintainers. + +For more details on how QEMU's stable process works, refer to the +:ref:`stable-process` page. + +.. _participating_in_code_review: + +Participating in Code Review +---------------------------- + +All patches submitted to the QEMU project go through a code review +process before they are accepted. Some areas of code that are well +maintained may review patches quickly, lesser-loved areas of code may +have a longer delay. + +.. _stay_around_to_fix_problems_raised_in_code_review: + +Stay around to fix problems raised in code review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Not many patches get into QEMU straight away -- it is quite common that +developers will identify bugs, or suggest a cleaner approach, or even +just point out code style issues or commit message typos. You'll need to +respond to these, and then send a second version of your patches with +the issues fixed. This takes a little time and effort on your part, but +if you don't do it then your changes will never get into QEMU. It's also +just polite -- it is quite disheartening for a developer to spend time +reviewing your code and suggesting improvements, only to find that +you're not going to do anything further and it was all wasted effort. + +When replying to comments on your patches **reply to all and not just +the sender** -- keeping discussion on the mailing list means everybody +can follow it. + +.. _pay_attention_to_review_comments: + +Pay attention to review comments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Someone took their time to review your work, and it pays to respect that +effort; repeatedly submitting a series without addressing all comments +from the previous round tends to alienate reviewers and stall your +patch. Reviewers aren't always perfect, so it is okay if you want to +argue that your code was correct in the first place instead of blindly +doing everything the reviewer asked. On the other hand, if someone +pointed out a potential issue during review, then even if your code +turns out to be correct, it's probably a sign that you should improve +your commit message and/or comments in the code explaining why the code +is correct. + +If you fix issues that are raised during review **resend the entire +patch series** not just the one patch that was changed. This allows +maintainers to easily apply the fixed series without having to manually +identify which patches are relevant. 
Send the new version as a complete +fresh email or series of emails -- don't try to make it a followup to +version 1. (This helps automatic patch email handling tools distinguish +between v1 and v2 emails.) + +.. _when_resending_patches_add_a_version_tag: + +When resending patches add a version tag +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All patches beyond the first version should include a version tag -- for +example, "[PATCH v2]". This means people can easily identify whether +they're looking at the most recent version. (The first version of a +patch need not say "v1", just [PATCH] is sufficient.) For patch series, +the version applies to the whole series -- even if you only change one +patch, you resend the entire series and mark it as "v2". Don't try to +track versions of different patches in the series separately. `git +format-patch `__ and `git +send-email `__ both understand +the ``-v2`` option to make this easier. Send each new revision as a new +top-level thread, rather than burying it in-reply-to an earlier +revision, as many reviewers are not looking inside deep threads for new +patches. + +.. _include_version_history_in_patchset_revisions: + +Include version history in patchset revisions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For later versions of patches, include a summary of changes from +previous versions, but not in the commit message itself. In an email +formatted as a git patch, the commit message is the part above the ``---`` +line, and this will go into the git changelog when the patch is +committed. This part should be a self-contained description of what this +version of the patch does, written to make sense to anybody who comes +back to look at this commit in git in six months' time. The part below +the ``---`` line and above the patch proper (git format-patch puts the +diffstat here) is a good place to put remarks for people reading the +patch email, and this is where the "changes since previous version" +summary belongs. The `git-publish +`__ script can help with +tracking a good summary across versions. Also, the `git-backport-diff +`__ script can help focus +reviewers on what changed between revisions. + +.. _tips_and_tricks: + +Tips and Tricks +--------------- + +.. _proper_use_of_reviewed_by_tags_can_aid_review: + +Proper use of Reviewed-by: tags can aid review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When reviewing a large series, a reviewer can reply to some of the +patches with a Reviewed-by tag, stating that they are happy with that +patch in isolation (sometimes conditional on minor cleanup, like fixing +whitespace, that doesn't affect code content). You should then update +those commit messages by hand to include the Reviewed-by tag, so that in +the next revision, reviewers can spot which patches were already clean +from the previous round. Conversely, if you significantly modify a patch +that was previously reviewed, remove the reviewed-by tag out of the +commit message, as well as listing the changes from the previous +version, to make it easier to focus a reviewer's attention to your +changes. + +.. _if_your_patch_seems_to_have_been_ignored: + +If your patch seems to have been ignored +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patchset has received no replies you should "ping" it after a +week or two, by sending an email as a reply-to-all to the patch mail, +including the word "ping" and ideally also a link to the page for the +patch on `patchew `__ or +`lore.kernel.org `__. 
It's worth +double-checking for reasons why your patch might have been ignored +(forgot to CC the maintainer? annoyed people by failing to respond to +review comments on an earlier version?), but often for less-maintained +areas of QEMU patches do just slip through the cracks. If your ping is +also ignored, ping again after another week or so. As the submitter, you +are the person with the most motivation to get your patch applied, so +you have to be persistent. + +.. _is_my_patch_in: + +Is my patch in? +~~~~~~~~~~~~~~~ + +QEMU has some Continuous Integration machines that try to catch patch +submission problems as soon as possible. `patchew +`__ includes a web interface for tracking the +status of various threads that have been posted to the list, and may +send you an automated mail if it detected a problem with your patch. + +Once your patch has had enough review on list, the maintainer for that +area of code will send notification to the list that they are including +your patch in a particular staging branch. Periodically, the maintainer +then takes care of :ref:`submitting-a-pull-request` +for aggregating topic branches into mainline QEMU. Generally, you do not +need to send a pull request unless you have contributed enough patches +to become a maintainer over a particular section of code. Maintainers +may further modify your commit, by resolving simple merge conflicts or +fixing minor typos pointed out during review, but will always add a +Signed-off-by line in addition to yours, indicating that it went through +their tree. Occasionally, the maintainer's pull request may hit more +difficult merge conflicts, where you may be requested to help rebase and +resolve the problems. It may take a couple of weeks between when your +patch first had a positive review to when it finally lands in qemu.git; +release cycle freezes may extend that time even longer. + +.. _return_the_favor: + +Return the favor +~~~~~~~~~~~~~~~~ + +Peer review only works if everyone chips in a bit of review time. If +everyone submitted more patches than they reviewed, we would have a +patch backlog. A good goal is to try to review at least as many patches +from others as what you submit. Don't worry if you don't know the code +base as well as a maintainer; it's perfectly fine to admit when your +review is weak because you are unfamiliar with the code. diff --git a/docs/devel/submitting-a-pull-request.rst b/docs/devel/submitting-a-pull-request.rst new file mode 100644 index 00000000000..c9d1e8afd91 --- /dev/null +++ b/docs/devel/submitting-a-pull-request.rst @@ -0,0 +1,77 @@ +.. _submitting-a-pull-request: + +Submitting a Pull Request +========================= + +QEMU welcomes contributions of code, but we generally expect these to be +sent as simple patch emails to the mailing list (see our page on +:ref:`submitting-a-patch` +for more details). Generally only existing submaintainers of a tree +will need to submit pull requests, although occasionally for a large +patch series we might ask a submitter to send a pull request. This page +documents our recommendations on pull requests for those people. + +A good rule of thumb is not to send a pull request unless somebody asks +you to. + +**Resend the patches with the pull request** as emails which are +threaded as follow-ups to the pull request itself. The simplest way to +do this is to use ``git format-patch --cover-letter`` to create the +emails, and then edit the cover letter to include the pull request +details that ``git request-pull`` outputs. 
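As a sketch of that flow (the tag name, remote and URL below are purely
illustrative, not a required convention), the steps could be::

    git tag -s pull-myfeature-20210501 -m "My feature changes for QEMU"
    git push my-public-remote pull-myfeature-20210501
    git request-pull origin/master https://example.org/me/qemu.git pull-myfeature-20210501
    git format-patch --cover-letter -o pull/ origin/master..pull-myfeature-20210501
    # paste the request-pull output into pull/0000-cover-letter.patch before mailing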
**Use PULL as the subject line tag** in both the cover letter and the
retransmitted patch mails (for example, by using
``--subject-prefix=PULL`` in your ``git format-patch`` command). This
helps people to filter in or out the resulting emails (especially useful
if they are only CC'd on one email out of the set).

**Each patch must have your own Signed-off-by: line** as well as that of
the original author if the patch was not written by you. This is because
with a pull request you're now indicating that the patch has passed via
you rather than directly from the original author.

**Don't forget to add Reviewed-by: and Acked-by: lines**. When other
people have reviewed the patches you're putting in the pull request,
make sure you've copied their signoffs across. (If you use the `patches
tool `__ to add patches from email
directly to your git repo it will include the tags automatically; if
you're updating patches manually or in some other way you'll need to
edit the commit messages by hand.)

**Don't send pull requests for code that hasn't passed review**. A pull
request says these patches are ready to go into QEMU now, so they must
have passed the standard code review processes. In particular if you've
corrected issues in one round of code review, you need to send your
fixed patch series as normal to the list; you can't put it in a pull
request until it's gone through. (Extremely trivial fixes may be OK to
just fix in passing, but if in doubt err on the side of not.)

**Test before sending**. This is an obvious thing to say, but make sure
everything builds (including that it compiles at each step of the patch
series) and that "make check" passes before sending out the pull
request. As a submaintainer you're one of QEMU's lines of defense
against bad code, so double check the details.

**All pull requests must be signed**. If your key is not already signed
by members of the QEMU community, you should make arrangements to attend
a `KeySigningParty `__ (for
example at KVM Forum) or make alternative arrangements to have your key
signed by an attendee. Key signing requires meeting another community
member \*in person\* so please make appropriate arrangements. By
"signed" here we mean that the pullreq email should quote a tag which is
a GPG-signed tag (as created with 'git tag -s ...').

**Pull requests not for master should say "not for master" and have
"PULL SUBSYSTEM whatever" in the subject tag**. If your pull request is
targeting a stable branch or some submaintainer tree, please include the
string "not for master" in the cover letter email, and make sure the
subject tag is "PULL SUBSYSTEM s390/block/whatever" rather than just
"PULL". This allows it to be automatically filtered out of the set of
pull requests that should be applied to master.

You might be interested in the `make-pullreq
`__
script which automates some of this process for you and includes a few
sanity checks. Note that you must edit it to configure it suitably for
your local situation!
diff --git a/docs/devel/tcg-icount.rst b/docs/devel/tcg-icount.rst
index 8d67b6c076a..50c8e8dabc1 100644
--- a/docs/devel/tcg-icount.rst
+++ b/docs/devel/tcg-icount.rst
@@ -92,6 +92,3 @@ When the translator is handling an instruction of this kind:
     }
 
 * it must end the TB immediately after this instruction
-
-Note that some older front-ends call a "gen_io_end()" function:
-this is obsolete and should not be used.
diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst index 18c6581d85c..f93ef4fe52a 100644 --- a/docs/devel/tcg-plugins.rst +++ b/docs/devel/tcg-plugins.rst @@ -3,7 +3,6 @@ Copyright (c) 2019, Linaro Limited Written by Emilio Cota and Alex Bennée -================ QEMU TCG Plugins ================ @@ -16,8 +15,30 @@ only monitor it passively. However they can do this down to an individual instruction granularity including potentially subscribing to all load and store operations. -API Stability -============= +Usage +----- + +Any QEMU binary with TCG support has plugins enabled by default. +Earlier releases needed to be explicitly enabled with:: + + configure --enable-plugins + +Once built a program can be run with multiple plugins loaded each with +their own arguments:: + + $QEMU $OTHER_QEMU_ARGS \ + -plugin tests/plugin/libhowvec.so,inline=on,count=hint \ + -plugin tests/plugin/libhotblocks.so + +Arguments are plugin specific and can be used to modify their +behaviour. In this case the howvec plugin is being asked to use inline +ops to count and break down the hint instructions by type. + +Writing plugins +--------------- + +API versioning +~~~~~~~~~~~~~~ This is a new feature for QEMU and it does allow people to develop out-of-tree plugins that can be dynamically linked into a running QEMU @@ -25,36 +46,23 @@ process. However the project reserves the right to change or break the API should it need to do so. The best way to avoid this is to submit your plugin upstream so they can be updated if/when the API changes. -API versioning --------------- - All plugins need to declare a symbol which exports the plugin API version they were built against. This can be done simply by:: QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; The core code will refuse to load a plugin that doesn't export a -`qemu_plugin_version` symbol or if plugin version is outside of QEMU's +``qemu_plugin_version`` symbol or if plugin version is outside of QEMU's supported range of API versions. -Additionally the `qemu_info_t` structure which is passed to the -`qemu_plugin_install` method of a plugin will detail the minimum and +Additionally the ``qemu_info_t`` structure which is passed to the +``qemu_plugin_install`` method of a plugin will detail the minimum and current API versions supported by QEMU. The API version will be incremented if new APIs are added. The minimum API version will be incremented if existing APIs are changed or removed. -Exposure of QEMU internals --------------------------- - -The plugin architecture actively avoids leaking implementation details -about how QEMU's translation works to the plugins. While there are -conceptions such as translation time and translation blocks the -details are opaque to plugins. The plugin is able to query select -details of instructions and system configuration only through the -exported *qemu_plugin* functions. - -Query Handle Lifetime ---------------------- +Lifetime of the query handle +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each callback provides an opaque anonymous information handle which can usually be further queried to find out information about a @@ -63,31 +71,8 @@ valid during the lifetime of the callback so it is important that any information that is needed is extracted during the callback and saved by the plugin. -API -=== - -.. 
kernel-doc:: include/qemu/qemu-plugin.h - -Usage -===== - -The QEMU binary needs to be compiled for plugin support:: - - configure --enable-plugins - -Once built a program can be run with multiple plugins loaded each with -their own arguments:: - - $QEMU $OTHER_QEMU_ARGS \ - -plugin tests/plugin/libhowvec.so,arg=inline,arg=hint \ - -plugin tests/plugin/libhotblocks.so - -Arguments are plugin specific and can be used to modify their -behaviour. In this case the howvec plugin is being asked to use inline -ops to count and break down the hint instructions by type. - -Plugin Life cycle -================= +Plugin life cycle +~~~~~~~~~~~~~~~~~ First the plugin is loaded and the public qemu_plugin_install function is called. The plugin will then register callbacks for various plugin @@ -110,11 +95,26 @@ callback which can then ensure atomicity itself. Finally when QEMU exits all the registered *atexit* callbacks are invoked. +Exposure of QEMU internals +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The plugin architecture actively avoids leaking implementation details +about how QEMU's translation works to the plugins. While there are +conceptions such as translation time and translation blocks the +details are opaque to plugins. The plugin is able to query select +details of instructions and system configuration only through the +exported *qemu_plugin* functions. + +API +~~~ + +.. kernel-doc:: include/qemu/qemu-plugin.h + Internals -========= +--------- Locking -------- +~~~~~~~ We have to ensure we cannot deadlock, particularly under MTTCG. For this we acquire a lock when called from plugin code. We also keep the @@ -141,16 +141,16 @@ requested. The plugin isn't completely uninstalled until the safe work has executed while all vCPUs are quiescent. Example Plugins -=============== +--------------- There are a number of plugins included with QEMU and you are encouraged to contribute your own plugins plugins upstream. There is a -`contrib/plugins` directory where they can go. +``contrib/plugins`` directory where they can go. - tests/plugins These are some basic plugins that are used to test and exercise the -API during the `make check-tcg` target. +API during the ``make check-tcg`` target. - contrib/plugins/hotblocks.c @@ -162,7 +162,7 @@ with linux-user execution as system emulation tends to generate re-translations as blocks from different programs get swapped in and out of system memory. -If your program is single-threaded you can use the `inline` option for +If your program is single-threaded you can use the ``inline`` option for slightly faster (but not thread safe) counters. Example:: @@ -192,17 +192,32 @@ Similar to hotblocks but this time tracks memory accesses:: 0x0000000048b000, 0x0001, 130594, 0x0001, 355 0x0000000048a000, 0x0001, 1826, 0x0001, 11 +The hotpages plugin can be configured using the following arguments: + + * sortby=reads|writes|address + + Log the data sorted by either the number of reads, the number of writes, or + memory address. (Default: entries are sorted by the sum of reads and writes) + + * io=on + + Track IO addresses. Only relevant to full system emulation. (Default: off) + + * pagesize=N + + The page size used. (Default: N = 4096) + - contrib/plugins/howvec.c This is an instruction classifier so can be used to count different types of instructions. It has a number of options to refine which get -counted. You can give an argument for a class of instructions to break -it down fully, so for example to see all the system registers -accesses:: +counted. 
You can give a value to the ``count`` argument for a class of +instructions to break it down fully, so for example to see all the system +registers accesses:: ./aarch64-softmmu/qemu-system-aarch64 $(QEMU_ARGS) \ -append "root=/dev/sda2 systemd.unit=benchmark.service" \ - -smp 4 -plugin ./contrib/plugins/libhowvec.so,arg=sreg -d plugin + -smp 4 -plugin ./contrib/plugins/libhowvec.so,count=sreg -d plugin which will lead to a sorted list after the class breakdown:: @@ -250,7 +265,7 @@ which will lead to a sorted list after the class breakdown:: ... To find the argument shorthand for the class you need to examine the -source code of the plugin at the moment, specifically the `*opt` +source code of the plugin at the moment, specifically the ``*opt`` argument in the InsnClassExecCount tables. - contrib/plugins/lockstep.c @@ -270,7 +285,7 @@ communicate over:: ./sparc-softmmu/qemu-system-sparc -monitor none -parallel none \ -net none -M SS-20 -m 256 -kernel day11/zImage.elf \ - -plugin ./contrib/plugins/liblockstep.so,arg=lockstep-sparc.sock \ + -plugin ./contrib/plugins/liblockstep.so,sockpath=lockstep-sparc.sock \ -d plugin,nochain which will eventually report:: @@ -285,27 +300,27 @@ which will eventually report:: previously @ 0x000000ffd08098/5 (809900593 insns) previously @ 0x000000ffd080c0/1 (809900588 insns) -- contrib/plugins/hwprofile +- contrib/plugins/hwprofile.c The hwprofile tool can only be used with system emulation and allows the user to see what hardware is accessed how often. It has a number of options: - * arg=read or arg=write + * track=read or track=write By default the plugin tracks both reads and writes. You can use one of these options to limit the tracking to just one class of accesses. - * arg=source + * source Will include a detailed break down of what the guest PC that made the - access was. Not compatible with arg=pattern. Example output:: + access was. Not compatible with the pattern option. Example output:: cirrus-low-memory @ 0xfffffd00000a0000 pc:fffffc0000005cdc, 1, 256 pc:fffffc0000005ce8, 1, 256 pc:fffffc0000005cec, 1, 256 - * arg=pattern + * pattern Instead break down the accesses based on the offset into the HW region. This can be useful for seeing the most used registers of a @@ -319,3 +334,105 @@ the user to see what hardware is accessed how often. It has a number of options: off:0000001c, 1, 2 off:00000020, 1, 2 ... + +- contrib/plugins/execlog.c + +The execlog tool traces executed instructions with memory access. It can be used +for debugging and security analysis purposes. +Please be aware that this will generate a lot of output. + +The plugin takes no argument:: + + qemu-system-arm $(QEMU_ARGS) \ + -plugin ./contrib/plugins/libexeclog.so -d plugin + +which will output an execution trace following this structure:: + + # vCPU, vAddr, opcode, disassembly[, load/store, memory addr, device]... 
+ 0, 0xa12, 0xf8012400, "movs r4, #0" + 0, 0xa14, 0xf87f42b4, "cmp r4, r6" + 0, 0xa16, 0xd206, "bhs #0xa26" + 0, 0xa18, 0xfff94803, "ldr r0, [pc, #0xc]", load, 0x00010a28, RAM + 0, 0xa1a, 0xf989f000, "bl #0xd30" + 0, 0xd30, 0xfff9b510, "push {r4, lr}", store, 0x20003ee0, RAM, store, 0x20003ee4, RAM + 0, 0xd32, 0xf9893014, "adds r0, #0x14" + 0, 0xd34, 0xf9c8f000, "bl #0x10c8" + 0, 0x10c8, 0xfff96c43, "ldr r3, [r0, #0x44]", load, 0x200000e4, RAM + +- contrib/plugins/cache.c + +Cache modelling plugin that measures the performance of a given L1 cache +configuration, and optionally a unified L2 per-core cache when a given working +set is run:: + + qemu-x86_64 -plugin ./contrib/plugins/libcache.so \ + -d plugin -D cache.log ./tests/tcg/x86_64-linux-user/float_convs + +will report the following:: + + core #, data accesses, data misses, dmiss rate, insn accesses, insn misses, imiss rate + 0 996695 508 0.0510% 2642799 18617 0.7044% + + address, data misses, instruction + 0x424f1e (_int_malloc), 109, movq %rax, 8(%rcx) + 0x41f395 (_IO_default_xsputn), 49, movb %dl, (%rdi, %rax) + 0x42584d (ptmalloc_init.part.0), 33, movaps %xmm0, (%rax) + 0x454d48 (__tunables_init), 20, cmpb $0, (%r8) + ... + + address, fetch misses, instruction + 0x4160a0 (__vfprintf_internal), 744, movl $1, %ebx + 0x41f0a0 (_IO_setb), 744, endbr64 + 0x415882 (__vfprintf_internal), 744, movq %r12, %rdi + 0x4268a0 (__malloc), 696, andq $0xfffffffffffffff0, %rax + ... + +The plugin has a number of arguments, all of them are optional: + + * limit=N + + Print top N icache and dcache thrashing instructions along with their + address, number of misses, and its disassembly. (default: 32) + + * icachesize=N + * iblksize=B + * iassoc=A + + Instruction cache configuration arguments. They specify the cache size, block + size, and associativity of the instruction cache, respectively. + (default: N = 16384, B = 64, A = 8) + + * dcachesize=N + * dblksize=B + * dassoc=A + + Data cache configuration arguments. They specify the cache size, block size, + and associativity of the data cache, respectively. + (default: N = 16384, B = 64, A = 8) + + * evict=POLICY + + Sets the eviction policy to POLICY. Available policies are: :code:`lru`, + :code:`fifo`, and :code:`rand`. The plugin will use the specified policy for + both instruction and data caches. (default: POLICY = :code:`lru`) + + * cores=N + + Sets the number of cores for which we maintain separate icache and dcache. + (default: for linux-user, N = 1, for full system emulation: N = cores + available to guest) + + * l2=on + + Simulates a unified L2 cache (stores blocks for both instructions and data) + using the default L2 configuration (cache size = 2MB, associativity = 16-way, + block size = 64B). + + * l2cachesize=N + * l2blksize=B + * l2assoc=A + + L2 cache configuration arguments. They specify the cache size, block size, and + associativity of the L2 cache, respectively. Setting any of the L2 + configuration arguments implies ``l2=on``. + (default: N = 2097152 (2MB), B = 64, A = 16) diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst index 4ebde44b9d7..a65fb7b1c44 100644 --- a/docs/devel/tcg.rst +++ b/docs/devel/tcg.rst @@ -11,13 +11,14 @@ performances. QEMU's dynamic translation backend is called TCG, for "Tiny Code Generator". For more information, please take a look at ``tcg/README``. -Some notable features of QEMU's dynamic translator are: +The following sections outline some notable features and implementation +details of QEMU's dynamic translator. 
CPU state optimisations ----------------------- -The target CPUs have many internal states which change the way it -evaluates instructions. In order to achieve a good speed, the +The target CPUs have many internal states which change the way they +evaluate instructions. In order to achieve a good speed, the translation phase considers that some state information of the virtual CPU cannot change in it. The state is recorded in the Translation Block (TB). If the state changes (e.g. privilege level), a new TB will @@ -31,17 +32,95 @@ Direct block chaining --------------------- After each translated basic block is executed, QEMU uses the simulated -Program Counter (PC) and other cpu state information (such as the CS +Program Counter (PC) and other CPU state information (such as the CS segment base value) to find the next basic block. -In order to accelerate the most common cases where the new simulated PC -is known, QEMU can patch a basic block so that it jumps directly to the -next one. - -The most portable code uses an indirect jump. An indirect jump makes -it easier to make the jump target modification atomic. On some host -architectures (such as x86 or PowerPC), the ``JUMP`` opcode is -directly patched so that the block chaining has no overhead. +In its simplest, less optimized form, this is done by exiting from the +current TB, going through the TB epilogue, and then back to the +main loop. That’s where QEMU looks for the next TB to execute, +translating it from the guest architecture if it isn’t already available +in memory. Then QEMU proceeds to execute this next TB, starting at the +prologue and then moving on to the translated instructions. + +Exiting from the TB this way will cause the ``cpu_exec_interrupt()`` +callback to be re-evaluated before executing additional instructions. +It is mandatory to exit this way after any CPU state changes that may +unmask interrupts. + +In order to accelerate the cases where the TB for the new +simulated PC is already available, QEMU has mechanisms that allow +multiple TBs to be chained directly, without having to go back to the +main loop as described above. These mechanisms are: + +``lookup_and_goto_ptr`` +^^^^^^^^^^^^^^^^^^^^^^^ + +Calling ``tcg_gen_lookup_and_goto_ptr()`` will emit a call to +``helper_lookup_tb_ptr``. This helper will look for an existing TB that +matches the current CPU state. If the destination TB is available its +code address is returned, otherwise the address of the JIT epilogue is +returned. The call to the helper is always followed by the tcg ``goto_ptr`` +opcode, which branches to the returned address. In this way, we either +branch to the next TB or return to the main loop. + +``goto_tb + exit_tb`` +^^^^^^^^^^^^^^^^^^^^^ + +The translation code usually implements branching by performing the +following steps: + +1. Call ``tcg_gen_goto_tb()`` passing a jump slot index (either 0 or 1) + as a parameter. + +2. Emit TCG instructions to update the CPU state with any information + that has been assumed constant and is required by the main loop to + correctly locate and execute the next TB. For most guests, this is + just the PC of the branch destination, but others may store additional + data. The information updated in this step must be inferable from both + ``cpu_get_tb_cpu_state()`` and ``cpu_restore_state()``. + +3. Call ``tcg_gen_exit_tb()`` passing the address of the current TB and + the jump slot index again. 
+ +Step 1, ``tcg_gen_goto_tb()``, will emit a ``goto_tb`` TCG +instruction that later on gets translated to a jump to an address +associated with the specified jump slot. Initially, this is the address +of step 2's instructions, which update the CPU state information. Step 3, +``tcg_gen_exit_tb()``, exits from the current TB returning a tagged +pointer composed of the last executed TB’s address and the jump slot +index. + +The first time this whole sequence is executed, step 1 simply jumps +to step 2. Then the CPU state information gets updated and we exit from +the current TB. As a result, the behavior is very similar to the less +optimized form described earlier in this section. + +Next, the main loop looks for the next TB to execute using the +current CPU state information (creating the TB if it wasn’t already +available) and, before starting to execute the new TB’s instructions, +patches the previously executed TB by associating one of its jump +slots (the one specified in the call to ``tcg_gen_exit_tb()``) with the +address of the new TB. + +The next time this previous TB is executed and we get to that same +``goto_tb`` step, it will already be patched (assuming the destination TB +is still in memory) and will jump directly to the first instruction of +the destination TB, without going back to the main loop. + +For the ``goto_tb + exit_tb`` mechanism to be used, the following +conditions need to be satisfied: + +* The change in CPU state must be constant, e.g., a direct branch and + not an indirect branch. + +* The direct branch cannot cross a page boundary. Memory mappings + may change, causing the code at the destination address to change. + +Note that, on step 3 (``tcg_gen_exit_tb()``), in addition to the +jump slot index, the address of the TB just executed is also returned. +This address corresponds to the TB that will be patched; it may be +different than the one that was directly executed from the main loop +if the latter had already been chained to other TBs. Self-modifying code and translated code invalidation ---------------------------------------------------- diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst index 1da4c4e4c4e..755343c7dd0 100644 --- a/docs/devel/testing.rst +++ b/docs/devel/testing.rst @@ -1,11 +1,10 @@ -=============== Testing in QEMU =============== This document describes the testing infrastructure in QEMU. Testing with "make check" -========================= +------------------------- The "make check" testing family includes most of the C based tests in QEMU. For a quick help, run ``make check-help`` from the source tree. @@ -24,7 +23,7 @@ expect the executables to exist and will fail with obscure messages if they cannot find them. Unit tests ----------- +~~~~~~~~~~ Unit tests, which can be invoked with ``make check-unit``, are simple C tests that typically link to individual QEMU object files and exercise them by @@ -67,7 +66,7 @@ and copy the actual command line which executes the unit test, then run it from the command line. QTest ------ +~~~~~ QTest is a device emulation testing framework. 
It can be very useful to test device models; it could also control certain aspects of QEMU (such as virtual @@ -81,7 +80,7 @@ QTest cases can be executed with make check-qtest QAPI schema tests ------------------ +~~~~~~~~~~~~~~~~~ The QAPI schema tests validate the QAPI parser used by QMP, by feeding predefined input to the parser and comparing the result with the reference @@ -108,33 +107,14 @@ parser (either fixing a bug or extending/modifying the syntax). To do this: ``qapi-schema += foo.json`` check-block ------------ +~~~~~~~~~~~ ``make check-block`` runs a subset of the block layer iotests (the tests that are in the "auto" group). See the "QEMU iotests" section below for more information. -GCC gcov support ----------------- - -``gcov`` is a GCC tool to analyze the testing coverage by -instrumenting the tested code. To use it, configure QEMU with -``--enable-gcov`` option and build. Then run ``make check`` as usual. - -If you want to gather coverage information on a single test the ``make -clean-gcda`` target can be used to delete any existing coverage -information before running a single test. - -You can generate a HTML coverage report by executing ``make -coverage-html`` which will create -``meson-logs/coveragereport/index.html``. - -Further analysis can be conducted by running the ``gcov`` command -directly on the various .gcda output files. Please read the ``gcov`` -documentation for more information. - QEMU iotests -============ +------------ QEMU iotests, under the directory ``tests/qemu-iotests``, is the testing framework widely used to test block layer related features. It is higher level @@ -171,7 +151,7 @@ More options are supported by the ``./check`` script, run ``./check -h`` for help. Writing a new test case ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ Consider writing a tests case when you are making any changes to the block layer. An iotest case is usually the choice for that. There are already many @@ -224,8 +204,38 @@ another application on the host may have locked the file, possibly leading to a test failure. If using such devices are explicitly desired, consider adding ``locking=off`` option to disable image locking. +Debugging a test case +~~~~~~~~~~~~~~~~~~~~~ + +The following options to the ``check`` script can be useful when debugging +a failing test: + +* ``-gdb`` wraps every QEMU invocation in a ``gdbserver``, which waits for a + connection from a gdb client. The options given to ``gdbserver`` (e.g. the + address on which to listen for connections) are taken from the ``$GDB_OPTIONS`` + environment variable. By default (if ``$GDB_OPTIONS`` is empty), it listens on + ``localhost:12345``. + It is possible to connect to it for example with + ``gdb -iex "target remote $addr"``, where ``$addr`` is the address + ``gdbserver`` listens on. + If the ``-gdb`` option is not used, ``$GDB_OPTIONS`` is ignored, + regardless of whether it is set or not. + +* ``-valgrind`` attaches a valgrind instance to QEMU. If it detects + warnings, it will print and save the log in + ``$TEST_DIR/.valgrind``. + The final command line will be ``valgrind --log-file=$TEST_DIR/ + .valgrind --error-exitcode=99 $QEMU ...`` + +* ``-d`` (debug) just increases the logging verbosity, showing + for example the QMP commands and answers. + +* ``-p`` (print) redirects QEMU’s stdout and stderr to the test output, + instead of saving it into a log file in + ``$TEST_DIR/qemu-machine-``. 
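For instance (the test number, image format and gdb address here are only
illustrative), a debugging session combining these options could be started
with::

    GDB_OPTIONS="localhost:1234" ./check -qcow2 -gdb 040
    ./check -qcow2 -valgrind -d 040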
+ Test case groups ----------------- +~~~~~~~~~~~~~~~~ "Tests may belong to one or more test groups, which are defined in the form of a comment in the test source file. By convention, test groups are listed @@ -275,10 +285,10 @@ Note that the following group names have a special meaning: .. _container-ref: Container based tests -===================== +--------------------- Introduction ------------- +~~~~~~~~~~~~ The container testing framework in QEMU utilizes public images to build and test QEMU in predefined and widely accessible Linux @@ -293,7 +303,7 @@ The container images are also used to augment the generation of tests for testing TCG. See :ref:`checktcg-ref` for more details. Docker Prerequisites --------------------- +~~~~~~~~~~~~~~~~~~~~ Install "docker" with the system package manager and start the Docker service on your development machine, then make sure you have the privilege to run @@ -324,7 +334,7 @@ exploit the whole host with Docker bind mounting or other privileged operations. So only do it on development machines. Podman Prerequisites --------------------- +~~~~~~~~~~~~~~~~~~~~ Install "podman" with the system package manager. @@ -336,7 +346,7 @@ Install "podman" with the system package manager. The last command should print an empty table, to verify the system is ready. Quickstart ----------- +~~~~~~~~~~ From source tree, type ``make docker-help`` to see the help. Testing can be started without configuring or building QEMU (``configure`` and @@ -352,7 +362,7 @@ is downloaded and initialized automatically), in which the ``test-build`` job is executed. Registry --------- +~~~~~~~~ The QEMU project has a container registry hosted by GitLab at ``registry.gitlab.com/qemu-project/qemu`` which will automatically be @@ -366,7 +376,7 @@ locally by using the ``NOCACHE`` build option: make docker-image-debian10 NOCACHE=1 Images ------- +~~~~~~ Along with many other images, the ``centos8`` image is defined in a Dockerfile in ``tests/docker/dockerfiles/``, called ``centos8.docker``. ``make docker-help`` @@ -381,7 +391,7 @@ mainly used to do necessary host side setup. One such setup is ``binfmt_misc``, for example, to make qemu-user powered cross build containers work. Tests ------ +~~~~~ Different tests are added to cover various configurations to build and test QEMU. Docker tests are the executables under ``tests/docker`` named @@ -392,7 +402,7 @@ source and build it. The full list of tests is printed in the ``make docker-help`` help. Debugging a Docker test failure -------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When CI tasks, maintainers or yourself report a Docker test failure, follow the below steps to debug it: @@ -409,7 +419,7 @@ below steps to debug it: the prompt for debug. Options -------- +~~~~~~~ Various options can be used to affect how Docker tests are done. The full list is in the ``make docker`` help text. The frequently used ones are: @@ -423,7 +433,7 @@ list is in the ``make docker`` help text. The frequently used ones are: failure" section. Thread Sanitizer -================ +---------------- Thread Sanitizer (TSan) is a tool which can detect data races. QEMU supports building and testing with this tool. @@ -433,7 +443,7 @@ For more information on TSan: https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual Thread Sanitizer in Docker ---------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ TSan is currently supported in the ubuntu2004 docker. The test-tsan test will build using TSan and then run make check. 
@@ -448,7 +458,7 @@ We recommend using DEBUG=1 to allow launching the test from inside the docker, and to allow review of the warnings generated by TSan. Building and Testing with TSan ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It is possible to build and test with TSan, with a few additional steps. These steps are normally done automatically in the docker. @@ -487,7 +497,7 @@ This allows for running the test and then checking the warnings afterwards. If you want TSan to stop and exit with error on warnings, use exitcode=66. TSan Suppressions ------------------ +~~~~~~~~~~~~~~~~~ Keep in mind that for any data race warning, although there might be a data race detected by TSan, there might be no actual bug here. TSan provides several different mechanisms for suppressing warnings. In general it is recommended @@ -513,7 +523,7 @@ More information on the file format can be found here under "Blacklist Format": https://github.com/google/sanitizers/wiki/ThreadSanitizerFlags TSan Annotations ----------------- +~~~~~~~~~~~~~~~~ include/qemu/tsan.h defines annotations. See this file for more descriptions of the annotations themselves. Annotations can be used to suppress TSan warnings or give TSan more information so that it can detect proper @@ -530,14 +540,14 @@ The full set of annotations can be found here: https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp VM testing -========== +---------- This test suite contains scripts that bootstrap various guest images that have necessary packages to build QEMU. The basic usage is documented in ``Makefile`` help which is displayed with ``make vm-help``. Quickstart ----------- +~~~~~~~~~~ Run ``make vm-help`` to list available make targets. Invoke a specific make command to run build test in an image. For example, ``make vm-build-freebsd`` @@ -552,29 +562,29 @@ concerned about attackers taking control of the guest and potentially exploiting a QEMU security bug to compromise the host. QEMU binaries -------------- +~~~~~~~~~~~~~ -By default, qemu-system-x86_64 is searched in $PATH to run the guest. If there -isn't one, or if it is older than 2.10, the test won't work. In this case, +By default, ``qemu-system-x86_64`` is searched in $PATH to run the guest. If +there isn't one, or if it is older than 2.10, the test won't work. In this case, provide the QEMU binary in env var: ``QEMU=/path/to/qemu-2.10+``. -Likewise the path to qemu-img can be set in QEMU_IMG environment variable. +Likewise the path to ``qemu-img`` can be set in QEMU_IMG environment variable. Make jobs ---------- +~~~~~~~~~ The ``-j$X`` option in the make command line is not propagated into the VM, specify ``J=$X`` to control the make jobs in the guest. Debugging ---------- +~~~~~~~~~ Add ``DEBUG=1`` and/or ``V=1`` to the make command to allow interactive debugging and verbose output. If this is not enough, see the next section. ``V=1`` will be propagated down into the make jobs in the guest. Manual invocation ------------------ +~~~~~~~~~~~~~~~~~ Each guest script is an executable script with the same command line options. For example to work with the netbsd guest, use ``$QEMU_SRC/tests/vm/netbsd``: @@ -598,7 +608,7 @@ For example to work with the netbsd guest, use ``$QEMU_SRC/tests/vm/netbsd``: $ ./netbsd --interactive --image /var/tmp/netbsd.img sh Adding new guests ------------------ +~~~~~~~~~~~~~~~~~ Please look at existing guest scripts for how to add new guests. @@ -631,7 +641,7 @@ the script's ``main()``. recommended. 
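Putting the options from the preceding sections together (the guest, the
paths and the job count are only an example), a verbose debug build test
could be started with::

    make vm-build-freebsd J=8 V=1 DEBUG=1 \
        QEMU=/path/to/qemu-system-x86_64 QEMU_IMG=/path/to/qemu-img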
Image fuzzer testing -==================== +-------------------- An image fuzzer was added to exercise format drivers. Currently only qcow2 is supported. To start the fuzzer, run @@ -640,20 +650,19 @@ supported. To start the fuzzer, run tests/image-fuzzer/runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2 -Alternatively, some command different from "qemu-img info" can be tested, by +Alternatively, some command different from ``qemu-img info`` can be tested, by changing the ``-c`` option. -Acceptance tests using the Avocado Framework -============================================ +Integration tests using the Avocado Framework +--------------------------------------------- -The ``tests/acceptance`` directory hosts functional tests, also known -as acceptance level tests. They're usually higher level tests, and -may interact with external resources and with various guest operating -systems. +The ``tests/avocado`` directory hosts integration tests. They're usually +higher level tests, and may interact with external resources and with +various guest operating systems. These tests are written using the Avocado Testing Framework (which must be installed separately) in conjunction with a the ``avocado_qemu.Test`` -class, implemented at ``tests/acceptance/avocado_qemu``. +class, implemented at ``tests/avocado/avocado_qemu``. Tests based on ``avocado_qemu.Test`` can easily: @@ -683,13 +692,13 @@ Tests based on ``avocado_qemu.Test`` can easily: - http://avocado-framework.readthedocs.io/en/latest/api/utils/avocado.utils.html Running tests -------------- +~~~~~~~~~~~~~ -You can run the acceptance tests simply by executing: +You can run the avocado tests simply by executing: .. code:: - make check-acceptance + make check-avocado This involves the automatic creation of Python virtual environment within the build tree (at ``tests/venv``) which will have all the @@ -703,16 +712,85 @@ available. On Debian and Ubuntu based systems, depending on the specific version, they may be on packages named ``python3-venv`` and ``python3-pip``. +It is also possible to run tests based on tags using the +``make check-avocado`` command and the ``AVOCADO_TAGS`` environment +variable: + +.. code:: + + make check-avocado AVOCADO_TAGS=quick + +Note that tags separated with commas have an AND behavior, while tags +separated by spaces have an OR behavior. For more information on Avocado +tags, see: + + https://avocado-framework.readthedocs.io/en/latest/guides/user/chapters/tags.html + +To run a single test file, a couple of them, or a test within a file +using the ``make check-avocado`` command, set the ``AVOCADO_TESTS`` +environment variable with the test files or test names. To run all +tests from a single file, use: + + .. code:: + + make check-avocado AVOCADO_TESTS=$FILEPATH + +The same is valid to run tests from multiple test files: + + .. code:: + + make check-avocado AVOCADO_TESTS='$FILEPATH1 $FILEPATH2' + +To run a single test within a file, use: + + .. code:: + + make check-avocado AVOCADO_TESTS=$FILEPATH:$TESTCLASS.$TESTNAME + +The same is valid to run single tests from multiple test files: + + .. code:: + + make check-avocado AVOCADO_TESTS='$FILEPATH1:$TESTCLASS1.$TESTNAME1 $FILEPATH2:$TESTCLASS2.$TESTNAME2' + The scripts installed inside the virtual environment may be used without an "activation". For instance, the Avocado test runner may be invoked by running: .. 
code:: - tests/venv/bin/avocado run $OPTION1 $OPTION2 tests/acceptance/ + tests/venv/bin/avocado run $OPTION1 $OPTION2 tests/avocado/ + +Note that if ``make check-avocado`` was not executed before, it is +possible to create the Python virtual environment with the dependencies +needed running: + + .. code:: + + make check-venv + +It is also possible to run tests from a single file or a single test within +a test file. To run tests from a single file within the build tree, use: + + .. code:: + + tests/venv/bin/avocado run tests/avocado/$TESTFILE + +To run a single test within a test file, use: + + .. code:: + + tests/venv/bin/avocado run tests/avocado/$TESTFILE:$TESTCLASS.$TESTNAME + +Valid test names are visible in the output from any previous execution +of Avocado or ``make check-avocado``, and can also be queried using: + + .. code:: + + tests/venv/bin/avocado list tests/avocado Manual Installation -------------------- +~~~~~~~~~~~~~~~~~~~ To manually install Avocado and its dependencies, run: @@ -725,18 +803,18 @@ Alternatively, follow the instructions on this link: https://avocado-framework.readthedocs.io/en/latest/guides/user/chapters/installing.html Overview --------- +~~~~~~~~ -The ``tests/acceptance/avocado_qemu`` directory provides the +The ``tests/avocado/avocado_qemu`` directory provides the ``avocado_qemu`` Python module, containing the ``avocado_qemu.Test`` class. Here's a simple usage example: .. code:: - from avocado_qemu import Test + from avocado_qemu import QemuSystemTest - class Version(Test): + class Version(QemuSystemTest): """ :avocado: tags=quick """ @@ -761,7 +839,7 @@ in the current directory, tagged as "quick", run: avocado run -t quick . The ``avocado_qemu.Test`` base test class ------------------------------------------ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``avocado_qemu.Test`` class has a number of characteristics that are worth being mentioned right away. @@ -775,16 +853,16 @@ The base test class has also support for tests with more than one QEMUMachine. The way to get machines is through the ``self.get_vm()`` method which will return a QEMUMachine instance. The ``self.get_vm()`` method accepts arguments that will be passed to the QEMUMachine creation -and also an optional `name` attribute so you can identify a specific +and also an optional ``name`` attribute so you can identify a specific machine and get it more than once through the tests methods. A simple and hypothetical example follows: .. code:: - from avocado_qemu import Test + from avocado_qemu import QemuSystemTest - class MultipleMachines(Test): + class MultipleMachines(QemuSystemTest): def test_multiple_machines(self): first_machine = self.get_vm() second_machine = self.get_vm() @@ -810,6 +888,32 @@ and hypothetical example follows: At test "tear down", ``avocado_qemu.Test`` handles all the QEMUMachines shutdown. +The ``avocado_qemu.LinuxTest`` base test class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``avocado_qemu.LinuxTest`` is further specialization of the +``avocado_qemu.Test`` class, so it contains all the characteristics of +the later plus some extra features. + +First of all, this base class is intended for tests that need to +interact with a fully booted and operational Linux guest. At this +time, it uses a Fedora 31 guest image. The most basic example looks +like this: + +.. 
code:: + + from avocado_qemu import LinuxTest + + + class SomeTest(LinuxTest): + + def test(self): + self.launch_and_wait() + self.ssh_command('some_command_to_be_run_in_the_guest') + +Please refer to tests that use ``avocado_qemu.LinuxTest`` under +``tests/avocado`` for more examples. + QEMUMachine ~~~~~~~~~~~ @@ -828,7 +932,7 @@ execution of a QEMU binary, giving its users: a more succinct and intuitive way QEMU binary selection -~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^ The QEMU binary used for the ``self.vm`` QEMUMachine instance will primarily depend on the value of the ``qemu_bin`` parameter. If it's @@ -849,20 +953,23 @@ The resulting ``qemu_bin`` value will be preserved in the ``avocado_qemu.Test`` as an attribute with the same name. Attribute reference -------------------- +~~~~~~~~~~~~~~~~~~~ + +Test +^^^^ Besides the attributes and methods that are part of the base ``avocado.Test`` class, the following attributes are available on any ``avocado_qemu.Test`` instance. vm -~~ +'' A QEMUMachine instance, initially configured according to the given ``qemu_bin`` parameter. arch -~~~~ +'''' The architecture can be used on different levels of the stack, e.g. by the framework or by the test itself. At the framework level, it will @@ -878,8 +985,19 @@ name. If one is not given explicitly, it will either be set to ``None``, or, if the test is tagged with one (and only one) ``:avocado: tags=arch:VALUE`` tag, it will be set to ``VALUE``. +cpu +''' + +The cpu model that will be set to all QEMUMachine instances created +by the test. + +The ``cpu`` attribute will be set to the test parameter of the same +name. If one is not given explicitly, it will either be set to +``None ``, or, if the test is tagged with one (and only one) +``:avocado: tags=cpu:VALUE`` tag, it will be set to ``VALUE``. + machine -~~~~~~~ +''''''' The machine type that will be set to all QEMUMachine instances created by the test. @@ -890,14 +1008,47 @@ name. If one is not given explicitly, it will either be set to ``:avocado: tags=machine:VALUE`` tag, it will be set to ``VALUE``. qemu_bin -~~~~~~~~ +'''''''' The preserved value of the ``qemu_bin`` parameter or the result of the dynamic probe for a QEMU binary in the current working directory or source tree. +LinuxTest +^^^^^^^^^ + +Besides the attributes present on the ``avocado_qemu.Test`` base +class, the ``avocado_qemu.LinuxTest`` adds the following attributes: + +distro +'''''' + +The name of the Linux distribution used as the guest image for the +test. The name should match the **Provider** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_version +'''''''''''''' + +The version of the Linux distribution as the guest image for the +test. The name should match the **Version** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_checksum +''''''''''''''' + +The sha256 hash of the guest image file used for the test. + +If this value is not set in the code or by a test parameter (with the +same name), no validation on the integrity of the image will be +performed. 
+ Parameter reference -------------------- +~~~~~~~~~~~~~~~~~~~ To understand how Avocado parameters are accessed by tests, and how they can be passed to tests, please refer to:: @@ -911,8 +1062,11 @@ like the following: PARAMS (key=qemu_bin, path=*, default=./qemu-system-x86_64) => './qemu-system-x86_64 +Test +^^^^ + arch -~~~~ +'''' The architecture that will influence the selection of a QEMU binary (when one is not explicitly given). @@ -924,20 +1078,58 @@ architecture of a kernel or disk image to boot a VM with. This parameter has a direct relation with the ``arch`` attribute. If not given, it will default to None. +cpu +''' + +The cpu model that will be set to all QEMUMachine instances created +by the test. + machine -~~~~~~~ +''''''' The machine type that will be set to all QEMUMachine instances created by the test. - qemu_bin -~~~~~~~~ +'''''''' The exact QEMU binary to be used on QEMUMachine. +LinuxTest +^^^^^^^^^ + +Besides the parameters present on the ``avocado_qemu.Test`` base +class, the ``avocado_qemu.LinuxTest`` adds the following parameters: + +distro +'''''' + +The name of the Linux distribution used as the guest image for the +test. The name should match the **Provider** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_version +'''''''''''''' + +The version of the Linux distribution used as the guest image for the +test. The name should match the **Version** column on the list +of images supported by the avocado.utils.vmimage library: + +https://avocado-framework.readthedocs.io/en/latest/guides/writer/libs/vmimage.html#supported-images + +distro_checksum +''''''''''''''' + +The sha256 hash of the guest image file used for the test. + +If this value is not set in the code or by this parameter, no +validation on the integrity of the image will be performed. + Skipping tests --------------- +~~~~~~~~~~~~~~ + The Avocado framework provides Python decorators which allow for easily skipping tests under certain conditions. For example, on the lack of a binary on the test system or when the running environment is a CI system. For further @@ -952,27 +1144,27 @@ environment variables became a kind of standard way to enable/disable tests. Here is a list of the most used variables: AVOCADO_ALLOW_LARGE_STORAGE -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ Tests which are going to fetch or produce assets considered *large* are not -going to run unless that `AVOCADO_ALLOW_LARGE_STORAGE=1` is exported on +going to run unless ``AVOCADO_ALLOW_LARGE_STORAGE=1`` is exported on the environment. The definition of *large* is a bit arbitrary here, but it usually means an asset which occupies at least 1GB of size on disk when uncompressed. AVOCADO_ALLOW_UNTRUSTED_CODE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There are tests which will boot a kernel image or firmware that can be considered not safe to run on the developer's workstation, thus they are skipped by default. The definition of *not safe* is also arbitrary but usually it means a blob whose source or build process isn't publicly available. -You should export `AVOCADO_ALLOW_UNTRUSTED_CODE=1` on the environment in +You should export ``AVOCADO_ALLOW_UNTRUSTED_CODE=1`` on the environment in order to allow tests which make use of those kinds of assets.
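To make the relationship between these variables and the skip decorators concrete, here is a minimal sketch of how a test might guard itself; the class and test names are made up for the example, and the stock ``avocado`` decorators are assumed:

 .. code::

    import os

    from avocado import skipUnless
    from avocado_qemu import QemuSystemTest


    class HypotheticalFirmwareTest(QemuSystemTest):

        @skipUnless(os.getenv('AVOCADO_ALLOW_UNTRUSTED_CODE'),
                    'untrusted code')
        def test_boot_binary_blob(self):
            # Only reached when AVOCADO_ALLOW_UNTRUSTED_CODE=1 is exported
            self.vm.launch()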
AVOCADO_TIMEOUT_EXPECTED -~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^ The Avocado framework has a timeout mechanism which interrupts tests to keep the test suite from getting stuck. The timeout value can be set via test parameter or property defined in the test class, for further details:: @@ -982,11 +1174,11 @@ property defined in the test class, for further details:: Even though the timeout can be set by the test developer, there are some tests that may not have a well-defined limit of time to finish under certain conditions. For example, tests that take longer to execute when QEMU is -compiled with debug flags. Therefore, the `AVOCADO_TIMEOUT_EXPECTED` variable +compiled with debug flags. Therefore, the ``AVOCADO_TIMEOUT_EXPECTED`` variable has been used to determine whether those tests should run or not. GITLAB_CI -~~~~~~~~~ +^^^^^^^^^ A number of tests are flagged to not run on the GitLab CI. Usually because they proved to be flaky or there are constraints on the CI environment which would make them fail. If you encounter a similar situation then use that @@ -999,7 +1191,7 @@ variable as shown on the code snippet below to skip the test: do_something() Uninstalling Avocado --------------------- +~~~~~~~~~~~~~~~~~~~~ If you've followed the manual installation instructions above, you can easily uninstall Avocado. Start by listing the packages you have @@ -1011,13 +1203,13 @@ And remove any package you want with:: pip uninstall -If you've used ``make check-acceptance``, the Python virtual environment where +If you've used ``make check-avocado``, the Python virtual environment where Avocado is installed will be cleaned up as part of ``make check-clean``. .. _checktcg-ref: Testing with "make check-tcg" -============================= +----------------------------- The check-tcg tests are intended for simple smoke tests of both linux-user and softmmu TCG functionality. However to build test @@ -1050,7 +1242,7 @@ itself. See :ref:`container-ref` for more details. Running subset of tests ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ You can build the tests for one architecture:: @@ -1064,7 +1256,7 @@ Adding ``V=1`` to the invocation will show the details of how to invoke QEMU for the test which is useful for debugging tests. TCG test dependencies ---------------------- +~~~~~~~~~~~~~~~~~~~~~ The TCG tests are deliberately very light on dependencies and are either totally bare with minimal gcc lib support (for softmmu tests) @@ -1096,3 +1288,22 @@ exercise as many corner cases as possible. It is a useful test suite to run to exercise QEMU's linux-user code:: https://linux-test-project.github.io/ + +GCC gcov support +---------------- + +``gcov`` is a GCC tool to analyze the testing coverage by +instrumenting the tested code. To use it, configure QEMU with the +``--enable-gcov`` option and build. Then run the tests as usual. + +If you want to gather coverage information on a single test, the ``make +clean-gcda`` target can be used to delete any existing coverage +information before running it. + +You can generate an HTML coverage report by executing ``make +coverage-html`` which will create +``meson-logs/coveragereport/index.html``. + +Further analysis can be conducted by running the ``gcov`` command +directly on the various .gcda output files. Please read the ``gcov`` +documentation for more information.
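Putting the gcov steps above together, a typical sequence from an out-of-tree build directory might look roughly like this (the test target shown is only an example; any target from this document works the same way)::

   ../configure --enable-gcov
   make
   make check-unit            # or any other test target
   make coverage-html
   # the report is written to meson-logs/coveragereport/index.html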
diff --git a/docs/devel/trivial-patches.rst b/docs/devel/trivial-patches.rst new file mode 100644 index 00000000000..9380c730f78 --- /dev/null +++ b/docs/devel/trivial-patches.rst @@ -0,0 +1,52 @@ +.. _trivial-patches: + +Trivial Patches +=============== + +Overview +-------- + +Trivial patches that change just a few lines of code sometimes languish +on the mailing list even though they require only a small amount of +review. This is often the case for patches that do not fall under an +actively maintained subsystem and therefore fall through the cracks. + +The trivial patches team take on the task of reviewing and building pull +requests for patches that: + +- Do not fall under an actively maintained subsystem. +- Are single patches or short series (max 2-4 patches). +- Only touch a few lines of code. + +**You should hint that your patch is a candidate by CCing +qemu-trivial@nongnu.org.** + +Repositories +------------ + +Since the trivial patch team rotates maintainership there is only one +active repository at a time: + +- git://github.com/vivier/qemu.git trivial-patches - `browse `__ + +Workflow +-------- + +The trivial patches team rotates the duty of collecting trivial patches +amongst its members. A team member's job is to: + +1. Identify trivial patches on the development mailing list. +2. Review trivial patches, merge them into a git tree, and reply to state + that the patch is queued. +3. Send pull requests to the development mailing list once a week. + +A single team member can be on duty as long as they like. The suggested +time is 1 week before handing off to the next member. + +Team +---- + +If you would like to join the trivial patches team, contact Laurent +Vivier. The current team includes: + +- `Laurent Vivier `__ diff --git a/docs/devel/ui.rst b/docs/devel/ui.rst new file mode 100644 index 00000000000..17fb667dec4 --- /dev/null +++ b/docs/devel/ui.rst @@ -0,0 +1,8 @@ +================= +QEMU UI subsystem +================= + +QEMU Clipboard +-------------- + +.. kernel-doc:: include/ui/clipboard.h diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst new file mode 100644 index 00000000000..9ff6163c881 --- /dev/null +++ b/docs/devel/vfio-migration.rst @@ -0,0 +1,150 @@ +===================== +VFIO device Migration +===================== + +Migration of virtual machine involves saving the state for each device that +the guest is running on source host and restoring this saved state on the +destination host. This document details how saving and restoring of VFIO +devices is done in QEMU. + +Migration of VFIO devices consists of two phases: the optional pre-copy phase, +and the stop-and-copy phase. The pre-copy phase is iterative and allows to +accommodate VFIO devices that have a large amount of data that needs to be +transferred. The iterative pre-copy phase of migration allows for the guest to +continue whilst the VFIO device state is transferred to the destination, this +helps to reduce the total downtime of the VM. VFIO devices can choose to skip +the pre-copy phase of migration by returning pending_bytes as zero during the +pre-copy phase. + +A detailed description of the UAPI for VFIO device migration can be found in +the comment for the ``vfio_device_migration_info`` structure in the header +file linux-headers/linux/vfio.h. + +VFIO implements the device hooks for the iterative approach as follows: + +* A ``save_setup`` function that sets up the migration region and sets _SAVING + flag in the VFIO device state. 
+ +* A ``load_setup`` function that sets up the migration region on the + destination and sets _RESUMING flag in the VFIO device state. + +* A ``save_live_pending`` function that reads pending_bytes from the vendor + driver, which indicates the amount of data that the vendor driver has yet to + save for the VFIO device. + +* A ``save_live_iterate`` function that reads the VFIO device's data from the + vendor driver through the migration region during iterative phase. + +* A ``save_state`` function to save the device config space if it is present. + +* A ``save_live_complete_precopy`` function that resets _RUNNING flag from the + VFIO device state and iteratively copies the remaining data for the VFIO + device until the vendor driver indicates that no data remains (pending bytes + is zero). + +* A ``load_state`` function that loads the config section and the data + sections that are generated by the save functions above + +* ``cleanup`` functions for both save and load that perform any migration + related cleanup, including unmapping the migration region + + +The VFIO migration code uses a VM state change handler to change the VFIO +device state when the VM state changes from running to not-running, and +vice versa. + +Similarly, a migration state change handler is used to trigger a transition of +the VFIO device state when certain changes of the migration state occur. For +example, the VFIO device state is transitioned back to _RUNNING in case a +migration failed or was canceled. + +System memory dirty pages tracking +---------------------------------- + +A ``log_global_start`` and ``log_global_stop`` memory listener callback informs +the VFIO IOMMU module to start and stop dirty page tracking. A ``log_sync`` +memory listener callback marks those system memory pages as dirty which are +used for DMA by the VFIO device. The dirty pages bitmap is queried per +container. All pages pinned by the vendor driver through external APIs have to +be marked as dirty during migration. When there are CPU writes, CPU dirty page +tracking can identify dirtied pages, but any page pinned by the vendor driver +can also be written by the device. There is currently no device or IOMMU +support for dirty page tracking in hardware. + +By default, dirty pages are tracked when the device is in pre-copy as well as +stop-and-copy phase. So, a page pinned by the vendor driver will be copied to +the destination in both phases. Copying dirty pages in pre-copy phase helps +QEMU to predict if it can achieve its downtime tolerances. If QEMU during +pre-copy phase keeps finding dirty pages continuously, then it understands +that even in stop-and-copy phase, it is likely to find dirty pages and can +predict the downtime accordingly. + +QEMU also provides a per device opt-out option ``pre-copy-dirty-page-tracking`` +which disables querying the dirty bitmap during pre-copy phase. If it is set to +off, all dirty pages will be copied to the destination in stop-and-copy phase +only. + +System memory dirty pages tracking when vIOMMU is enabled +--------------------------------------------------------- + +With vIOMMU, an IO virtual address range can get unmapped while in pre-copy +phase of migration. In that case, the unmap ioctl returns any dirty pages in +that range and QEMU reports corresponding guest physical pages dirty. During +stop-and-copy phase, an IOMMU notifier is used to get a callback for mapped +pages and then dirty pages bitmap is fetched from VFIO IOMMU modules for those +mapped ranges. 
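For reference, the ``pre-copy-dirty-page-tracking`` opt-out described above is a per device property; assuming a ``vfio-pci`` device, a command line disabling it might look like the following (the host address is just a placeholder)::

   -device vfio-pci,host=0000:03:00.0,pre-copy-dirty-page-tracking=off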
+ +Flow of state changes during Live migration +=========================================== + +Below is the flow of state change during live migration. +The values in the brackets represent the VM state, the migration state, and +the VFIO device state, respectively. + +Live migration save path +------------------------ + +:: + + QEMU normal running state + (RUNNING, _NONE, _RUNNING) + | + migrate_init spawns migration_thread + Migration thread then calls each device's .save_setup() + (RUNNING, _SETUP, _RUNNING|_SAVING) + | + (RUNNING, _ACTIVE, _RUNNING|_SAVING) + If device is active, get pending_bytes by .save_live_pending() + If total pending_bytes >= threshold_size, call .save_live_iterate() + Data of VFIO device for pre-copy phase is copied + Iterate till total pending bytes converge and are less than threshold + | + On migration completion, vCPU stops and calls .save_live_complete_precopy for + each active device. The VFIO device is then transitioned into _SAVING state + (FINISH_MIGRATE, _DEVICE, _SAVING) + | + For the VFIO device, iterate in .save_live_complete_precopy until + pending data is 0 + (FINISH_MIGRATE, _DEVICE, _STOPPED) + | + (FINISH_MIGRATE, _COMPLETED, _STOPPED) + Migraton thread schedules cleanup bottom half and exits + +Live migration resume path +-------------------------- + +:: + + Incoming migration calls .load_setup for each device + (RESTORE_VM, _ACTIVE, _STOPPED) + | + For each device, .load_state is called for that device section data + (RESTORE_VM, _ACTIVE, _RESUMING) + | + At the end, .load_cleanup is called for each device and vCPUs are started + (RUNNING, _NONE, _RUNNING) + +Postcopy +======== + +Postcopy migration is currently not supported for VFIO devices. diff --git a/docs/devel/writing-monitor-commands.rst b/docs/devel/writing-monitor-commands.rst new file mode 100644 index 00000000000..1693822f8f9 --- /dev/null +++ b/docs/devel/writing-monitor-commands.rst @@ -0,0 +1,751 @@ +How to write monitor commands +============================= + +This document is a step-by-step guide on how to write new QMP commands using +the QAPI framework and HMP commands. + +This document doesn't discuss QMP protocol level details, nor does it dive +into the QAPI framework implementation. + +For an in-depth introduction to the QAPI framework, please refer to +docs/devel/qapi-code-gen.txt. For documentation about the QMP protocol, +start with docs/interop/qmp-intro.txt. + +New commands may be implemented in QMP only. New HMP commands should be +implemented on top of QMP. The typical HMP command wraps around an +equivalent QMP command, but HMP convenience commands built from QMP +building blocks are also fine. The long term goal is to make all +existing HMP commands conform to this, to fully isolate HMP from the +internals of QEMU. Refer to the `Writing a debugging aid returning +unstructured text`_ section for further guidance on commands that +would have traditionally been HMP only. + +Overview +-------- + +Generally speaking, the following steps should be taken in order to write a +new QMP command. + +1. Define the command and any types it needs in the appropriate QAPI + schema module. + +2. Write the QMP command itself, which is a regular C function. Preferably, + the command should be exported by some QEMU subsystem. But it can also be + added to the monitor/qmp-cmds.c file + +3. At this point the command can be tested under the QMP protocol + +4. Write the HMP command equivalent. 
This is not required and should only be + done if it does make sense to have the functionality in HMP. The HMP command + is implemented in terms of the QMP command + +The following sections will demonstrate each of the steps above. We will start +very simple and get more complex as we progress. + + +Testing +------- + +For all the examples in the next sections, the test setup is the same and is +shown here. + +First, QEMU should be started like this:: + + # qemu-system-TARGET [...] \ + -chardev socket,id=qmp,port=4444,host=localhost,server=on \ + -mon chardev=qmp,mode=control,pretty=on + +Then, in a different terminal:: + + $ telnet localhost 4444 + Trying 127.0.0.1... + Connected to localhost. + Escape character is '^]'. + { + "QMP": { + "version": { + "qemu": { + "micro": 50, + "minor": 15, + "major": 0 + }, + "package": "" + }, + "capabilities": [ + ] + } + } + +The above output is the QMP server saying you're connected. The server is +actually in capabilities negotiation mode. To enter in command mode type:: + + { "execute": "qmp_capabilities" } + +Then the server should respond:: + + { + "return": { + } + } + +Which is QMP's way of saying "the latest command executed OK and didn't return +any data". Now you're ready to enter the QMP example commands as explained in +the following sections. + + +Writing a simple command: hello-world +------------------------------------- + +That's the most simple QMP command that can be written. Usually, this kind of +command carries some meaningful action in QEMU but here it will just print +"Hello, world" to the standard output. + +Our command will be called "hello-world". It takes no arguments, nor does it +return any data. + +The first step is defining the command in the appropriate QAPI schema +module. We pick module qapi/misc.json, and add the following line at +the bottom:: + + { 'command': 'hello-world' } + +The "command" keyword defines a new QMP command. It's an JSON object. All +schema entries are JSON objects. The line above will instruct the QAPI to +generate any prototypes and the necessary code to marshal and unmarshal +protocol data. + +The next step is to write the "hello-world" implementation. As explained +earlier, it's preferable for commands to live in QEMU subsystems. But +"hello-world" doesn't pertain to any, so we put its implementation in +monitor/qmp-cmds.c:: + + void qmp_hello_world(Error **errp) + { + printf("Hello, world!\n"); + } + +There are a few things to be noticed: + +1. QMP command implementation functions must be prefixed with "qmp\_" +2. qmp_hello_world() returns void, this is in accordance with the fact that the + command doesn't return any data +3. It takes an "Error \*\*" argument. This is required. Later we will see how to + return errors and take additional arguments. The Error argument should not + be touched if the command doesn't return errors +4. We won't add the function's prototype. That's automatically done by the QAPI +5. Printing to the terminal is discouraged for QMP commands, we do it here + because it's the easiest way to demonstrate a QMP command + +You're done. Now build qemu, run it as suggested in the "Testing" section, +and then type the following QMP command:: + + { "execute": "hello-world" } + +Then check the terminal running qemu and look for the "Hello, world" string. If +you don't see it then something went wrong. + + +Arguments +~~~~~~~~~ + +Let's add an argument called "message" to our "hello-world" command. The new +argument will contain the string to be printed to stdout. 
It's an optional +argument, if it's not present we print our default "Hello, World" string. + +The first change we have to do is to modify the command specification in the +schema file to the following:: + + { 'command': 'hello-world', 'data': { '*message': 'str' } } + +Notice the new 'data' member in the schema. It's an JSON object whose each +element is an argument to the command in question. Also notice the asterisk, +it's used to mark the argument optional (that means that you shouldn't use it +for mandatory arguments). Finally, 'str' is the argument's type, which +stands for "string". The QAPI also supports integers, booleans, enumerations +and user defined types. + +Now, let's update our C implementation in monitor/qmp-cmds.c:: + + void qmp_hello_world(bool has_message, const char *message, Error **errp) + { + if (has_message) { + printf("%s\n", message); + } else { + printf("Hello, world\n"); + } + } + +There are two important details to be noticed: + +1. All optional arguments are accompanied by a 'has\_' boolean, which is set + if the optional argument is present or false otherwise +2. The C implementation signature must follow the schema's argument ordering, + which is defined by the "data" member + +Time to test our new version of the "hello-world" command. Build qemu, run it as +described in the "Testing" section and then send two commands:: + + { "execute": "hello-world" } + { + "return": { + } + } + + { "execute": "hello-world", "arguments": { "message": "We love qemu" } } + { + "return": { + } + } + +You should see "Hello, world" and "We love qemu" in the terminal running qemu, +if you don't see these strings, then something went wrong. + + +Errors +~~~~~~ + +QMP commands should use the error interface exported by the error.h header +file. Basically, most errors are set by calling the error_setg() function. + +Let's say we don't accept the string "message" to contain the word "love". If +it does contain it, we want the "hello-world" command to return an error:: + + void qmp_hello_world(bool has_message, const char *message, Error **errp) + { + if (has_message) { + if (strstr(message, "love")) { + error_setg(errp, "the word 'love' is not allowed"); + return; + } + printf("%s\n", message); + } else { + printf("Hello, world\n"); + } + } + +The first argument to the error_setg() function is the Error pointer +to pointer, which is passed to all QMP functions. The next argument is a human +description of the error, this is a free-form printf-like string. + +Let's test the example above. Build qemu, run it as defined in the "Testing" +section, and then issue the following command:: + + { "execute": "hello-world", "arguments": { "message": "all you need is love" } } + +The QMP server's response should be:: + + { + "error": { + "class": "GenericError", + "desc": "the word 'love' is not allowed" + } + } + +Note that error_setg() produces a "GenericError" class. In general, +all QMP errors should have that error class. There are two exceptions +to this rule: + + 1. To support a management application's need to recognize a specific + error for special handling + + 2. Backward compatibility + +If the failure you want to report falls into one of the two cases above, +use error_set() with a second argument of an ErrorClass value. + + +Command Documentation +~~~~~~~~~~~~~~~~~~~~~ + +There's only one step missing to make "hello-world"'s implementation complete, +and that's its documentation in the schema file. 
+ +There are many examples of such documentation in the schema file already, but +here goes "hello-world"'s new entry for qapi/misc.json:: + + ## + # @hello-world: + # + # Print a client provided string to the standard output stream. + # + # @message: string to be printed + # + # Returns: Nothing on success. + # + # Notes: if @message is not provided, the "Hello, world" string will + # be printed instead + # + # Since: + ## + { 'command': 'hello-world', 'data': { '*message': 'str' } } + +Please, note that the "Returns" clause is optional if a command doesn't return +any data nor any errors. + + +Implementing the HMP command +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now that the QMP command is in place, we can also make it available in the human +monitor (HMP). + +With the introduction of the QAPI, HMP commands make QMP calls. Most of the +time HMP commands are simple wrappers. All HMP command implementations exist in +the monitor/hmp-cmds.c file. + +Here's the implementation of the "hello-world" HMP command:: + + void hmp_hello_world(Monitor *mon, const QDict *qdict) + { + const char *message = qdict_get_try_str(qdict, "message"); + Error *err = NULL; + + qmp_hello_world(!!message, message, &err); + if (hmp_handle_error(mon, err)) { + return; + } + } + +Also, you have to add the function's prototype to the hmp.h file. + +There are three important points to be noticed: + +1. The "mon" and "qdict" arguments are mandatory for all HMP functions. The + former is the monitor object. The latter is how the monitor passes + arguments entered by the user to the command implementation +2. hmp_hello_world() performs error checking. In this example we just call + hmp_handle_error() which prints a message to the user, but we could do + more, like taking different actions depending on the error + qmp_hello_world() returns +3. The "err" variable must be initialized to NULL before performing the + QMP call + +There's one last step to actually make the command available to monitor users, +we should add it to the hmp-commands.hx file:: + + { + .name = "hello-world", + .args_type = "message:s?", + .params = "hello-world [message]", + .help = "Print message to the standard output", + .cmd = hmp_hello_world, + }, + +:: + + STEXI + @item hello_world @var{message} + @findex hello_world + Print message to the standard output + ETEXI + +To test this you have to open a user monitor and issue the "hello-world" +command. It might be instructive to check the command's documentation with +HMP's "help" command. + +Please, check the "-monitor" command-line option to know how to open a user +monitor. + + +Writing more complex commands +----------------------------- + +A QMP command is capable of returning any data the QAPI supports like integers, +strings, booleans, enumerations and user defined types. + +In this section we will focus on user defined types. Please, check the QAPI +documentation for information about the other types. + + +Modelling data in QAPI +~~~~~~~~~~~~~~~~~~~~~~ + +For a QMP command to be considered stable and supported long term, +there is a requirement that returned data should be explicitly modelled +using fine-grained QAPI types. As a general guide, a caller of the QMP +command should never need to parse individual returned data fields. If +a field appears to need parsing, then it should be split into separate +fields corresponding to each distinct data item. This should be the +common case for any new QMP command that is intended to be used by +machines, as opposed to exclusively human operators.
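As a purely hypothetical sketch of what "split into separate fields" means in practice, compare the two schema fragments below; neither type exists in QEMU, they only contrast a string that callers would have to parse with a fine-grained equivalent::

   # A caller would have to parse the string -- avoid this:
   { 'struct': 'ExampleVersionInfo',
     'data': { 'version': 'str' } }

   # Fine-grained modelling -- prefer this:
   { 'struct': 'ExampleVersionInfo',
     'data': { 'major': 'int', 'minor': 'int', 'micro': 'int' } }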
+ +Some QMP commands, however, are only intended as ad hoc debugging aids +for human operators. While they may return large amounts of formatted +data, it is not expected that machines will need to parse the result. +The overhead of defining a fine grained QAPI type for the data may not +be justified by the potential benefit. In such cases, it is permitted +to have a command return a simple string that contains formatted data, +however, it is mandatory for the command to use the 'x-' name prefix. +This indicates that the command is not guaranteed to be long term +stable / liable to change in future and is not following QAPI design +best practices. An example where this approach is taken is the QMP +command "x-query-registers". This returns a formatted dump of the +architecture specific CPU state. The way the data is formatted varies +across QEMU targets, is liable to change over time, and is only +intended to be consumed as an opaque string by machines. Refer to the +`Writing a debugging aid returning unstructured text`_ section for +an illustration. + +User Defined Types +~~~~~~~~~~~~~~~~~~ + +FIXME This example needs to be redone after commit 6d32717 + +For this example we will write the query-alarm-clock command, which returns +information about QEMU's timer alarm. For more information about it, please +check the "-clock" command-line option. + +We want to return two pieces of information. The first one is the alarm clock's +name. The second one is when the next alarm will fire. The former information is +returned as a string, the latter is an integer in nanoseconds (which is not +very useful in practice, as the timer has probably already fired when the +information reaches the client). + +The best way to return that data is to create a new QAPI type, as shown below:: + + ## + # @QemuAlarmClock + # + # QEMU alarm clock information. + # + # @clock-name: The alarm clock method's name. + # + # @next-deadline: The time (in nanoseconds) the next alarm will fire. + # + # Since: 1.0 + ## + { 'type': 'QemuAlarmClock', + 'data': { 'clock-name': 'str', '*next-deadline': 'int' } } + +The "type" keyword defines a new QAPI type. Its "data" member contains the +type's members. In this example our members are the "clock-name" and the +"next-deadline" one, which is optional. + +Now let's define the query-alarm-clock command:: + + ## + # @query-alarm-clock + # + # Return information about QEMU's alarm clock. + # + # Returns a @QemuAlarmClock instance describing the alarm clock method + # being currently used by QEMU (this is usually set by the '-clock' + # command-line option). + # + # Since: 1.0 + ## + { 'command': 'query-alarm-clock', 'returns': 'QemuAlarmClock' } + +Notice the "returns" keyword. As its name suggests, it's used to define the +data returned by a command. + +It's time to implement the qmp_query_alarm_clock() function, you can put it +in the qemu-timer.c file:: + + QemuAlarmClock *qmp_query_alarm_clock(Error **errp) + { + QemuAlarmClock *clock; + int64_t deadline; + + clock = g_malloc0(sizeof(*clock)); + + deadline = qemu_next_alarm_deadline(); + if (deadline > 0) { + clock->has_next_deadline = true; + clock->next_deadline = deadline; + } + clock->clock_name = g_strdup(alarm_timer->name); + + return clock; + } + +There are a number of things to be noticed: + +1. The QemuAlarmClock type is automatically generated by the QAPI framework, + its members correspond to the type's specification in the schema file +2. 
As specified in the schema file, the function returns a QemuAlarmClock + instance and takes no arguments (besides the "errp" one, which is mandatory + for all QMP functions) +3. The "clock" variable (which will point to our QAPI type instance) is + allocated by the regular g_malloc0() function. Note that we chose to + initialize the memory to zero. This is recommended for all QAPI types, as + it helps avoiding bad surprises (specially with booleans) +4. Remember that "next_deadline" is optional? All optional members have a + 'has_TYPE_NAME' member that should be properly set by the implementation, + as shown above +5. Even static strings, such as "alarm_timer->name", should be dynamically + allocated by the implementation. This is so because the QAPI also generates + a function to free its types and it cannot distinguish between dynamically + or statically allocated strings +6. You have to include "qapi/qapi-commands-misc.h" in qemu-timer.c + +Time to test the new command. Build qemu, run it as described in the "Testing" +section and try this:: + + { "execute": "query-alarm-clock" } + { + "return": { + "next-deadline": 2368219, + "clock-name": "dynticks" + } + } + + +The HMP command +~~~~~~~~~~~~~~~ + +Here's the HMP counterpart of the query-alarm-clock command:: + + void hmp_info_alarm_clock(Monitor *mon) + { + QemuAlarmClock *clock; + Error *err = NULL; + + clock = qmp_query_alarm_clock(&err); + if (hmp_handle_error(mon, err)) { + return; + } + + monitor_printf(mon, "Alarm clock method in use: '%s'\n", clock->clock_name); + if (clock->has_next_deadline) { + monitor_printf(mon, "Next alarm will fire in %" PRId64 " nanoseconds\n", + clock->next_deadline); + } + + qapi_free_QemuAlarmClock(clock); + } + +It's important to notice that hmp_info_alarm_clock() calls +qapi_free_QemuAlarmClock() to free the data returned by qmp_query_alarm_clock(). +For user defined types, the QAPI will generate a qapi_free_QAPI_TYPE_NAME() +function and that's what you have to use to free the types you define and +qapi_free_QAPI_TYPE_NAMEList() for list types (explained in the next section). +If the QMP call returns a string, then you should g_free() to free it. + +Also note that hmp_info_alarm_clock() performs error handling. That's not +strictly required if you're sure the QMP function doesn't return errors, but +it's good practice to always check for errors. + +Another important detail is that HMP's "info" commands don't go into the +hmp-commands.hx. Instead, they go into the info_cmds[] table, which is defined +in the monitor/misc.c file. The entry for the "info alarmclock" follows:: + + { + .name = "alarmclock", + .args_type = "", + .params = "", + .help = "show information about the alarm clock", + .cmd = hmp_info_alarm_clock, + }, + +To test this, run qemu and type "info alarmclock" in the user monitor. + + +Returning Lists +~~~~~~~~~~~~~~~ + +For this example, we're going to return all available methods for the timer +alarm, which is pretty much what the command-line option "-clock ?" does, +except that we're also going to inform which method is in use. + +This first step is to define a new type:: + + ## + # @TimerAlarmMethod + # + # Timer alarm method information. + # + # @method-name: The method's name. 
+ # + # @current: true if this alarm method is currently in use, false otherwise + # + # Since: 1.0 + ## + { 'type': 'TimerAlarmMethod', + 'data': { 'method-name': 'str', 'current': 'bool' } } + +The command will be called "query-alarm-methods", here is its schema +specification:: + + ## + # @query-alarm-methods + # + # Returns information about available alarm methods. + # + # Returns: a list of @TimerAlarmMethod for each method + # + # Since: 1.0 + ## + { 'command': 'query-alarm-methods', 'returns': ['TimerAlarmMethod'] } + +Notice the syntax for returning lists "'returns': ['TimerAlarmMethod']", this +should be read as "returns a list of TimerAlarmMethod instances". + +The C implementation follows:: + + TimerAlarmMethodList *qmp_query_alarm_methods(Error **errp) + { + TimerAlarmMethodList *method_list = NULL; + const struct qemu_alarm_timer *p; + bool current = true; + + for (p = alarm_timers; p->name; p++) { + TimerAlarmMethod *value = g_malloc0(sizeof(*value)); + value->method_name = g_strdup(p->name); + value->current = current; + QAPI_LIST_PREPEND(method_list, value); + current = false; + } + + return method_list; + } + +The most important difference from the previous examples is the +TimerAlarmMethodList type, which is automatically generated by the QAPI from +the TimerAlarmMethod type. + +Each list node is represented by a TimerAlarmMethodList instance; +QAPI_LIST_PREPEND() allocates it and links it into the list. We also have to +allocate the node's contents, which is +stored in its "value" member. In our example, the "value" member is a pointer +to a TimerAlarmMethod instance. + +Notice that the "current" variable is used as "true" only in the first +iteration of the loop. That's because the alarm timer method in use is the +first element of the alarm_timers array. Also notice that QAPI lists are handled +by hand and we return the head of the list. + +Now build qemu, run it as explained in the "Testing" section and try our new +command:: + + { "execute": "query-alarm-methods" } + { + "return": [ + { + "current": false, + "method-name": "unix" + }, + { + "current": true, + "method-name": "dynticks" + } + ] + } + +The HMP counterpart is a bit more complex than previous examples because it +has to traverse the list; it's shown below for reference:: + + void hmp_info_alarm_methods(Monitor *mon) + { + TimerAlarmMethodList *method_list, *method; + Error *err = NULL; + + method_list = qmp_query_alarm_methods(&err); + if (hmp_handle_error(mon, err)) { + return; + } + + for (method = method_list; method; method = method->next) { + monitor_printf(mon, "%c %s\n", method->value->current ? '*' : ' ', + method->value->method_name); + } + + qapi_free_TimerAlarmMethodList(method_list); + } + +Writing a debugging aid returning unstructured text +--------------------------------------------------- + +As discussed in section `Modelling data in QAPI`_, it is required that +commands expecting machine usage be using fine-grained QAPI data types. +The exception to this rule applies when the command is solely intended +as a debugging aid and allows for returning unstructured text. This is +commonly needed for query commands that report aspects of QEMU's +internal state that are useful to human operators. + +In this example we will consider a simplified variant of the HMP +command ``info roms``. Following the earlier rules, this command will +need to live under the ``x-`` name prefix, so its QMP implementation +will be called ``x-query-roms``.
It will have no parameters and will +return a single text string:: + + { 'struct': 'HumanReadableText', + 'data': { 'human-readable-text': 'str' } } + + { 'command': 'x-query-roms', + 'returns': 'HumanReadableText' } + +The ``HumanReadableText`` struct is intended to be used for all +commands under the ``x-`` name prefix that are returning unstructured +text targeted at humans. It should never be used for commands outside +the ``x-`` name prefix, as those should be using structured QAPI types. + +Implementing the QMP command +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The QMP implementation will typically involve creating a ``GString`` +object and printing formatted data into it:: + + HumanReadableText *qmp_x_query_roms(Error **errp) + { + g_autoptr(GString) buf = g_string_new(""); + Rom *rom; + + QTAILQ_FOREACH(rom, &roms, next) { + g_string_append_printf(buf, + "%s size=0x%06zx name=\"%s\"\n", + memory_region_name(rom->mr), + rom->romsize, + rom->name); + } + + return human_readable_text_from_str(buf); + } + + +Implementing the HMP command +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now that the QMP command is in place, we can also make it available in +the human monitor (HMP) as shown in previous examples. The HMP +implementations will all look fairly similar, as all they need do is +invoke the QMP command and then print the resulting text or error +message. Here's the implementation of the "info roms" HMP command:: + + void hmp_info_roms(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; + g_autoptr(HumanReadableText) info = qmp_x_query_roms(&err); + + if (hmp_handle_error(mon, err)) { + return; + } + monitor_printf(mon, "%s", info->human_readable_text); + } + +Also, you have to add the function's prototype to the hmp.h file. + +There's one last step to actually make the command available to +monitor users, we should add it to the hmp-commands-info.hx file:: + + { + .name = "roms", + .args_type = "", + .params = "", + .help = "show roms", + .cmd = hmp_info_roms, + }, + +The case of writing an HMP info handler that calls a no-parameter QMP query +command is quite common. To simplify the implementation there is a general +purpose HMP info handler for this scenario. All that is required to expose +a no-parameter QMP query command via HMP is to declare it using the +'.cmd_info_hrt' field to point to the QMP handler, and leave the '.cmd' +field NULL:: + + { + .name = "roms", + .args_type = "", + .params = "", + .help = "show roms", + .cmd_info_hrt = qmp_x_query_roms, + }, diff --git a/docs/devel/writing-qmp-commands.txt b/docs/devel/writing-qmp-commands.txt deleted file mode 100644 index b1e31d56c0f..00000000000 --- a/docs/devel/writing-qmp-commands.txt +++ /dev/null @@ -1,597 +0,0 @@ -= How to write QMP commands using the QAPI framework = - -This document is a step-by-step guide on how to write new QMP commands using -the QAPI framework. It also shows how to implement new style HMP commands. - -This document doesn't discuss QMP protocol level details, nor does it dive -into the QAPI framework implementation. - -For an in-depth introduction to the QAPI framework, please refer to -docs/devel/qapi-code-gen.txt. For documentation about the QMP protocol, -start with docs/interop/qmp-intro.txt. - -== Overview == - -Generally speaking, the following steps should be taken in order to write a -new QMP command. - -1. Define the command and any types it needs in the appropriate QAPI - schema module. - -2. Write the QMP command itself, which is a regular C function.
Preferably, - the command should be exported by some QEMU subsystem. But it can also be - added to the monitor/qmp-cmds.c file - -3. At this point the command can be tested under the QMP protocol - -4. Write the HMP command equivalent. This is not required and should only be - done if it does make sense to have the functionality in HMP. The HMP command - is implemented in terms of the QMP command - -The following sections will demonstrate each of the steps above. We will start -very simple and get more complex as we progress. - -=== Testing === - -For all the examples in the next sections, the test setup is the same and is -shown here. - -First, QEMU should be started like this: - -# qemu-system-TARGET [...] \ - -chardev socket,id=qmp,port=4444,host=localhost,server=on \ - -mon chardev=qmp,mode=control,pretty=on - -Then, in a different terminal: - -$ telnet localhost 4444 -Trying 127.0.0.1... -Connected to localhost. -Escape character is '^]'. -{ - "QMP": { - "version": { - "qemu": { - "micro": 50, - "minor": 15, - "major": 0 - }, - "package": "" - }, - "capabilities": [ - ] - } -} - -The above output is the QMP server saying you're connected. The server is -actually in capabilities negotiation mode. To enter in command mode type: - -{ "execute": "qmp_capabilities" } - -Then the server should respond: - -{ - "return": { - } -} - -Which is QMP's way of saying "the latest command executed OK and didn't return -any data". Now you're ready to enter the QMP example commands as explained in -the following sections. - -== Writing a command that doesn't return data == - -That's the most simple QMP command that can be written. Usually, this kind of -command carries some meaningful action in QEMU but here it will just print -"Hello, world" to the standard output. - -Our command will be called "hello-world". It takes no arguments, nor does it -return any data. - -The first step is defining the command in the appropriate QAPI schema -module. We pick module qapi/misc.json, and add the following line at -the bottom: - -{ 'command': 'hello-world' } - -The "command" keyword defines a new QMP command. It's an JSON object. All -schema entries are JSON objects. The line above will instruct the QAPI to -generate any prototypes and the necessary code to marshal and unmarshal -protocol data. - -The next step is to write the "hello-world" implementation. As explained -earlier, it's preferable for commands to live in QEMU subsystems. But -"hello-world" doesn't pertain to any, so we put its implementation in -monitor/qmp-cmds.c: - -void qmp_hello_world(Error **errp) -{ - printf("Hello, world!\n"); -} - -There are a few things to be noticed: - -1. QMP command implementation functions must be prefixed with "qmp_" -2. qmp_hello_world() returns void, this is in accordance with the fact that the - command doesn't return any data -3. It takes an "Error **" argument. This is required. Later we will see how to - return errors and take additional arguments. The Error argument should not - be touched if the command doesn't return errors -4. We won't add the function's prototype. That's automatically done by the QAPI -5. Printing to the terminal is discouraged for QMP commands, we do it here - because it's the easiest way to demonstrate a QMP command - -You're done. Now build qemu, run it as suggested in the "Testing" section, -and then type the following QMP command: - -{ "execute": "hello-world" } - -Then check the terminal running qemu and look for the "Hello, world" string. If -you don't see it then something went wrong. 
- -=== Arguments === - -Let's add an argument called "message" to our "hello-world" command. The new -argument will contain the string to be printed to stdout. It's an optional -argument, if it's not present we print our default "Hello, World" string. - -The first change we have to do is to modify the command specification in the -schema file to the following: - -{ 'command': 'hello-world', 'data': { '*message': 'str' } } - -Notice the new 'data' member in the schema. It's an JSON object whose each -element is an argument to the command in question. Also notice the asterisk, -it's used to mark the argument optional (that means that you shouldn't use it -for mandatory arguments). Finally, 'str' is the argument's type, which -stands for "string". The QAPI also supports integers, booleans, enumerations -and user defined types. - -Now, let's update our C implementation in monitor/qmp-cmds.c: - -void qmp_hello_world(bool has_message, const char *message, Error **errp) -{ - if (has_message) { - printf("%s\n", message); - } else { - printf("Hello, world\n"); - } -} - -There are two important details to be noticed: - -1. All optional arguments are accompanied by a 'has_' boolean, which is set - if the optional argument is present or false otherwise -2. The C implementation signature must follow the schema's argument ordering, - which is defined by the "data" member - -Time to test our new version of the "hello-world" command. Build qemu, run it as -described in the "Testing" section and then send two commands: - -{ "execute": "hello-world" } -{ - "return": { - } -} - -{ "execute": "hello-world", "arguments": { "message": "We love qemu" } } -{ - "return": { - } -} - -You should see "Hello, world" and "We love qemu" in the terminal running qemu, -if you don't see these strings, then something went wrong. - -=== Errors === - -QMP commands should use the error interface exported by the error.h header -file. Basically, most errors are set by calling the error_setg() function. - -Let's say we don't accept the string "message" to contain the word "love". If -it does contain it, we want the "hello-world" command to return an error: - -void qmp_hello_world(bool has_message, const char *message, Error **errp) -{ - if (has_message) { - if (strstr(message, "love")) { - error_setg(errp, "the word 'love' is not allowed"); - return; - } - printf("%s\n", message); - } else { - printf("Hello, world\n"); - } -} - -The first argument to the error_setg() function is the Error pointer -to pointer, which is passed to all QMP functions. The next argument is a human -description of the error, this is a free-form printf-like string. - -Let's test the example above. Build qemu, run it as defined in the "Testing" -section, and then issue the following command: - -{ "execute": "hello-world", "arguments": { "message": "all you need is love" } } - -The QMP server's response should be: - -{ - "error": { - "class": "GenericError", - "desc": "the word 'love' is not allowed" - } -} - -Note that error_setg() produces a "GenericError" class. In general, -all QMP errors should have that error class. There are two exceptions -to this rule: - - 1. To support a management application's need to recognize a specific - error for special handling - - 2. Backward compatibility - -If the failure you want to report falls into one of the two cases above, -use error_set() with a second argument of an ErrorClass value. 
- -=== Command Documentation === - -There's only one step missing to make "hello-world"'s implementation complete, -and that's its documentation in the schema file. - -There are many examples of such documentation in the schema file already, but -here goes "hello-world"'s new entry for qapi/misc.json: - -## -# @hello-world: -# -# Print a client provided string to the standard output stream. -# -# @message: string to be printed -# -# Returns: Nothing on success. -# -# Notes: if @message is not provided, the "Hello, world" string will -# be printed instead -# -# Since: -## -{ 'command': 'hello-world', 'data': { '*message': 'str' } } - -Please, note that the "Returns" clause is optional if a command doesn't return -any data nor any errors. - -=== Implementing the HMP command === - -Now that the QMP command is in place, we can also make it available in the human -monitor (HMP). - -With the introduction of the QAPI, HMP commands make QMP calls. Most of the -time HMP commands are simple wrappers. All HMP commands implementation exist in -the monitor/hmp-cmds.c file. - -Here's the implementation of the "hello-world" HMP command: - -void hmp_hello_world(Monitor *mon, const QDict *qdict) -{ - const char *message = qdict_get_try_str(qdict, "message"); - Error *err = NULL; - - qmp_hello_world(!!message, message, &err); - if (err) { - monitor_printf(mon, "%s\n", error_get_pretty(err)); - error_free(err); - return; - } -} - -Also, you have to add the function's prototype to the hmp.h file. - -There are three important points to be noticed: - -1. The "mon" and "qdict" arguments are mandatory for all HMP functions. The - former is the monitor object. The latter is how the monitor passes - arguments entered by the user to the command implementation -2. hmp_hello_world() performs error checking. In this example we just print - the error description to the user, but we could do more, like taking - different actions depending on the error qmp_hello_world() returns -3. The "err" variable must be initialized to NULL before performing the - QMP call - -There's one last step to actually make the command available to monitor users, -we should add it to the hmp-commands.hx file: - - { - .name = "hello-world", - .args_type = "message:s?", - .params = "hello-world [message]", - .help = "Print message to the standard output", - .cmd = hmp_hello_world, - }, - -STEXI -@item hello_world @var{message} -@findex hello_world -Print message to the standard output -ETEXI - -To test this you have to open a user monitor and issue the "hello-world" -command. It might be instructive to check the command's documentation with -HMP's "help" command. - -Please, check the "-monitor" command-line option to know how to open a user -monitor. - -== Writing a command that returns data == - -A QMP command is capable of returning any data the QAPI supports like integers, -strings, booleans, enumerations and user defined types. - -In this section we will focus on user defined types. Please, check the QAPI -documentation for information about the other types. - -=== User Defined Types === - -FIXME This example needs to be redone after commit 6d32717 - -For this example we will write the query-alarm-clock command, which returns -information about QEMU's timer alarm. For more information about it, please -check the "-clock" command-line option. - -We want to return two pieces of information. The first one is the alarm clock's -name. The second one is when the next alarm will fire. 
The former information is -returned as a string, the latter is an integer in nanoseconds (which is not -very useful in practice, as the timer has probably already fired when the -information reaches the client). - -The best way to return that data is to create a new QAPI type, as shown below: - -## -# @QemuAlarmClock -# -# QEMU alarm clock information. -# -# @clock-name: The alarm clock method's name. -# -# @next-deadline: The time (in nanoseconds) the next alarm will fire. -# -# Since: 1.0 -## -{ 'type': 'QemuAlarmClock', - 'data': { 'clock-name': 'str', '*next-deadline': 'int' } } - -The "type" keyword defines a new QAPI type. Its "data" member contains the -type's members. In this example our members are the "clock-name" and the -"next-deadline" one, which is optional. - -Now let's define the query-alarm-clock command: - -## -# @query-alarm-clock -# -# Return information about QEMU's alarm clock. -# -# Returns a @QemuAlarmClock instance describing the alarm clock method -# being currently used by QEMU (this is usually set by the '-clock' -# command-line option). -# -# Since: 1.0 -## -{ 'command': 'query-alarm-clock', 'returns': 'QemuAlarmClock' } - -Notice the "returns" keyword. As its name suggests, it's used to define the -data returned by a command. - -It's time to implement the qmp_query_alarm_clock() function, you can put it -in the qemu-timer.c file: - -QemuAlarmClock *qmp_query_alarm_clock(Error **errp) -{ - QemuAlarmClock *clock; - int64_t deadline; - - clock = g_malloc0(sizeof(*clock)); - - deadline = qemu_next_alarm_deadline(); - if (deadline > 0) { - clock->has_next_deadline = true; - clock->next_deadline = deadline; - } - clock->clock_name = g_strdup(alarm_timer->name); - - return clock; -} - -There are a number of things to be noticed: - -1. The QemuAlarmClock type is automatically generated by the QAPI framework, - its members correspond to the type's specification in the schema file -2. As specified in the schema file, the function returns a QemuAlarmClock - instance and takes no arguments (besides the "errp" one, which is mandatory - for all QMP functions) -3. The "clock" variable (which will point to our QAPI type instance) is - allocated by the regular g_malloc0() function. Note that we chose to - initialize the memory to zero. This is recommended for all QAPI types, as - it helps avoiding bad surprises (specially with booleans) -4. Remember that "next_deadline" is optional? All optional members have a - 'has_TYPE_NAME' member that should be properly set by the implementation, - as shown above -5. Even static strings, such as "alarm_timer->name", should be dynamically - allocated by the implementation. This is so because the QAPI also generates - a function to free its types and it cannot distinguish between dynamically - or statically allocated strings -6. You have to include "qapi/qapi-commands-misc.h" in qemu-timer.c - -Time to test the new command. 
Build qemu, run it as described in the "Testing" -section and try this: - -{ "execute": "query-alarm-clock" } -{ - "return": { - "next-deadline": 2368219, - "clock-name": "dynticks" - } -} - -==== The HMP command ==== - -Here's the HMP counterpart of the query-alarm-clock command: - -void hmp_info_alarm_clock(Monitor *mon) -{ - QemuAlarmClock *clock; - Error *err = NULL; - - clock = qmp_query_alarm_clock(&err); - if (err) { - monitor_printf(mon, "Could not query alarm clock information\n"); - error_free(err); - return; - } - - monitor_printf(mon, "Alarm clock method in use: '%s'\n", clock->clock_name); - if (clock->has_next_deadline) { - monitor_printf(mon, "Next alarm will fire in %" PRId64 " nanoseconds\n", - clock->next_deadline); - } - - qapi_free_QemuAlarmClock(clock); -} - -It's important to notice that hmp_info_alarm_clock() calls -qapi_free_QemuAlarmClock() to free the data returned by qmp_query_alarm_clock(). -For user defined types, the QAPI will generate a qapi_free_QAPI_TYPE_NAME() -function and that's what you have to use to free the types you define and -qapi_free_QAPI_TYPE_NAMEList() for list types (explained in the next section). -If the QMP call returns a string, then you should g_free() to free it. - -Also note that hmp_info_alarm_clock() performs error handling. That's not -strictly required if you're sure the QMP function doesn't return errors, but -it's good practice to always check for errors. - -Another important detail is that HMP's "info" commands don't go into the -hmp-commands.hx. Instead, they go into the info_cmds[] table, which is defined -in the monitor/misc.c file. The entry for the "info alarmclock" follows: - - { - .name = "alarmclock", - .args_type = "", - .params = "", - .help = "show information about the alarm clock", - .cmd = hmp_info_alarm_clock, - }, - -To test this, run qemu and type "info alarmclock" in the user monitor. - -=== Returning Lists === - -For this example, we're going to return all available methods for the timer -alarm, which is pretty much what the command-line option "-clock ?" does, -except that we're also going to inform which method is in use. - -This first step is to define a new type: - -## -# @TimerAlarmMethod -# -# Timer alarm method information. -# -# @method-name: The method's name. -# -# @current: true if this alarm method is currently in use, false otherwise -# -# Since: 1.0 -## -{ 'type': 'TimerAlarmMethod', - 'data': { 'method-name': 'str', 'current': 'bool' } } - -The command will be called "query-alarm-methods", here is its schema -specification: - -## -# @query-alarm-methods -# -# Returns information about available alarm methods. -# -# Returns: a list of @TimerAlarmMethod for each method -# -# Since: 1.0 -## -{ 'command': 'query-alarm-methods', 'returns': ['TimerAlarmMethod'] } - -Notice the syntax for returning lists "'returns': ['TimerAlarmMethod']", this -should be read as "returns a list of TimerAlarmMethod instances". 
- -The C implementation follows: - -TimerAlarmMethodList *qmp_query_alarm_methods(Error **errp) -{ - TimerAlarmMethodList *method_list = NULL; - const struct qemu_alarm_timer *p; - bool current = true; - - for (p = alarm_timers; p->name; p++) { - TimerAlarmMethod *value = g_malloc0(*value); - value->method_name = g_strdup(p->name); - value->current = current; - QAPI_LIST_PREPEND(method_list, value); - current = false; - } - - return method_list; -} - -The most important difference from the previous examples is the -TimerAlarmMethodList type, which is automatically generated by the QAPI from -the TimerAlarmMethod type. - -Each list node is represented by a TimerAlarmMethodList instance. We have to -allocate it, and that's done inside the for loop: the "info" pointer points to -an allocated node. We also have to allocate the node's contents, which is -stored in its "value" member. In our example, the "value" member is a pointer -to an TimerAlarmMethod instance. - -Notice that the "current" variable is used as "true" only in the first -iteration of the loop. That's because the alarm timer method in use is the -first element of the alarm_timers array. Also notice that QAPI lists are handled -by hand and we return the head of the list. - -Now Build qemu, run it as explained in the "Testing" section and try our new -command: - -{ "execute": "query-alarm-methods" } -{ - "return": [ - { - "current": false, - "method-name": "unix" - }, - { - "current": true, - "method-name": "dynticks" - } - ] -} - -The HMP counterpart is a bit more complex than previous examples because it -has to traverse the list, it's shown below for reference: - -void hmp_info_alarm_methods(Monitor *mon) -{ - TimerAlarmMethodList *method_list, *method; - Error *err = NULL; - - method_list = qmp_query_alarm_methods(&err); - if (err) { - monitor_printf(mon, "Could not query alarm methods\n"); - error_free(err); - return; - } - - for (method = method_list; method; method = method->next) { - monitor_printf(mon, "%c %s\n", method->value->current ? '*' : ' ', - method->value->method_name); - } - - qapi_free_TimerAlarmMethodList(method_list); -} diff --git a/docs/hyperv.txt b/docs/hyperv.txt index e53c581f458..0417c183a3b 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -170,7 +170,7 @@ Recommended: hv-frequencies 3.16. hv-evmcs =============== The enlightenment is nested specific, it targets Hyper-V on KVM guests. When -enabled, it provides Enlightened VMCS feature to the guest. The feature +enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V) hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only. Note: some virtualization features (e.g. Posted Interrupts) are disabled when @@ -189,7 +189,15 @@ enabled. Requires: hv-vpindex, hv-synic, hv-time, hv-stimer -3.17. hv-no-nonarch-coresharing=on/off/auto +3.18. hv-avic (hv-apicv) +======================= +The enlightenment allows to use Hyper-V SynIC with hardware APICv/AVIC enabled. +Normally, Hyper-V SynIC disables these hardware feature and suggests the guest +to use paravirtualized AutoEOI feature. +Note: enabling this feature on old hardware (without APICv/AVIC support) may +have negative effect on guest's performance. + +3.19. hv-no-nonarch-coresharing=on/off/auto =========================================== This enlightenment tells guest OS that virtual processors will never share a physical core unless they are reported as sibling SMT threads. 
This information @@ -203,17 +211,45 @@ When the option is set to 'on' QEMU will always enable the feature, regardless of host setup. To keep guests secure, this can only be used in conjunction with exposing correct vCPU topology and vCPU pinning. -4. Development features -======================== +3.20. hv-version-id-{build,major,minor,spack,sbranch,snumber} +============================================================= +This changes Hyper-V version identification in CPUID 0x40000002.EAX-EDX from the +default (WS2016). +- hv-version-id-build sets 'Build Number' (32 bits) +- hv-version-id-major sets 'Major Version' (16 bits) +- hv-version-id-minor sets 'Minor Version' (16 bits) +- hv-version-id-spack sets 'Service Pack' (32 bits) +- hv-version-id-sbranch sets 'Service Branch' (8 bits) +- hv-version-id-snumber sets 'Service Number' (24 bits) + +Note: hv-version-id-* are not enlightenments and thus don't enable Hyper-V +identification when specified without any other enlightenments. + +4. Supplementary features +========================= + +4.1. hv-passthrough +=================== In some cases (e.g. during development) it may make sense to use QEMU in 'pass-through' mode and give Windows guests all enlightenments currently supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU flag. -Note: enabling this flag effectively prevents migration as supported features -may differ between target and destination. - - -4. Useful links +Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU +(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id=" +values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on +the command line. Also, enabling this flag effectively prevents migration as the +list of enabled enlightenments may differ between target and destination hosts. + +4.2. hv-enforce-cpuid +===================== +By default, KVM allows the guest to use all currently supported Hyper-V +enlightenments when Hyper-V CPUID interface was exposed, regardless of if +some features were not announced in guest visible CPUIDs. 'hv-enforce-cpuid' +feature alters this behavior and only allows the guest to use exposed Hyper-V +enlightenments. + + +5. Useful links ================ Hyper-V Top Level Functional specification and other information: https://github.com/MicrosoftDocs/Virtualization-Documentation diff --git a/docs/image-fuzzer.txt b/docs/image-fuzzer.txt index 3e23ebec331..279cc8c807f 100644 --- a/docs/image-fuzzer.txt +++ b/docs/image-fuzzer.txt @@ -51,10 +51,10 @@ assumes that core dumps will be generated in the current working directory. For comprehensive test results, please, set up your test environment properly. -Paths to binaries under test (SUTs) qemu-img and qemu-io are retrieved from -environment variables. If the environment check fails the runner will +Paths to binaries under test (SUTs) ``qemu-img`` and ``qemu-io`` are retrieved +from environment variables. If the environment check fails the runner will use SUTs installed in system paths. -qemu-img is required for creation of backing files, so it's mandatory to set +``qemu-img`` is required for creation of backing files, so it's mandatory to set the related environment variable if it's not installed in the system path. For details about environment variables see qemu-iotests/check. 
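Returning to the hv-version-id-* options described in the hyperv.txt change
above: the following is a hedged sketch of how those six values are packed
into CPUID leaf 0x40000002. The field positions are an assumption drawn from
the Hyper-V Top Level Functional Specification, not quoted from QEMU source:

    #include <stdint.h>

    /* Assumed packing of CPUID 0x40000002.EAX-EDX, per the Hyper-V TLFS. */
    struct hv_version_id {
        uint32_t build;     /* hv-version-id-build,   32 bits */
        uint16_t major;     /* hv-version-id-major,   16 bits */
        uint16_t minor;     /* hv-version-id-minor,   16 bits */
        uint32_t spack;     /* hv-version-id-spack,   32 bits */
        uint8_t  sbranch;   /* hv-version-id-sbranch,  8 bits */
        uint32_t snumber;   /* hv-version-id-snumber, 24 bits */
    };

    static void hv_version_id_to_cpuid(const struct hv_version_id *v,
                                       uint32_t *eax, uint32_t *ebx,
                                       uint32_t *ecx, uint32_t *edx)
    {
        *eax = v->build;                              /* Build Number        */
        *ebx = ((uint32_t)v->major << 16) | v->minor; /* Major.Minor Version */
        *ecx = v->spack;                              /* Service Pack        */
        *edx = ((uint32_t)v->sbranch << 24) |         /* Service Branch      */
               (v->snumber & 0x00ffffff);             /* Service Number      */
    }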
diff --git a/docs/index.rst b/docs/index.rst index 763e3d0426e..0b9ee9901d9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,6 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. +================================ Welcome to QEMU's documentation! ================================ @@ -10,6 +11,7 @@ Welcome to QEMU's documentation! :maxdepth: 2 :caption: Contents: + about/index system/index user/index tools/index diff --git a/docs/interop/_templates/editpage.html b/docs/interop/_templates/editpage.html deleted file mode 100644 index 215e5626812..00000000000 --- a/docs/interop/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
diff --git a/docs/interop/barrier.rst b/docs/interop/barrier.rst new file mode 100644 index 00000000000..055f2c1aef3 --- /dev/null +++ b/docs/interop/barrier.rst @@ -0,0 +1,426 @@ +Barrier client protocol +======================= + +QEMU's ``input-barrier`` device implements the client end of +the KVM (Keyboard-Video-Mouse) software +`Barrier `__. + +This document briefly describes the protocol as we implement it. + +Message format +-------------- + +Message format between the server and client is in two parts: + +#. the payload length, a 32bit integer in network endianness +#. the payload + +The payload starts with a 4byte string (without NUL) which is the +command. The first command between the server and the client +is the only command not encoded on 4 bytes ("Barrier"). +The remaining part of the payload is decoded according to the command. + +Protocol Description +-------------------- + +This comes from ``barrier/src/lib/barrier/protocol_types.h``. + +barrierCmdHello "Barrier" +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t minor, int16_t major }`` +Description: + Say hello to client + + ``minor`` = protocol major version number supported by server + + ``major`` = protocol minor version number supported by server + +barrierCmdHelloBack "Barrier" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + client ->server +Parameters: + ``{ int16_t minor, int16_t major, char *name}`` +Description: + Respond to hello from server + + ``minor`` = protocol major version number supported by client + + ``major`` = protocol minor version number supported by client + + ``name`` = client name + +barrierCmdDInfo "DINF" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + client ->server +Parameters: + ``{ int16_t x_origin, int16_t y_origin, int16_t width, int16_t height, int16_t x, int16_t y}`` +Description: + The client screen must send this message in response to the + barrierCmdQInfo message. It must also send this message when the + screen's resolution changes. In this case, the client screen should + ignore any barrierCmdDMouseMove messages until it receives a + barrierCmdCInfoAck in order to prevent attempts to move the mouse off + the new screen area. + +barrierCmdCNoop "CNOP" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + client -> server +Parameters: + None +Description: + No operation + +barrierCmdCClose "CBYE" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Close connection + +barrierCmdCEnter "CINN" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x, int16_t y, int32_t seq, int16_t modifier }`` +Description: + Enter screen. + + ``x``, ``y`` = entering screen absolute coordinates + + ``seq`` = sequence number, which is used to order messages between + screens. the secondary screen must return this number + with some messages + + ``modifier`` = modifier key mask. this will have bits set for each + toggle modifier key that is activated on entry to the + screen. the secondary screen should adjust its toggle + modifiers to reflect that state. + +barrierCmdCLeave "COUT" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Leaving screen. the secondary screen should send clipboard data in + response to this message for those clipboards that it has grabbed + (i.e. has sent a barrierCmdCClipboard for and has not received a + barrierCmdCClipboard for with a greater sequence number) and that + were grabbed or have changed since the last leave. 
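As a side note on the message framing described at the top of this file (a
32-bit network-order length followed by the payload, which itself starts with
the command name), here is a hedged C sketch of reading one message;
read_exact() is a hypothetical helper that loops until the requested number
of bytes has arrived::

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <arpa/inet.h>          /* ntohl() */

    /* Hypothetical helper: read exactly 'len' bytes, return 0 or -1. */
    extern int read_exact(int fd, void *buf, size_t len);

    static int read_barrier_message(int fd, char cmd[8],
                                    uint8_t **payload, uint32_t *len)
    {
        uint32_t netlen;
        size_t cmdlen;

        /* 1. payload length: a 32-bit integer in network byte order */
        if (read_exact(fd, &netlen, sizeof(netlen)) < 0) {
            return -1;
        }
        *len = ntohl(netlen);
        if (*len < 4) {
            return -1;              /* every message carries a command name */
        }

        /* 2. payload: starts with the command name; every command is
         *    4 bytes except the initial "Barrier" hello (7 bytes). */
        *payload = malloc(*len + 1);
        if (!*payload || read_exact(fd, *payload, *len) < 0) {
            free(*payload);
            return -1;
        }
        (*payload)[*len] = '\0';

        cmdlen = (*len >= 7 && memcmp(*payload, "Barrier", 7) == 0) ? 7 : 4;
        memcpy(cmd, *payload, cmdlen);
        cmd[cmdlen] = '\0';
        return 0;
    }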
+ +barrierCmdCClipboard "CCLP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t id, int32_t seq }`` +Description: + Grab clipboard. Sent by screen when some other app on that screen + grabs a clipboard. + + ``id`` = the clipboard identifier + + ``seq`` = sequence number. Client must use the sequence number passed in + the most recent barrierCmdCEnter. the server always sends 0. + +barrierCmdCScreenSaver "CSEC" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t started }`` +Description: + Screensaver change. + + ``started`` = Screensaver on primary has started (1) or closed (0) + +barrierCmdCResetOptions "CROP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Reset options. Client should reset all of its options to their + defaults. + +barrierCmdCInfoAck "CIAK" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Resolution change acknowledgment. Sent by server in response to a + client screen's barrierCmdDInfo. This is sent for every + barrierCmdDInfo, whether or not the server had sent a barrierCmdQInfo. + +barrierCmdCKeepAlive "CALV" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Keep connection alive. Sent by the server periodically to verify + that connections are still up and running. clients must reply in + kind on receipt. if the server gets an error sending the message or + does not receive a reply within a reasonable time then the server + disconnects the client. if the client doesn't receive these (or any + message) periodically then it should disconnect from the server. the + appropriate interval is defined by an option. + +barrierCmdDKeyDown "DKDN" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t keyid, int16_t modifier [,int16_t button] }`` +Description: + Key pressed. + + ``keyid`` = X11 key id + + ``modified`` = modified mask + + ``button`` = X11 Xkb keycode (optional) + +barrierCmdDKeyRepeat "DKRP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t keyid, int16_t modifier, int16_t repeat [,int16_t button] }`` +Description: + Key auto-repeat. + + ``keyid`` = X11 key id + + ``modified`` = modified mask + + ``repeat`` = number of repeats + + ``button`` = X11 Xkb keycode (optional) + +barrierCmdDKeyUp "DKUP" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t keyid, int16_t modifier [,int16_t button] }`` +Description: + Key released. + + ``keyid`` = X11 key id + + ``modified`` = modified mask + + ``button`` = X11 Xkb keycode (optional) + +barrierCmdDMouseDown "DMDN" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t button }`` +Description: + Mouse button pressed. + + ``button`` = button id + +barrierCmdDMouseUp "DMUP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t button }`` +Description: + Mouse button release. + + ``button`` = button id + +barrierCmdDMouseMove "DMMV" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x, int16_t y }`` +Description: + Absolute mouse moved. + + ``x``, ``y`` = absolute screen coordinates + +barrierCmdDMouseRelMove "DMRM" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x, int16_t y }`` +Description: + Relative mouse moved. 
+ + ``x``, ``y`` = r relative screen coordinates + +barrierCmdDMouseWheel "DMWM" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t x , int16_t y }`` or ``{ int16_t y }`` +Description: + Mouse scroll. The delta should be +120 for one tick forward (away + from the user) or right and -120 for one tick backward (toward the + user) or left. + + ``x`` = x delta + + ``y`` = y delta + +barrierCmdDClipboard "DCLP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t id, int32_t seq, int8_t mark, char *data }`` +Description: + Clipboard data. + + ``id`` = clipboard id + + ``seq`` = sequence number. The sequence number is 0 when sent by the + server. Client screens should use the/ sequence number from + the most recent barrierCmdCEnter. + +barrierCmdDSetOptions "DSOP" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int32 t nb, { int32_t id, int32_t val }[] }`` +Description: + Set options. Client should set the given option/value pairs. + + ``nb`` = numbers of ``{ id, val }`` entries + + ``id`` = option id + + ``val`` = option new value + +barrierCmdDFileTransfer "DFTR" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int8_t mark, char *content }`` +Description: + Transfer file data. + + * ``mark`` = 0 means the content followed is the file size + * 1 means the content followed is the chunk data + * 2 means the file transfer is finished + +barrierCmdDDragInfo "DDRG" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t nb, char *content }`` +Description: + Drag information. + + ``nb`` = number of dragging objects + + ``content`` = object's directory + +barrierCmdQInfo "QINF" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Query screen info + + Client should reply with a barrierCmdDInfo + +barrierCmdEIncompatible "EICV" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + ``{ int16_t nb, major *minor }`` +Description: + Incompatible version. + + ``major`` = major version + + ``minor`` = minor version + +barrierCmdEBusy "EBSY" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Name provided when connecting is already in use. + +barrierCmdEUnknown "EUNK" +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Unknown client. Name provided when connecting is not in primary's + screen configuration map. + +barrierCmdEBad "EBAD" +^^^^^^^^^^^^^^^^^^^^^^^ + +Direction: + server -> client +Parameters: + None +Description: + Protocol violation. Server should disconnect after sending this + message. + diff --git a/docs/interop/bitmaps.rst b/docs/interop/bitmaps.rst index 059ad679294..1de46febdc5 100644 --- a/docs/interop/bitmaps.rst +++ b/docs/interop/bitmaps.rst @@ -539,12 +539,11 @@ other partial disk images on top of a base image to reconstruct a full backup from the point in time at which the incremental backup was issued. The "Push Model" here references the fact that QEMU is "pushing" the modified -blocks out to a destination. We will be using the `drive-backup -`_ and `blockdev-backup -`_ QMP commands to create both +blocks out to a destination. We will be using the `blockdev-backup +`_ QMP command to create both full and incremental backups. 
-Both of these commands are jobs, which have their own QMP API for querying and +The command is a background job, which has its own QMP API for querying and management documented in `Background jobs `_. @@ -557,6 +556,10 @@ create a new incremental backup chain attached to a drive. This example creates a new, full backup of "drive0" and accompanies it with a new, empty bitmap that records writes from this point in time forward. +The target can be created with the help of `blockdev-add +`_ or `blockdev-create +`_ command. + .. note:: Any new writes that happen after this command is issued, even while the backup job runs, will be written locally and not to the backup destination. These writes will be recorded in the bitmap @@ -576,12 +579,11 @@ new, empty bitmap that records writes from this point in time forward. } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } } ] @@ -664,12 +666,11 @@ use a transaction to reset the bitmap while making a new full backup: } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.new_full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } } ] @@ -728,19 +729,35 @@ Example: First Incremental Backup $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Issue an incremental backup command: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -785,20 +802,36 @@ Example: Second Incremental Backup $ qemu-img create -f qcow2 drive0.inc1.qcow2 \ -b drive0.inc0.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc1.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a new incremental backup command. The only difference here is that we have changed the target image below. .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc1.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -866,20 +899,36 @@ image: file for you, but you lose control over format options like compatibility and preallocation presets. +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc2.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a new incremental backup command. Apart from the new destination image, there is no difference from the last two examples. .. 
code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc2.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -930,6 +979,38 @@ point in time. $ qemu-img create -f qcow2 drive0.full.qcow2 64G $ qemu-img create -f qcow2 drive1.full.qcow2 64G +#. Add target block nodes: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.full.qcow2" + } + } + } + + <- { "return": {} } + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target1", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive1.full.qcow2" + } + } + } + + <- { "return": {} } + #. Create a full (anchor) backup for each drive, with accompanying bitmaps: .. code-block:: QMP @@ -953,21 +1034,19 @@ point in time. } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", - "target": "/path/to/drive1.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target1", + "sync": "full" } } ] @@ -1016,6 +1095,38 @@ point in time. $ qemu-img create -f qcow2 drive1.inc0.qcow2 \ -b drive1.full.qcow2 -F qcow2 +#. Add target block nodes: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target1", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive1.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a multi-drive incremental push backup transaction: .. code-block:: QMP @@ -1025,25 +1136,21 @@ point in time. "arguments": { "actions": [ { - "type": "drive-backup", + "type": "blockev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }, ] @@ -1119,19 +1226,35 @@ described above. This example demonstrates the single-job failure case: $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Attempt to create an incremental backup via QMP: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -1164,6 +1287,19 @@ described above. This example demonstrates the single-job failure case: "event": "BLOCK_JOB_COMPLETED" } +#. 
Remove target node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-del", + "arguments": { + "node-name": "target0", + } + } + + <- { "return": {} } + #. Delete the failed image, and re-create it. .. code:: bash @@ -1172,20 +1308,36 @@ described above. This example demonstrates the single-job failure case: $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Retry the command after fixing the underlying problem, such as freeing up space on the backup volume: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -1210,7 +1362,8 @@ described above. This example demonstrates the single-job failure case: Example: Partial Transactional Failures ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -QMP commands like `drive-backup `_ +QMP commands like `blockdev-backup +`_ conceptually only start a job, and so transactions containing these commands may succeed even if the job it created later fails. This might have surprising interactions with notions of how a "transaction" ought to behave. @@ -1240,25 +1393,21 @@ and one succeeds: "arguments": { "actions": [ { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }] } @@ -1375,25 +1524,21 @@ applied: }, "actions": [ { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }] } diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json index 9d94ccafa9e..8d8b0be030e 100644 --- a/docs/interop/firmware.json +++ b/docs/interop/firmware.json @@ -115,6 +115,12 @@ # this feature are documented in # "docs/amd-memory-encryption.txt". # +# @amd-sev-es: The firmware supports running under AMD Secure Encrypted +# Virtualization - Encrypted State, as specified in the AMD64 +# Architecture Programmer's Manual. QEMU command line options +# related to this feature are documented in +# "docs/amd-memory-encryption.txt". 
+# # @enrolled-keys: The variable store (NVRAM) template associated with # the firmware binary has the UEFI Secure Boot # operational mode turned on, with certificates @@ -179,7 +185,7 @@ # Since: 3.0 ## { 'enum' : 'FirmwareFeature', - 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'enrolled-keys', + 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'enrolled-keys', 'requires-smm', 'secure-boot', 'verbose-dynamic', 'verbose-static' ] } @@ -504,6 +510,45 @@ # } # # { +# "description": "OVMF with SEV-ES support", +# "interface-types": [ +# "uefi" +# ], +# "mapping": { +# "device": "flash", +# "executable": { +# "filename": "/usr/share/OVMF/OVMF_CODE.fd", +# "format": "raw" +# }, +# "nvram-template": { +# "filename": "/usr/share/OVMF/OVMF_VARS.fd", +# "format": "raw" +# } +# }, +# "targets": [ +# { +# "architecture": "x86_64", +# "machines": [ +# "pc-q35-*" +# ] +# } +# ], +# "features": [ +# "acpi-s3", +# "amd-sev", +# "amd-sev-es", +# "verbose-dynamic" +# ], +# "tags": [ +# "-a X64", +# "-p OvmfPkg/OvmfPkgX64.dsc", +# "-t GCC48", +# "-b DEBUG", +# "-D FD_SIZE_4MB" +# ] +# } +# +# { # "description": "UEFI firmware for ARM64 virtual machines", # "interface-types": [ # "uefi" diff --git a/docs/interop/index.rst b/docs/interop/index.rst index 219a5e5fc50..47b9ed82bbc 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -1,17 +1,14 @@ -.. This is the top level page for the 'interop' manual. - - +------------------------------------------------ System Emulation Management and Interoperability -================================================ - -This manual contains documents and specifications that are useful -for making QEMU interoperate with other software. +------------------------------------------------ -Contents: +This section of the manual contains documents and specifications that +are useful for making QEMU interoperate with other software. .. toctree:: :maxdepth: 2 + barrier bitmaps dbus dbus-vmstate diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst index 1073b930dce..39e62c99151 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -116,8 +116,8 @@ QEMU block layer supports. (3) ``drive-mirror`` (and ``blockdev-mirror``): Synchronize a running disk to another image. -(4) ``drive-backup`` (and ``blockdev-backup``): Point-in-time (live) copy - of a block device to a destination. +(4) ``blockdev-backup`` (and the deprecated ``drive-backup``): + Point-in-time (live) copy of a block device to a destination. .. _`Interacting with a QEMU instance`: @@ -127,13 +127,15 @@ Interacting with a QEMU instance To show some example invocations of command-line, we will use the following invocation of QEMU, with a QMP server running over UNIX -socket:: +socket: - $ ./qemu-system-x86_64 -display none -no-user-config \ - -M q35 -nodefaults -m 512 \ - -blockdev node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \ - -device virtio-blk,drive=node-A,id=virtio0 \ - -monitor stdio -qmp unix:/tmp/qmp-sock,server=on,wait=off +.. parsed-literal:: + + $ |qemu_system| -display none -no-user-config -nodefaults \\ + -m 512 -blockdev \\ + node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \\ + -device virtio-blk,drive=node-A,id=virtio0 \\ + -monitor stdio -qmp unix:/tmp/qmp-sock,server=on,wait=off The ``-blockdev`` command-line option, used above, is available from QEMU 2.9 onwards. 
In the above invocation, notice the ``node-name`` @@ -553,13 +555,14 @@ Currently, there are four different kinds: (3) ``none`` -- Synchronize only the new writes from this point on. - .. note:: In the case of ``drive-backup`` (or ``blockdev-backup``), - the behavior of ``none`` synchronization mode is different. - Normally, a ``backup`` job consists of two parts: Anything - that is overwritten by the guest is first copied out to - the backup, and in the background the whole image is - copied from start to end. With ``sync=none``, it's only - the first part. + .. note:: In the case of ``blockdev-backup`` (or deprecated + ``drive-backup``), the behavior of ``none`` + synchronization mode is different. Normally, a + ``backup`` job consists of two parts: Anything that is + overwritten by the guest is first copied out to the + backup, and in the background the whole image is copied + from start to end. With ``sync=none``, it's only the + first part. (4) ``incremental`` -- Synchronize content that is described by the dirty bitmap @@ -638,7 +641,7 @@ at this point: (QEMU) block-job-complete device=job0 In either of the above cases, if you once again run the -`query-block-jobs` command, there should not be any active block +``query-block-jobs`` command, there should not be any active block operation. Comparing 'commit' and 'mirror': In both then cases, the overlay images @@ -692,14 +695,16 @@ And start the destination QEMU (we already have the source QEMU running -- discussed in the section: `Interacting with a QEMU instance`_) instance, with the following invocation. (As noted earlier, for simplicity's sake, the destination QEMU is started on the same host, but -it could be located elsewhere):: +it could be located elsewhere): + +.. parsed-literal:: - $ ./qemu-system-x86_64 -display none -no-user-config \ - -M q35 -nodefaults -m 512 \ - -blockdev node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ - -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ - -S -monitor stdio -qmp unix:./qmp-sock2,server=on,wait=off \ - -incoming tcp:localhost:6666 + $ |qemu_system| -display none -no-user-config -nodefaults \\ + -m 512 -blockdev \\ + node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \\ + -device virtio-blk,drive=node-TargetDisk,id=virtio0 \\ + -S -monitor stdio -qmp unix:./qmp-sock2,server=on,wait=off \\ + -incoming tcp:localhost:6666 Given the disk image chain on source QEMU:: @@ -777,7 +782,7 @@ the content of image [D]. } (6) [On *destination* QEMU] Finally, resume the guest vCPUs by issuing the - QMP command `cont`:: + QMP command ``cont``:: (QEMU) cont { @@ -924,19 +929,22 @@ Shutdown the guest, by issuing the ``quit`` QMP command:: } -Live disk backup --- ``drive-backup`` and ``blockdev-backup`` -------------------------------------------------------------- +Live disk backup --- ``blockdev-backup`` and the deprecated``drive-backup`` +--------------------------------------------------------------------------- -The ``drive-backup`` (and its newer equivalent ``blockdev-backup``) allows +The ``blockdev-backup`` (and the deprecated ``drive-backup``) allows you to create a point-in-time snapshot. -In this case, the point-in-time is when you *start* the ``drive-backup`` -(or its newer equivalent ``blockdev-backup``) command. +In this case, the point-in-time is when you *start* the +``blockdev-backup`` (or deprecated ``drive-backup``) command. 
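Before walking through the QMP invocations below, here is a hedged sketch of
how an external program could drive such a point-in-time backup over the QMP
socket from the example invocation earlier in this document (/tmp/qmp-sock,
block node "node-A"). It is illustrative only: it assumes the target qcow2
file already exists, skips reply parsing and error handling, and real
management software would normally use a proper QMP client library instead::

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/un.h>

    /* Send one QMP command and print whatever QEMU answers (sketch only). */
    static void qmp_send(int fd, const char *json)
    {
        char buf[4096];
        ssize_t n;

        write(fd, json, strlen(json));
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("%s\n", buf);
        }
    }

    int main(void)
    {
        struct sockaddr_un addr = { .sun_family = AF_UNIX };
        char greeting[4096];
        int fd = socket(AF_UNIX, SOCK_STREAM, 0);

        strncpy(addr.sun_path, "/tmp/qmp-sock", sizeof(addr.sun_path) - 1);
        if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
            perror("qmp socket");
            return 1;
        }

        /* QEMU sends a greeting banner; capabilities must be negotiated
         * before any other command is accepted. */
        read(fd, greeting, sizeof(greeting) - 1);
        qmp_send(fd, "{ \"execute\": \"qmp_capabilities\" }");

        /* Add the (pre-created) backup target node, then start the
         * point-in-time copy of the guest device's block node. */
        qmp_send(fd, "{ \"execute\": \"blockdev-add\", \"arguments\": {"
                     " \"node-name\": \"target0\", \"driver\": \"qcow2\","
                     " \"file\": { \"driver\": \"file\","
                     " \"filename\": \"./target.qcow2\" } } }");
        qmp_send(fd, "{ \"execute\": \"blockdev-backup\", \"arguments\": {"
                     " \"device\": \"node-A\", \"target\": \"target0\","
                     " \"sync\": \"full\" } }");

        close(fd);
        return 0;
    }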
QMP invocation for ``drive-backup`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note that ``drive-backup`` command is deprecated since QEMU 6.2 and +will be removed in future. + Yet again, starting afresh with our example disk image chain:: [A] <-- [B] <-- [C] <-- [D] @@ -961,11 +969,22 @@ will be issued, indicating the live block device job operation has completed, and no further action is required. +Moving from the deprecated ``drive-backup`` to newer ``blockdev-backup`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``blockdev-backup`` differs from ``drive-backup`` in how you specify +the backup target. With ``blockdev-backup`` you can't specify filename +as a target. Instead you use ``node-name`` of existing block node, +which you may add by ``blockdev-add`` or ``blockdev-create`` commands. +Correspondingly, ``blockdev-backup`` doesn't have ``mode`` and +``format`` arguments which don't apply to an existing block node. See +following sections for details and examples. + + Notes on ``blockdev-backup`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``blockdev-backup`` command is equivalent in functionality to -``drive-backup``, except that it operates at node-level in a Block Driver +The ``blockdev-backup`` command operates at node-level in a Block Driver State (BDS) graph. E.g. the sequence of actions to create a point-in-time backup diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index 10ce098a29b..bdb0f2a41ac 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -1,4 +1,4 @@ -Qemu supports the NBD protocol, and has an internal NBD client (see +QEMU supports the NBD protocol, and has an internal NBD client (see block/nbd.c), an internal NBD server (see blockdev-nbd.c), and an external NBD server tool (see qemu-nbd.c). The common code is placed in nbd/*. @@ -7,11 +7,11 @@ The NBD protocol is specified here: https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md The following paragraphs describe some specific properties of NBD -protocol realization in Qemu. +protocol realization in QEMU. = Metadata namespaces = -Qemu supports the "base:allocation" metadata context as defined in the +QEMU supports the "base:allocation" metadata context as defined in the NBD protocol specification, and also defines an additional metadata namespace "qemu". diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index 0463f761efb..f7dc304ff69 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -313,7 +313,7 @@ The fields of the bitmaps extension are: The number of bitmaps contained in the image. Must be greater than or equal to 1. - Note: Qemu currently only supports up to 65535 bitmaps per + Note: QEMU currently only supports up to 65535 bitmaps per image. 4 - 7: Reserved, must be zero. @@ -775,7 +775,7 @@ Structure of a bitmap directory entry: 2: extra_data_compatible This flags is meaningful when the extra data is unknown to the software (currently any extra data is - unknown to Qemu). + unknown to QEMU). If it is set, the bitmap may be used as expected, extra data must be left as is. If it is not set, the bitmap must not be used, but @@ -793,7 +793,7 @@ Structure of a bitmap directory entry: 17: granularity_bits Granularity bits. Valid values: 0 - 63. - Note: Qemu currently supports only values 9 - 31. + Note: QEMU currently supports only values 9 - 31. Granularity is calculated as granularity = 1 << granularity_bits @@ -804,7 +804,7 @@ Structure of a bitmap directory entry: 18 - 19: name_size Size of the bitmap name. Must be non-zero. 
- Note: Qemu currently doesn't support values greater than + Note: QEMU currently doesn't support values greater than 1023. 20 - 23: extra_data_size diff --git a/docs/interop/qemu-ga-ref.rst b/docs/interop/qemu-ga-ref.rst index 3f1c4f908fa..032d4924552 100644 --- a/docs/interop/qemu-ga-ref.rst +++ b/docs/interop/qemu-ga-ref.rst @@ -1,13 +1,7 @@ QEMU Guest Agent Protocol Reference =================================== -.. - TODO: the old Texinfo manual used to note that this manual - is GPL-v2-or-later. We should make that reader-visible - both here and in our Sphinx manuals more generally. - -.. - TODO: display the QEMU version, both here and in our Sphinx manuals - more generally. +.. contents:: + :depth: 3 .. qapi-doc:: qga/qapi-schema.json diff --git a/docs/interop/qemu-qmp-ref.rst b/docs/interop/qemu-qmp-ref.rst index c8abaaf8e3e..357effd64f3 100644 --- a/docs/interop/qemu-qmp-ref.rst +++ b/docs/interop/qemu-qmp-ref.rst @@ -1,13 +1,7 @@ QEMU QMP Reference Manual ========================= -.. - TODO: the old Texinfo manual used to note that this manual - is GPL-v2-or-later. We should make that reader-visible - both here and in our Sphinx manuals more generally. - -.. - TODO: display the QEMU version, both here and in our Sphinx manuals - more generally. +.. contents:: + :depth: 3 .. qapi-doc:: qapi/qapi-schema.json diff --git a/docs/interop/qemu-storage-daemon-qmp-ref.rst b/docs/interop/qemu-storage-daemon-qmp-ref.rst index caf9dad23a7..9fed68152f5 100644 --- a/docs/interop/qemu-storage-daemon-qmp-ref.rst +++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst @@ -1,13 +1,7 @@ QEMU Storage Daemon QMP Reference Manual ======================================== -.. - TODO: the old Texinfo manual used to note that this manual - is GPL-v2-or-later. We should make that reader-visible - both here and in our Sphinx manuals more generally. - -.. - TODO: display the QEMU version, both here and in our Sphinx manuals - more generally. +.. contents:: + :depth: 3 .. qapi-doc:: storage-daemon/qapi/qapi-schema.json diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst index 3268bf405ce..71a2c52b313 100644 --- a/docs/interop/vhost-user-gpu.rst +++ b/docs/interop/vhost-user-gpu.rst @@ -2,9 +2,10 @@ Vhost-user-gpu Protocol ======================= -:Licence: This work is licensed under the terms of the GNU GPL, - version 2 or later. See the COPYING file in the top-level - directory. +.. + Licence: This work is licensed under the terms of the GNU GPL, + version 2 or later. See the COPYING file in the top-level + directory. .. contents:: Table of Contents diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index d6085f70452..edc3ad84a35 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -1,11 +1,15 @@ +.. _vhost_user_proto: + =================== Vhost-user Protocol =================== -:Copyright: 2014 Virtual Open Systems Sarl. -:Copyright: 2019 Intel Corporation -:Licence: This work is licensed under the terms of the GNU GPL, - version 2 or later. See the COPYING file in the top-level - directory. + +.. + Copyright 2014 Virtual Open Systems Sarl. + Copyright 2019 Intel Corporation + Licence: This work is licensed under the terms of the GNU GPL, + version 2 or later. See the COPYING file in the top-level + directory. .. 
contents:: Table of Contents diff --git a/docs/meson.build b/docs/meson.build index f84306ba7e0..27c6e156fff 100644 --- a/docs/meson.build +++ b/docs/meson.build @@ -9,7 +9,7 @@ endif # Check if tools are available to build documentation. build_docs = false if sphinx_build.found() - SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build] + SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build, '-q'] # If we're making warnings fatal, apply this to Sphinx runs as well if get_option('werror') SPHINX_ARGS += [ '-W' ] @@ -27,10 +27,9 @@ if sphinx_build.found() build_docs = (sphinx_build_test_out.returncode() == 0) if not build_docs - warning('@0@ is either too old or uses too old a Python version' - .format(sphinx_build.full_path())) + warning('@0@: @1@'.format(sphinx_build.full_path(), sphinx_build_test_out.stderr())) if get_option('docs').enabled() - error('Install a Python 3 version of python-sphinx') + error('Install a Python 3 version of python-sphinx and the readthedoc theme') endif endif endif @@ -38,14 +37,6 @@ endif if build_docs SPHINX_ARGS += ['-Dversion=' + meson.project_version(), '-Drelease=' + config_host['PKGVERSION']] - sphinx_extn_depends = [ meson.source_root() / 'docs/sphinx/depfile.py', - meson.source_root() / 'docs/sphinx/hxtool.py', - meson.source_root() / 'docs/sphinx/kerneldoc.py', - meson.source_root() / 'docs/sphinx/kernellog.py', - meson.source_root() / 'docs/sphinx/qapidoc.py', - meson.source_root() / 'docs/sphinx/qmp_lexer.py', - qapi_gen_depends ] - have_ga = have_tools and config_host.has_key('CONFIG_GUEST_AGENT') man_pages = { @@ -57,7 +48,7 @@ if build_docs 'qemu-nbd.8': (have_tools ? 'man8' : ''), 'qemu-pr-helper.8': (have_tools ? 'man8' : ''), 'qemu-storage-daemon.1': (have_tools ? 'man1' : ''), - 'qemu-trace-stap.1': (config_host.has_key('CONFIG_TRACE_SYSTEMTAP') ? 'man1' : ''), + 'qemu-trace-stap.1': (stap.found() ? 'man1' : ''), 'virtfs-proxy-helper.1': (have_virtfs_proxy_helper ? 'man1' : ''), 'virtiofsd.1': (have_virtiofsd ? 'man1' : ''), 'qemu.1': 'man1', @@ -77,7 +68,6 @@ if build_docs output: 'docs.stamp', input: files('conf.py'), depfile: 'docs.d', - depend_files: sphinx_extn_depends, command: [SPHINX_ARGS, '-Ddepfile=@DEPFILE@', '-Ddepfile_stamp=@OUTPUT0@', '-b', 'html', '-d', private_dir, diff --git a/docs/multiseat.txt b/docs/multiseat.txt index 11850c96ff8..2b297e979d6 100644 --- a/docs/multiseat.txt +++ b/docs/multiseat.txt @@ -123,7 +123,7 @@ Background info is here: guest side with pci-bridge-seat ------------------------------- -Qemu version 2.4 and newer has a new pci-bridge-seat device which +QEMU version 2.4 and newer has a new pci-bridge-seat device which can be used instead of pci-bridge. Just swap the device name in the qemu command line above. The only difference between the two devices is the pci id. We can match the pci id instead of the device path diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index 0aae682be3e..fd7773dc5ab 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -15,7 +15,7 @@ backend (i.e. memory-backend-file and memory-backend-ram). 
A simple way to create a vNVDIMM device at startup time is done via the following command line options: - -machine pc,nvdimm + -machine pc,nvdimm=on -m $RAM_SIZE,slots=$N,maxmem=$MAX_SIZE -object memory-backend-file,id=mem1,share=on,mem-path=$PATH,size=$NVDIMM_SIZE,readonly=off -device nvdimm,id=nvdimm1,memdev=mem1,unarmed=off diff --git a/docs/pcie_pci_bridge.txt b/docs/pcie_pci_bridge.txt index ab35ebf3cae..1aa08fc5f0c 100644 --- a/docs/pcie_pci_bridge.txt +++ b/docs/pcie_pci_bridge.txt @@ -70,9 +70,9 @@ A detailed command line would be: [qemu-bin + storage options] \ -m 2G \ --device pcie-root-port,bus=pcie.0,id=rp1 \ --device pcie-root-port,bus=pcie.0,id=rp2 \ --device pcie-root-port,bus=pcie.0,id=rp3,bus-reserve=1 \ +-device pcie-root-port,bus=pcie.0,id=rp1,slot=1 \ +-device pcie-root-port,bus=pcie.0,id=rp2,slot=2 \ +-device pcie-root-port,bus=pcie.0,id=rp3,slot=3,bus-reserve=1 \ -device pcie-pci-bridge,id=br1,bus=rp1 \ -device pcie-pci-bridge,id=br2,bus=rp2 \ -device e1000,bus=br1,addr=8 diff --git a/docs/specs/_templates/editpage.html b/docs/specs/_templates/editpage.html deleted file mode 100644 index aaa468aa98d..00000000000 --- a/docs/specs/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
diff --git a/docs/specs/acpi_cpu_hotplug.rst b/docs/specs/acpi_cpu_hotplug.rst new file mode 100644 index 00000000000..351057c9676 --- /dev/null +++ b/docs/specs/acpi_cpu_hotplug.rst @@ -0,0 +1,235 @@ +QEMU<->ACPI BIOS CPU hotplug interface +====================================== + +QEMU supports CPU hotplug via ACPI. This document +describes the interface between QEMU and the ACPI BIOS. + +ACPI BIOS GPE.2 handler is dedicated for notifying OS about CPU hot-add +and hot-remove events. + + +Legacy ACPI CPU hotplug interface registers +------------------------------------------- + +CPU present bitmap for: + +- ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access) +- PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) +- One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. +- The first DWORD in bitmap is used in write mode to switch from legacy + to modern CPU hotplug interface, write 0 into it to do switch. + +QEMU sets corresponding CPU bit on hot-add event and issues SCI +with GPE.2 event set. CPU present map is read by ACPI BIOS GPE.2 handler +to notify OS about CPU hot-add events. CPU hot-remove isn't supported. + + +Modern ACPI CPU hotplug interface registers +------------------------------------------- + +Register block base address: + +- ICH9-LPC IO port 0x0cd8 +- PIIX-PM IO port 0xaf00 + +Register block size: + +- ACPI_CPU_HOTPLUG_REG_LEN = 12 + +All accesses to registers described below, imply little-endian byte order. + +Reserved registers behavior: + +- write accesses are ignored +- read accesses return all bits set to 0. + +The last stored value in 'CPU selector' must refer to a possible CPU, otherwise + +- reads from any register return 0 +- writes to any other register are ignored until valid value is stored into it + +On QEMU start, 'CPU selector' is initialized to a valid value, on reset it +keeps the current value. + +Read access behavior +^^^^^^^^^^^^^^^^^^^^ + +offset [0x0-0x3] + Command data 2: (DWORD access) + + If value last stored in 'Command field' is: + + 0: + reads as 0x0 + 3: + upper 32 bits of architecture specific CPU ID value + other values: + reserved + +offset [0x4] + CPU device status fields: (1 byte access) + + bits: + + 0: + Device is enabled and may be used by guest + 1: + Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 0 is set. + 2: + Device remove event, used to distinguish device for which + no device eject request to OSPM was issued. Firmware must + ignore this bit. + 3: + reserved and should be ignored by OSPM + 4: + if set to 1, OSPM requests firmware to perform device eject. + 5-7: + reserved and should be ignored by OSPM + +offset [0x5-0x7] + reserved + +offset [0x8] + Command data: (DWORD access) + + If value last stored in 'Command field' is one of: + + 0: + contains 'CPU selector' value of a CPU with pending event[s] + 3: + lower 32 bits of architecture specific CPU ID value + (in x86 case: APIC ID) + otherwise: + contains 0 + +Write access behavior +^^^^^^^^^^^^^^^^^^^^^ + +offset [0x0-0x3] + CPU selector: (DWORD access) + + Selects active CPU device. All following accesses to other + registers will read/store data from/to selected CPU. + Valid values: [0 .. max_cpus) + +offset [0x4] + CPU device control fields: (1 byte access) + + bits: + + 0: + reserved, OSPM must clear it before writing to register. 
+ 1: + if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected CPU device + 2: + if set to 1 clears device remove event, set by OSPM + after it has emitted device eject request for the + selected CPU device. + 3: + if set to 1 initiates device eject, set by OSPM when it + triggers CPU device removal and calls _EJ0 method or by firmware + when bit #4 is set. In case bit #4 were set, it's cleared as + part of device eject. + 4: + if set to 1, OSPM hands over device eject to firmware. + Firmware shall issue device eject request as described above + (bit #3) and OSPM should not touch device eject bit (#3) in case + it's asked firmware to perform CPU device eject. + 5-7: + reserved, OSPM must clear them before writing to register + +offset[0x5] + Command field: (1 byte access) + + value: + + 0: + selects a CPU device with inserting/removing events and + following reads from 'Command data' register return + selected CPU ('CPU selector' value). + If no CPU with events found, the current 'CPU selector' doesn't + change and corresponding insert/remove event flags are not modified. + + 1: + following writes to 'Command data' register set OST event + register in QEMU + 2: + following writes to 'Command data' register set OST status + register in QEMU + 3: + following reads from 'Command data' and 'Command data 2' return + architecture specific CPU ID value for currently selected CPU. + other values: + reserved + +offset [0x6-0x7] + reserved + +offset [0x8] + Command data: (DWORD access) + + If last stored 'Command field' value is: + + 1: + stores value into OST event register + 2: + stores value into OST status register, triggers + ACPI_DEVICE_OST QMP event from QEMU to external applications + with current values of OST event and status registers. + other values: + reserved + +Typical usecases +---------------- + +(x86) Detecting and enabling modern CPU hotplug interface +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +QEMU starts with legacy CPU hotplug interface enabled. Detecting and +switching to modern interface is based on the 2 legacy CPU hotplug features: + +#. Writes into CPU bitmap are ignored. +#. CPU bitmap always has bit #0 set, corresponding to boot CPU. + +Use following steps to detect and enable modern CPU hotplug interface: + +#. Store 0x0 to the 'CPU selector' register, attempting to switch to modern mode +#. Store 0x0 to the 'CPU selector' register, to ensure valid selector value +#. Store 0x0 to the 'Command field' register +#. Read the 'Command data 2' register. + If read value is 0x0, the modern interface is enabled. + Otherwise legacy or no CPU hotplug interface available + +Get a cpu with pending event +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Store 0x0 to the 'CPU selector' register. +#. Store 0x0 to the 'Command field' register. +#. Read the 'CPU device status fields' register. +#. If both bit #1 and bit #2 are clear in the value read, there is no CPU + with a pending event and selected CPU remains unchanged. +#. Otherwise, read the 'Command data' register. The value read is the + selector of the CPU with the pending event (which is already selected). + +Enumerate CPUs present/non present CPUs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Set the present CPU count to 0. +#. Set the iterator to 0. +#. Store 0x0 to the 'CPU selector' register, to ensure that it's in + a valid state and that access to other registers won't be ignored. +#. 
Store 0x0 to the 'Command field' register to make 'Command data' + register return 'CPU selector' value of selected CPU +#. Read the 'CPU device status fields' register. +#. If bit #0 is set, increment the present CPU count. +#. Increment the iterator. +#. Store the iterator to the 'CPU selector' register. +#. Read the 'Command data' register. +#. If the value read is not zero, goto 05. +#. Otherwise store 0x0 to the 'CPU selector' register, to put it + into a valid state and exit. + The iterator at this point equals "max_cpus". diff --git a/docs/specs/acpi_cpu_hotplug.txt b/docs/specs/acpi_cpu_hotplug.txt deleted file mode 100644 index 9bd59ae0dae..00000000000 --- a/docs/specs/acpi_cpu_hotplug.txt +++ /dev/null @@ -1,160 +0,0 @@ -QEMU<->ACPI BIOS CPU hotplug interface --------------------------------------- - -QEMU supports CPU hotplug via ACPI. This document -describes the interface between QEMU and the ACPI BIOS. - -ACPI BIOS GPE.2 handler is dedicated for notifying OS about CPU hot-add -and hot-remove events. - -============================================ -Legacy ACPI CPU hotplug interface registers: --------------------------------------------- -CPU present bitmap for: - ICH9-LPC (IO port 0x0cd8-0xcf7, 1-byte access) - PIIX-PM (IO port 0xaf00-0xaf1f, 1-byte access) - One bit per CPU. Bit position reflects corresponding CPU APIC ID. Read-only. - The first DWORD in bitmap is used in write mode to switch from legacy - to modern CPU hotplug interface, write 0 into it to do switch. ---------------------------------------------------------------- -QEMU sets corresponding CPU bit on hot-add event and issues SCI -with GPE.2 event set. CPU present map is read by ACPI BIOS GPE.2 handler -to notify OS about CPU hot-add events. CPU hot-remove isn't supported. - -===================================== -Modern ACPI CPU hotplug interface registers: -------------------------------------- -Register block base address: - ICH9-LPC IO port 0x0cd8 - PIIX-PM IO port 0xaf00 -Register block size: - ACPI_CPU_HOTPLUG_REG_LEN = 12 - -All accesses to registers described below, imply little-endian byte order. - -Reserved resisters behavior: - - write accesses are ignored - - read accesses return all bits set to 0. - -The last stored value in 'CPU selector' must refer to a possible CPU, otherwise - - reads from any register return 0 - - writes to any other register are ignored until valid value is stored into it -On QEMU start, 'CPU selector' is initialized to a valid value, on reset it -keeps the current value. - -read access: - offset: - [0x0-0x3] Command data 2: (DWORD access) - if value last stored in 'Command field': - 0: reads as 0x0 - 3: upper 32 bits of architecture specific CPU ID value - other values: reserved - [0x4] CPU device status fields: (1 byte access) - bits: - 0: Device is enabled and may be used by guest - 1: Device insert event, used to distinguish device for which - no device check event to OSPM was issued. - It's valid only when bit 0 is set. - 2: Device remove event, used to distinguish device for which - no device eject request to OSPM was issued. Firmware must - ignore this bit. - 3: reserved and should be ignored by OSPM - 4: if set to 1, OSPM requests firmware to perform device eject. 
- 5-7: reserved and should be ignored by OSPM - [0x5-0x7] reserved - [0x8] Command data: (DWORD access) - contains 0 unless value last stored in 'Command field' is one of: - 0: contains 'CPU selector' value of a CPU with pending event[s] - 3: lower 32 bits of architecture specific CPU ID value - (in x86 case: APIC ID) - -write access: - offset: - [0x0-0x3] CPU selector: (DWORD access) - selects active CPU device. All following accesses to other - registers will read/store data from/to selected CPU. - Valid values: [0 .. max_cpus) - [0x4] CPU device control fields: (1 byte access) - bits: - 0: reserved, OSPM must clear it before writing to register. - 1: if set to 1 clears device insert event, set by OSPM - after it has emitted device check event for the - selected CPU device - 2: if set to 1 clears device remove event, set by OSPM - after it has emitted device eject request for the - selected CPU device. - 3: if set to 1 initiates device eject, set by OSPM when it - triggers CPU device removal and calls _EJ0 method or by firmware - when bit #4 is set. In case bit #4 were set, it's cleared as - part of device eject. - 4: if set to 1, OSPM hands over device eject to firmware. - Firmware shall issue device eject request as described above - (bit #3) and OSPM should not touch device eject bit (#3) in case - it's asked firmware to perform CPU device eject. - 5-7: reserved, OSPM must clear them before writing to register - [0x5] Command field: (1 byte access) - value: - 0: selects a CPU device with inserting/removing events and - following reads from 'Command data' register return - selected CPU ('CPU selector' value). - If no CPU with events found, the current 'CPU selector' doesn't - change and corresponding insert/remove event flags are not modified. - 1: following writes to 'Command data' register set OST event - register in QEMU - 2: following writes to 'Command data' register set OST status - register in QEMU - 3: following reads from 'Command data' and 'Command data 2' return - architecture specific CPU ID value for currently selected CPU. - other values: reserved - [0x6-0x7] reserved - [0x8] Command data: (DWORD access) - if last stored 'Command field' value: - 1: stores value into OST event register - 2: stores value into OST status register, triggers - ACPI_DEVICE_OST QMP event from QEMU to external applications - with current values of OST event and status registers. - other values: reserved - -Typical usecases: - - (x86) Detecting and enabling modern CPU hotplug interface. - QEMU starts with legacy CPU hotplug interface enabled. Detecting and - switching to modern interface is based on the 2 legacy CPU hotplug features: - 1. Writes into CPU bitmap are ignored. - 2. CPU bitmap always has bit#0 set, corresponding to boot CPU. - - Use following steps to detect and enable modern CPU hotplug interface: - 1. Store 0x0 to the 'CPU selector' register, - attempting to switch to modern mode - 2. Store 0x0 to the 'CPU selector' register, - to ensure valid selector value - 3. Store 0x0 to the 'Command field' register, - 4. Read the 'Command data 2' register. - If read value is 0x0, the modern interface is enabled. - Otherwise legacy or no CPU hotplug interface available - - - Get a cpu with pending event - 1. Store 0x0 to the 'CPU selector' register. - 2. Store 0x0 to the 'Command field' register. - 3. Read the 'CPU device status fields' register. - 4. If both bit#1 and bit#2 are clear in the value read, there is no CPU - with a pending event and selected CPU remains unchanged. - 5. 
Otherwise, read the 'Command data' register. The value read is the - selector of the CPU with the pending event (which is already - selected). - - - Enumerate CPUs present/non present CPUs - 01. Set the present CPU count to 0. - 02. Set the iterator to 0. - 03. Store 0x0 to the 'CPU selector' register, to ensure that it's in - a valid state and that access to other registers won't be ignored. - 04. Store 0x0 to the 'Command field' register to make 'Command data' - register return 'CPU selector' value of selected CPU - 05. Read the 'CPU device status fields' register. - 06. If bit#0 is set, increment the present CPU count. - 07. Increment the iterator. - 08. Store the iterator to the 'CPU selector' register. - 09. Read the 'Command data' register. - 10. If the value read is not zero, goto 05. - 11. Otherwise store 0x0 to the 'CPU selector' register, to put it - into a valid state and exit. - The iterator at this point equals "max_cpus". diff --git a/docs/specs/acpi_mem_hotplug.rst b/docs/specs/acpi_mem_hotplug.rst new file mode 100644 index 00000000000..069819bc3e0 --- /dev/null +++ b/docs/specs/acpi_mem_hotplug.rst @@ -0,0 +1,128 @@ +QEMU<->ACPI BIOS memory hotplug interface +========================================= + +ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add +and hot-remove events. + +Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access) +---------------------------------------------------------------- + +Read access behavior +^^^^^^^^^^^^^^^^^^^^ + +[0x0-0x3] + Lo part of memory device phys address +[0x4-0x7] + Hi part of memory device phys address +[0x8-0xb] + Lo part of memory device size in bytes +[0xc-0xf] + Hi part of memory device size in bytes +[0x10-0x13] + Memory device proximity domain +[0x14] + Memory device status fields + + bits: + + 0: + Device is enabled and may be used by guest + 1: + Device insert event, used to distinguish device for which + no device check event to OSPM was issued. + It's valid only when bit 1 is set. + 2: + Device remove event, used to distinguish device for which + no device eject request to OSPM was issued. + 3-7: + reserved and should be ignored by OSPM + +[0x15-0x17] + reserved + +Write access behavior +^^^^^^^^^^^^^^^^^^^^^ + + +[0x0-0x3] + Memory device slot selector, selects active memory device. + All following accesses to other registers in 0xa00-0xa17 + region will read/store data from/to selected memory device. +[0x4-0x7] + OST event code reported by OSPM +[0x8-0xb] + OST status code reported by OSPM +[0xc-0x13] + reserved, writes into it are ignored +[0x14] + Memory device control fields + + bits: + + 0: + reserved, OSPM must clear it before writing to register. + Due to BUG in versions prior 2.4 that field isn't cleared + when other fields are written. Keep it reserved and don't + try to reuse it. 
+ 1: + if set to 1 clears device insert event, set by OSPM + after it has emitted device check event for the + selected memory device + 2: + if set to 1 clears device remove event, set by OSPM + after it has emitted device eject request for the + selected memory device + 3: + if set to 1 initiates device eject, set by OSPM when it + triggers memory device removal and calls _EJ0 method + 4-7: + reserved, OSPM must clear them before writing to register + +Selecting memory device slot beyond present range has no effect on platform: + +- write accesses to memory hot-plug registers not documented above are ignored +- read accesses to memory hot-plug registers not documented above return + all bits set to 1. + +Memory hot remove process diagram +--------------------------------- + +:: + + +-------------+ +-----------------------+ +------------------+ + | 1. QEMU | | 2. QEMU | |3. QEMU | + | device_del +---->+ device unplug request +----->+Send SCI to guest,| + | | | cb | |return control to | + | | | | |management | + +-------------+ +-----------------------+ +------------------+ + + +---------------------------------------------------------------------+ + + +---------------------+ +-------------------------+ + | OSPM: | remove event | OSPM: | + | send Eject Request, | | Scan memory devices | + | clear remove event +<-------------+ for event flags | + | | | | + +---------------------+ +-------------------------+ + | + | + +---------v--------+ +-----------------------+ + | Guest OS: | success | OSPM: | + | process Ejection +----------->+ Execute _EJ0 method, | + | request | | set eject bit in flags| + +------------------+ +-----------------------+ + |failure | + v v + +------------------------+ +-----------------------+ + | OSPM: | | QEMU: | + | set OST event & status | | call device unplug cb | + | fields | | | + +------------------------+ +-----------------------+ + | | + v v + +------------------+ +-------------------+ + |QEMU: | |QEMU: | + |Send OST QMP event| |Send device deleted| + | | |QMP event | + +------------------+ | | + +-------------------+ diff --git a/docs/specs/acpi_mem_hotplug.txt b/docs/specs/acpi_mem_hotplug.txt deleted file mode 100644 index 3df3620ce42..00000000000 --- a/docs/specs/acpi_mem_hotplug.txt +++ /dev/null @@ -1,94 +0,0 @@ -QEMU<->ACPI BIOS memory hotplug interface --------------------------------------- - -ACPI BIOS GPE.3 handler is dedicated for notifying OS about memory hot-add -and hot-remove events. - -Memory hot-plug interface (IO port 0xa00-0xa17, 1-4 byte access): ---------------------------------------------------------------- -0xa00: - read access: - [0x0-0x3] Lo part of memory device phys address - [0x4-0x7] Hi part of memory device phys address - [0x8-0xb] Lo part of memory device size in bytes - [0xc-0xf] Hi part of memory device size in bytes - [0x10-0x13] Memory device proximity domain - [0x14] Memory device status fields - bits: - 0: Device is enabled and may be used by guest - 1: Device insert event, used to distinguish device for which - no device check event to OSPM was issued. - It's valid only when bit 1 is set. - 2: Device remove event, used to distinguish device for which - no device eject request to OSPM was issued. - 3-7: reserved and should be ignored by OSPM - [0x15-0x17] reserved - - write access: - [0x0-0x3] Memory device slot selector, selects active memory device. - All following accesses to other registers in 0xa00-0xa17 - region will read/store data from/to selected memory device. 
- [0x4-0x7] OST event code reported by OSPM - [0x8-0xb] OST status code reported by OSPM - [0xc-0x13] reserved, writes into it are ignored - [0x14] Memory device control fields - bits: - 0: reserved, OSPM must clear it before writing to register. - Due to BUG in versions prior 2.4 that field isn't cleared - when other fields are written. Keep it reserved and don't - try to reuse it. - 1: if set to 1 clears device insert event, set by OSPM - after it has emitted device check event for the - selected memory device - 2: if set to 1 clears device remove event, set by OSPM - after it has emitted device eject request for the - selected memory device - 3: if set to 1 initiates device eject, set by OSPM when it - triggers memory device removal and calls _EJ0 method - 4-7: reserved, OSPM must clear them before writing to register - -Selecting memory device slot beyond present range has no effect on platform: - - write accesses to memory hot-plug registers not documented above are - ignored - - read accesses to memory hot-plug registers not documented above return - all bits set to 1. - -Memory hot remove process diagram: ----------------------------------- - +-------------+     +-----------------------+      +------------------+      - |  1. QEMU    |     | 2. QEMU               |      |3. QEMU           |      - |  device_del +---->+ device unplug request +----->+Send SCI to guest,|      - |             |     |         cb            |      |return control to |      - +-------------+     +-----------------------+      |management        |      -                                                    +------------------+      -                                                                              - +---------------------------------------------------------------------+      -                                                                              - +---------------------+              +-------------------------+             - | OSPM:               | remove event | OSPM:                   |             - | send Eject Request, |              | Scan memory devices     |             - | clear remove event  +<-------------+ for event flags         |             - |                     |              |                         |             - +---------------------+              +-------------------------+             -           |                                                                  -           |                                                                  - +---------v--------+            +-----------------------+                    - | Guest OS:        |  success   | OSPM:                 |                    - | process Ejection +----------->+ Execute _EJ0 method,  |                    - | request          |            | set eject bit in flags|                    - +------------------+            +-----------------------+                    -           |failure                         |                                 -           v                                v                                 - +------------------------+      +-----------------------+                    - | OSPM:                  |      | QEMU:                 |                    - | set OST event & status |      | call device unplug cb |                    - | fields                 |      |                       |                    - +------------------------+      +-----------------------+                    -          |                                  |                                -          v       
                           v                                - +------------------+              +-------------------+                      - |QEMU:             |              |QEMU:              |                      - |Send OST QMP event|              |Send device deleted|                      - |                  |              |QMP event          |                      - +------------------+              |                   |                      -                                   +-------------------+ diff --git a/docs/specs/acpi_nvdimm.rst b/docs/specs/acpi_nvdimm.rst new file mode 100644 index 00000000000..ab0335253d7 --- /dev/null +++ b/docs/specs/acpi_nvdimm.rst @@ -0,0 +1,228 @@ +QEMU<->ACPI BIOS NVDIMM interface +================================= + +QEMU supports NVDIMM via ACPI. This document describes the basic concepts of +NVDIMM ACPI and the interface between QEMU and the ACPI BIOS. + +NVDIMM ACPI Background +---------------------- + +NVDIMM is introduced in ACPI 6.0 which defines an NVDIMM root device under +_SB scope with a _HID of "ACPI0012". For each NVDIMM present or intended +to be supported by platform, platform firmware also exposes an ACPI +Namespace Device under the root device. + +The NVDIMM child devices under the NVDIMM root device are defined with _ADR +corresponding to the NFIT device handle. The NVDIMM root device and the +NVDIMM devices can have device specific methods (_DSM) to provide additional +functions specific to a particular NVDIMM implementation. + +This is an example from ACPI 6.0, a platform contains one NVDIMM:: + + Scope (\_SB){ + Device (NVDR) // Root device + { + Name (_HID, "ACPI0012") + Method (_STA) {...} + Method (_FIT) {...} + Method (_DSM, ...) {...} + Device (NVD) + { + Name(_ADR, h) //where h is NFIT Device Handle for this NVDIMM + Method (_DSM, ...) {...} + } + } + } + +Methods supported on both NVDIMM root device and NVDIMM device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +_DSM (Device Specific Method) + It is a control method that enables devices to provide device specific + control functions that are consumed by the device driver. + The NVDIMM DSM specification can be found at + http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf + + Arguments: + + Arg0 + A Buffer containing a UUID (16 Bytes) + Arg1 + An Integer containing the Revision ID (4 Bytes) + Arg2 + An Integer containing the Function Index (4 Bytes) + Arg3 + A package containing parameters for the function specified by the + UUID, Revision ID, and Function Index + + Return Value: + + If Function Index = 0, a Buffer containing a function index bitfield. + Otherwise, the return value and type depends on the UUID, revision ID + and function index which are described in the DSM specification. + +Methods on NVDIMM ROOT Device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +_FIT(Firmware Interface Table) + It evaluates to a buffer returning data in the format of a series of NFIT + Type Structure. + + Arguments: None + + Return Value: + A Buffer containing a list of NFIT Type structure entries. + + The detailed definition of the structure can be found at ACPI 6.0: 5.2.25 + NVDIMM Firmware Interface Table (NFIT). + +QEMU NVDIMM Implementation +-------------------------- + +QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page +for NVDIMM ACPI. + +Memory: + QEMU uses BIOS Linker/loader feature to ask BIOS to allocate a memory + page and dynamically patch its address into an int32 object named "MEMA" + in ACPI. 
+
+    This page is RAM-based and is used to transfer data between the _DSM
+    method and QEMU. If ACPI has control, this page is owned by ACPI, which
+    writes _DSM input data to it; otherwise, it is owned by QEMU, which
+    emulates _DSM access and writes the output data to it.
+
+    ACPI writes _DSM Input Data (based on the offset in the page):
+
+    [0x0 - 0x3]
+        4 bytes, NVDIMM Device Handle.
+
+        The handle is a purely QEMU-internal value; values in the
+        range [1, 0xFFFF] indicate an nvdimm device. Other values are
+        reserved for other purposes.
+
+        Reserved handles:
+
+        - 0 is reserved for the nvdimm root device named NVDR.
+        - 0x10000 is reserved for the QEMU internal DSM function called on
+          the root device.
+
+    [0x4 - 0x7]
+        4 bytes, Revision ID, that is the Arg1 of the _DSM method.
+
+    [0x8 - 0xB]
+        4 bytes, Function Index, that is the Arg2 of the _DSM method.
+
+    [0xC - 0xFFF]
+        4084 bytes, the Arg3 of the _DSM method.
+
+    QEMU writes Output Data (based on the offset in the page):
+
+    [0x0 - 0x3]
+        4 bytes, the length of the result
+
+    [0x4 - 0xFFF]
+        4092 bytes, the DSM result filled by QEMU
+
+IO Port 0x0a18 - 0xa1b:
+    ACPI writes the address of the memory page allocated by the BIOS to this
+    port, then QEMU takes control and fills in the result in the memory page.
+
+    Write Access:
+
+    [0x0a18 - 0xa1b]
+        4 bytes, the address of the memory page allocated by the BIOS.
+
+_DSM process diagram
+--------------------
+
+"MEMA" indicates the address of the memory page allocated by the BIOS.
+
+::
+
+    +----------------------+      +-----------------------+
+    |  1. OSPM             |      |  2. OSPM              |
+    | save _DSM input data |      | write "MEMA" to       | Exit to QEMU
+    | to the page          +----->| IO port 0x0a18        +------------+
+    | indicated by "MEMA"  |      |                       |            |
+    +----------------------+      +-----------------------+            |
+                                                                       |
+                                                                       v
+    +--------------------+       +-----------+      +------------------+--------+
+    |    5 QEMU          |       | 4 QEMU    |      | 3. QEMU                   |
+    | write _DSM result  |       |  emulate  |      | get _DSM input data from  |
+    | to the page        +<------+   _DSM    +<-----+ the page indicated by the |
+    |                    |       |           |      | value from the IO port    |
+    +--------+-----------+       +-----------+      +---------------------------+
+             |
+             | Enter Guest
+             |
+             v
+    +--------------------------+            +--------------+
+    |   6 OSPM                 |            |   7 OSPM     |
+    | result size is returned  |            | _DSM return  |
+    | by reading DSM           +----------->+              |
+    | result from the page     |            |              |
+    +--------------------------+            +--------------+
+
+NVDIMM hotplug
+--------------
+
+ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device
+hot-add events.
+
+QEMU internal use only _DSM functions
+-------------------------------------
+
+Read FIT
+^^^^^^^^
+
+The _FIT method uses the _DSM method to fetch the NFIT structures blob from
+QEMU in 1-page-sized increments, which are then concatenated and returned
+as the _FIT method result.
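The generic _DSM dataflow described above (OSPM fills the shared page, writes
its guest-physical address to IO port 0x0a18, then reads the result length and
payload back from the same page) can be illustrated with a minimal C sketch of
the guest side. The names below are assumptions for illustration only:
``dsm_page`` stands in for the BIOS-allocated page patched into "MEMA",
``dsm_page_gpa`` for its guest-physical address, and ``outl()`` is a
placeholder for the firmware's port-write primitive; a little-endian guest
(as on x86) is assumed. The Read FIT input and output layouts follow after
the sketch::

    /*
     * Hedged sketch of the guest-side _DSM call flow described above.
     * Assumed names (not from QEMU or the spec): dsm_page, dsm_page_gpa, outl().
     */
    #include <stdint.h>
    #include <string.h>

    #define NVDIMM_DSM_PORT 0x0a18u          /* 4-byte IO port described above */

    static uint8_t  dsm_page[4096];          /* stand-in for the "MEMA" page   */
    static uint32_t dsm_page_gpa;            /* its guest-physical address     */

    static void outl(uint16_t port, uint32_t val)
    {
        (void)port;                          /* placeholder for a real 32-bit  */
        (void)val;                           /* port write done by firmware    */
    }

    /* Issue one _DSM call and copy the result out of the shared page. */
    static uint32_t nvdimm_dsm_call(uint32_t handle, uint32_t revision,
                                    uint32_t function,
                                    const void *arg3, size_t arg3_len,
                                    void *result, size_t result_max)
    {
        uint32_t len;

        if (arg3_len > 4084) {
            arg3_len = 4084;                 /* Arg3 area is 0xC..0xFFF */
        }

        /* Input layout: handle @0x0, revision @0x4, function index @0x8,
         * Arg3 blob @0xC (little-endian guest assumed). */
        memcpy(dsm_page + 0x0, &handle,   sizeof(handle));
        memcpy(dsm_page + 0x4, &revision, sizeof(revision));
        memcpy(dsm_page + 0x8, &function, sizeof(function));
        if (arg3 && arg3_len) {
            memcpy(dsm_page + 0xC, arg3, arg3_len);
        }

        /* Hand control to QEMU by writing the page address to port 0x0a18. */
        outl(NVDIMM_DSM_PORT, dsm_page_gpa);

        /* Output layout per the text above: result length @0x0, data from 0x4. */
        memcpy(&len, dsm_page + 0x0, sizeof(len));
        if (len > result_max) {
            len = (uint32_t)result_max;
        }
        memcpy(result, dsm_page + 0x4, len);
        return len;
    }

For example, the Read FIT function described below would be invoked with
handle 0x10000, revision 1, function index 0x1, and a 4-byte offset blob as
Arg3.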
+ +Input parameters: + +Arg0 + UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} +Arg1 + Revision ID (set to 1) +Arg2 + Function Index, 0x1 +Arg3 + A package containing a buffer whose layout is as follows: + + +----------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +----------+--------+--------+-------------------------------------------+ + | offset | 4 | 0 | offset in QEMU's NFIT structures blob to | + | | | | read from | + +----------+--------+--------+-------------------------------------------+ + +Output layout in the dsm memory page: + + +----------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +----------+--------+--------+-------------------------------------------+ + | length | 4 | 0 | length of entire returned data | + | | | | (including this header) | + +----------+--------+--------+-------------------------------------------+ + | | | | return status codes | + | | | | | + | | | | - 0x0 - success | + | | | | - 0x100 - error caused by NFIT update | + | status | 4 | 4 | while read by _FIT wasn't completed | + | | | | - other codes follow Chapter 3 in | + | | | | DSM Spec Rev1 | + +----------+--------+--------+-------------------------------------------+ + | fit data | Varies | 8 | contains FIT data. This field is present | + | | | | if status field is 0. | + +----------+--------+--------+-------------------------------------------+ + +The FIT offset is maintained by the OSPM itself, current offset plus +the size of the fit data returned by the function is the next offset +OSPM should read. When all FIT data has been read out, zero fit data +size is returned. + +If it returns status code 0x100, OSPM should restart to read FIT (read +from offset 0 again). diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt deleted file mode 100644 index 3ec42ecbce4..00000000000 --- a/docs/specs/acpi_nvdimm.txt +++ /dev/null @@ -1,188 +0,0 @@ -QEMU<->ACPI BIOS NVDIMM interface ---------------------------------- - -QEMU supports NVDIMM via ACPI. This document describes the basic concepts of -NVDIMM ACPI and the interface between QEMU and the ACPI BIOS. - -NVDIMM ACPI Background ----------------------- -NVDIMM is introduced in ACPI 6.0 which defines an NVDIMM root device under -_SB scope with a _HID of “ACPI0012”. For each NVDIMM present or intended -to be supported by platform, platform firmware also exposes an ACPI -Namespace Device under the root device. - -The NVDIMM child devices under the NVDIMM root device are defined with _ADR -corresponding to the NFIT device handle. The NVDIMM root device and the -NVDIMM devices can have device specific methods (_DSM) to provide additional -functions specific to a particular NVDIMM implementation. - -This is an example from ACPI 6.0, a platform contains one NVDIMM: - -Scope (\_SB){ - Device (NVDR) // Root device - { - Name (_HID, “ACPI0012”) - Method (_STA) {...} - Method (_FIT) {...} - Method (_DSM, ...) {...} - Device (NVD) - { - Name(_ADR, h) //where h is NFIT Device Handle for this NVDIMM - Method (_DSM, ...) {...} - } - } -} - -Method supported on both NVDIMM root device and NVDIMM device -_DSM (Device Specific Method) - It is a control method that enables devices to provide device specific - control functions that are consumed by the device driver. 
- The NVDIMM DSM specification can be found at: - http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf - - Arguments: - Arg0 – A Buffer containing a UUID (16 Bytes) - Arg1 – An Integer containing the Revision ID (4 Bytes) - Arg2 – An Integer containing the Function Index (4 Bytes) - Arg3 – A package containing parameters for the function specified by the - UUID, Revision ID, and Function Index - - Return Value: - If Function Index = 0, a Buffer containing a function index bitfield. - Otherwise, the return value and type depends on the UUID, revision ID - and function index which are described in the DSM specification. - -Methods on NVDIMM ROOT Device -_FIT(Firmware Interface Table) - It evaluates to a buffer returning data in the format of a series of NFIT - Type Structure. - - Arguments: None - - Return Value: - A Buffer containing a list of NFIT Type structure entries. - - The detailed definition of the structure can be found at ACPI 6.0: 5.2.25 - NVDIMM Firmware Interface Table (NFIT). - -QEMU NVDIMM Implementation -========================== -QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page -for NVDIMM ACPI. - -Memory: - QEMU uses BIOS Linker/loader feature to ask BIOS to allocate a memory - page and dynamically patch its address into an int32 object named "MEMA" - in ACPI. - - This page is RAM-based and it is used to transfer data between _DSM - method and QEMU. If ACPI has control, this pages is owned by ACPI which - writes _DSM input data to it, otherwise, it is owned by QEMU which - emulates _DSM access and writes the output data to it. - - ACPI writes _DSM Input Data (based on the offset in the page): - [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle. - - The handle is completely QEMU internal thing, the values in - range [1, 0xFFFF] indicate nvdimm device. Other values are - reserved for other purposes. - - Reserved handles: - 0 is reserved for nvdimm root device named NVDR. - 0x10000 is reserved for QEMU internal DSM function called on - the root device. - - [0x4 - 0x7]: 4 bytes, Revision ID, that is the Arg1 of _DSM method. - [0x8 - 0xB]: 4 bytes. Function Index, that is the Arg2 of _DSM method. - [0xC - 0xFFF]: 4084 bytes, the Arg3 of _DSM method. - - QEMU Writes Output Data (based on the offset in the page): - [0x0 - 0x3]: 4 bytes, the length of result - [0x4 - 0xFFF]: 4092 bytes, the DSM result filled by QEMU - -IO Port 0x0a18 - 0xa1b: - ACPI writes the address of the memory page allocated by BIOS to this - port then QEMU gets the control and fills the result in the memory page. - - write Access: - [0x0a18 - 0xa1b]: 4 bytes, the address of the memory page allocated - by BIOS. - -_DSM process diagram: ---------------------- -"MEMA" indicates the address of memory page allocated by BIOS. - - +----------------------+   +-----------------------+ - |   1. OSPM   |      | 2. OSPM | - | save _DSM input data | | write "MEMA" to | Exit to QEMU - | to the page +----->| IO port 0x0a18 +------------+ - | indicated by "MEMA" | | | | - +----------------------+ +-----------------------+ | -  | -  v - +------------- ----+ +-----------+ +------------------+--------+ - | 5 QEMU | | 4 QEMU | | 3. 
QEMU | - | write _DSM result | | emulate | | get _DSM input data from | - | to the page +<------+ _DSM +<-----+ the page indicated by the | - | | | | | value from the IO port | - +--------+-----------+ +-----------+ +---------------------------+ - | - | Enter Guest - | - v - +--------------------------+ +--------------+ - | 6 OSPM | | 7 OSPM | - | result size is returned | | _DSM return | - | by reading DSM +----->+ | - | result from the page | | | - +--------------------------+ +--------------+ - -NVDIMM hotplug --------------- -ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device -hot-add event. - -QEMU internal use only _DSM function ------------------------------------- -1) Read FIT - _FIT method uses _DSM method to fetch NFIT structures blob from QEMU - in 1 page sized increments which are then concatenated and returned - as _FIT method result. - - Input parameters: - Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} - Arg1 – Revision ID (set to 1) - Arg2 - Function Index, 0x1 - Arg3 - A package containing a buffer whose layout is as follows: - - +----------+--------+--------+-------------------------------------------+ - | Field | Length | Offset | Description | - +----------+--------+--------+-------------------------------------------+ - | offset | 4 | 0 | offset in QEMU's NFIT structures blob to | - | | | | read from | - +----------+--------+--------+-------------------------------------------+ - - Output layout in the dsm memory page: - +----------+--------+--------+-------------------------------------------+ - | Field | Length | Offset | Description | - +----------+--------+--------+-------------------------------------------+ - | length | 4 | 0 | length of entire returned data | - | | | | (including this header) | - +----------+-----------------+-------------------------------------------+ - | | | | return status codes | - | | | | 0x0 - success | - | | | | 0x100 - error caused by NFIT update while | - | status | 4 | 4 | read by _FIT wasn't completed, other | - | | | | codes follow Chapter 3 in DSM Spec Rev1 | - +----------+-----------------+-------------------------------------------+ - | fit data | Varies | 8 | contains FIT data, this field is present | - | | | | if status field is 0; | - +----------+--------+--------+-------------------------------------------+ - - The FIT offset is maintained by the OSPM itself, current offset plus - the size of the fit data returned by the function is the next offset - OSPM should read. When all FIT data has been read out, zero fit data - size is returned. - - If it returns status code 0x100, OSPM should restart to read FIT (read - from offset 0 again). diff --git a/docs/specs/acpi_pci_hotplug.rst b/docs/specs/acpi_pci_hotplug.rst new file mode 100644 index 00000000000..685bc5c322f --- /dev/null +++ b/docs/specs/acpi_pci_hotplug.rst @@ -0,0 +1,48 @@ +QEMU<->ACPI BIOS PCI hotplug interface +====================================== + +QEMU supports PCI hotplug via ACPI, for PCI bus 0. This document +describes the interface between QEMU and the ACPI BIOS. + +ACPI GPE block (IO ports 0xafe0-0xafe3, byte access) +---------------------------------------------------- + +Generic ACPI GPE block. Bit 1 (GPE.1) used to notify PCI hotplug/eject +event to ACPI BIOS, via SCI interrupt. + +PCI slot injection notification pending (IO port 0xae00-0xae03, 4-byte access) +------------------------------------------------------------------------------ + +Slot injection notification pending. One bit per slot. 
+ +Read by ACPI BIOS GPE.1 handler to notify OS of injection +events. Read-only. + +PCI slot removal notification (IO port 0xae04-0xae07, 4-byte access) +-------------------------------------------------------------------- + +Slot removal notification pending. One bit per slot. + +Read by ACPI BIOS GPE.1 handler to notify OS of removal +events. Read-only. + +PCI device eject (IO port 0xae08-0xae0b, 4-byte access) +------------------------------------------------------- + +Write: Used by ACPI BIOS _EJ0 method to request device removal. +One bit per slot. + +Read: Hotplug features register. Used by platform to identify features +available. Current base feature set (no bits set): + +- Read-only "up" register @0xae00, 4-byte access, bit per slot +- Read-only "down" register @0xae04, 4-byte access, bit per slot +- Read/write "eject" register @0xae08, 4-byte access, + write: bit per slot eject, read: hotplug feature set +- Read-only hotplug capable register @0xae0c, 4-byte access, bit per slot + +PCI removability status (IO port 0xae0c-0xae0f, 4-byte access) +-------------------------------------------------------------- + +Used by ACPI BIOS _RMV method to indicate removability status to OS. One +bit per slot. Read-only. diff --git a/docs/specs/acpi_pci_hotplug.txt b/docs/specs/acpi_pci_hotplug.txt deleted file mode 100644 index a839434f313..00000000000 --- a/docs/specs/acpi_pci_hotplug.txt +++ /dev/null @@ -1,45 +0,0 @@ -QEMU<->ACPI BIOS PCI hotplug interface --------------------------------------- - -QEMU supports PCI hotplug via ACPI, for PCI bus 0. This document -describes the interface between QEMU and the ACPI BIOS. - -ACPI GPE block (IO ports 0xafe0-0xafe3, byte access): ------------------------------------------ - -Generic ACPI GPE block. Bit 1 (GPE.1) used to notify PCI hotplug/eject -event to ACPI BIOS, via SCI interrupt. - -PCI slot injection notification pending (IO port 0xae00-0xae03, 4-byte access): ---------------------------------------------------------------- -Slot injection notification pending. One bit per slot. - -Read by ACPI BIOS GPE.1 handler to notify OS of injection -events. Read-only. - -PCI slot removal notification (IO port 0xae04-0xae07, 4-byte access): ------------------------------------------------------ -Slot removal notification pending. One bit per slot. - -Read by ACPI BIOS GPE.1 handler to notify OS of removal -events. Read-only. - -PCI device eject (IO port 0xae08-0xae0b, 4-byte access): ----------------------------------------- - -Write: Used by ACPI BIOS _EJ0 method to request device removal. -One bit per slot. - -Read: Hotplug features register. Used by platform to identify features -available. Current base feature set (no bits set): - - Read-only "up" register @0xae00, 4-byte access, bit per slot - - Read-only "down" register @0xae04, 4-byte access, bit per slot - - Read/write "eject" register @0xae08, 4-byte access, - write: bit per slot eject, read: hotplug feature set - - Read-only hotplug capable register @0xae0c, 4-byte access, bit per slot - -PCI removability status (IO port 0xae0c-0xae0f, 4-byte access): ------------------------------------------------ - -Used by ACPI BIOS _RMV method to indicate removability status to OS. One -bit per slot. Read-only diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 7b08314d334..ecc43896bb2 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -1,11 +1,9 @@ -.. 
This is the top level page for the 'specs' manual - - +---------------------------------------------- System Emulation Guest Hardware Specifications -============================================== - +---------------------------------------------- -Contents: +This section of the manual contains specifications of +guest hardware that is specific to QEMU. .. toctree:: :maxdepth: 2 @@ -16,3 +14,7 @@ Contents: acpi_hw_reduced_hotplug tpm acpi_hest_ghes + acpi_cpu_hotplug + acpi_mem_hotplug + acpi_pci_hotplug + acpi_nvdimm diff --git a/docs/sphinx-static/custom.js b/docs/sphinx-static/custom.js new file mode 100644 index 00000000000..71a86053051 --- /dev/null +++ b/docs/sphinx-static/custom.js @@ -0,0 +1,9 @@ +document.addEventListener('keydown', (event) => { + // find a better way to look it up? + let search_input = document.getElementsByName('q')[0]; + + if (event.code === 'KeyS' && document.activeElement !== search_input) { + event.preventDefault(); + search_input.focus(); + } +}); diff --git a/docs/sphinx-static/theme_overrides.css b/docs/sphinx-static/theme_overrides.css new file mode 100644 index 00000000000..c70ef951286 --- /dev/null +++ b/docs/sphinx-static/theme_overrides.css @@ -0,0 +1,161 @@ +/* -*- coding: utf-8; mode: css -*- + * + * Sphinx HTML theme customization: read the doc + * Based on Linux Documentation/sphinx-static/theme_overrides.css + */ + +/* Improve contrast and increase size for easier reading. */ + +body { + font-family: serif; + color: black; + font-size: 100%; +} + +h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend { + font-family: sans-serif; +} + +.rst-content dl:not(.docutils) dt { + border-top: none; + border-left: solid 3px #ccc; + background-color: #f0f0f0; + color: black; +} + +.wy-nav-top { + background: #802400; +} + +.wy-side-nav-search input[type="text"] { + border-color: #f60; +} + +.wy-menu-vertical p.caption { + color: white; +} + +.wy-menu-vertical li.current a { + color: #505050; +} + +.wy-menu-vertical li.on a, .wy-menu-vertical li.current > a { + color: #303030; +} + +.fa-gitlab { + box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2), 0 3px 10px 0 rgba(0,0,0,0.19); + border-radius: 5px; +} + +div[class^="highlight"] pre { + font-family: monospace; + color: black; + font-size: 100%; +} + +.wy-menu-vertical { + font-family: sans-serif; +} + +.c { + font-style: normal; +} + +p { + font-size: 100%; +} + +/* Interim: Code-blocks with line nos - lines and line numbers don't line up. + * see: https://github.com/rtfd/sphinx_rtd_theme/issues/419 + */ + +div[class^="highlight"] pre { + line-height: normal; +} +.rst-content .highlight > pre { + line-height: normal; +} + +/* Keep fields from being strangely far apart due to inheirited table CSS. */ +.rst-content table.field-list th.field-name { + padding-top: 1px; + padding-bottom: 1px; +} +.rst-content table.field-list td.field-body { + padding-top: 1px; + padding-bottom: 1px; +} + +@media screen { + + /* content column + * + * RTD theme's default is 800px as max width for the content, but we have + * tables with tons of columns, which need the full width of the view-port. + */ + + .wy-nav-content{max-width: none; } + + /* table: + * + * - Sequences of whitespace should collapse into a single whitespace. 
+ * - make the overflow auto (scrollbar if needed) + * - align caption "left" ("center" is unsuitable on vast tables) + */ + + .wy-table-responsive table td { white-space: normal; } + .wy-table-responsive { overflow: auto; } + .rst-content table.docutils caption { text-align: left; font-size: 100%; } + + /* captions: + * + * - captions should have 100% (not 85%) font size + * - hide the permalink symbol as long as link is not hovered + */ + + .toc-title { + font-size: 150%; + font-weight: bold; + } + + caption, .wy-table caption, .rst-content table.field-list caption { + font-size: 100%; + } + caption a.headerlink { opacity: 0; } + caption a.headerlink:hover { opacity: 1; } + + /* Menu selection and keystrokes */ + + span.menuselection { + color: blue; + font-family: "Courier New", Courier, monospace + } + + code.kbd, code.kbd span { + color: white; + background-color: darkblue; + font-weight: bold; + font-family: "Courier New", Courier, monospace + } + + /* fix bottom margin of lists items */ + + .rst-content .section ul li:last-child, .rst-content .section ul li p:last-child { + margin-bottom: 12px; + } + + /* inline literal: drop the borderbox, padding and red color */ + + code, .rst-content tt, .rst-content code { + color: inherit; + border: none; + padding: unset; + background: inherit; + font-size: 85%; + } + + .rst-content tt.literal,.rst-content tt.literal,.rst-content code.literal { + color: inherit; + } +} diff --git a/docs/sphinx/depfile.py b/docs/sphinx/depfile.py index 277fdf0f568..afdcbcec6e7 100644 --- a/docs/sphinx/depfile.py +++ b/docs/sphinx/depfile.py @@ -12,6 +12,8 @@ import os import sphinx +import sys +from pathlib import Path __version__ = '1.0' @@ -20,8 +22,21 @@ def get_infiles(env): yield env.doc2path(x) yield from ((os.path.join(env.srcdir, dep) for dep in env.dependencies[x])) + for mod in sys.modules.values(): + if hasattr(mod, '__file__'): + if mod.__file__: + yield mod.__file__ + # this is perhaps going to include unused files: + for static_path in env.config.html_static_path + env.config.templates_path: + for path in Path(static_path).rglob('*'): + yield str(path) -def write_depfile(app, env): + +def write_depfile(app, exception): + if exception: + return + + env = app.env if not env.config.depfile: return @@ -42,7 +57,7 @@ def write_depfile(app, env): def setup(app): app.add_config_value('depfile', None, 'env') app.add_config_value('depfile_stamp', None, 'env') - app.connect('env-updated', write_depfile) + app.connect('build-finished', write_depfile) return dict( version = __version__, diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py index b7a2d39c105..d791b594923 100644 --- a/docs/sphinx/qapidoc.py +++ b/docs/sphinx/qapidoc.py @@ -34,7 +34,8 @@ from sphinx.util.nodes import nested_parse_with_titles import sphinx from qapi.gen import QAPISchemaVisitor -from qapi.schema import QAPIError, QAPISemError, QAPISchema +from qapi.error import QAPIError, QAPISemError +from qapi.schema import QAPISchema # Sphinx up to 1.6 uses AutodocReporter; 1.7 and later @@ -111,17 +112,19 @@ def _make_section(self, title): def _nodes_for_ifcond(self, ifcond, with_if=True): """Return list of Text, literal nodes for the ifcond - Return a list which gives text like ' (If: cond1, cond2, cond3)', where - the conditions are in literal-text and the commas are not. + Return a list which gives text like ' (If: condition)'. If with_if is False, we don't return the "(If: " and ")". 
""" - condlist = intersperse([nodes.literal('', c) for c in ifcond], - nodes.Text(', ')) + + doc = ifcond.docgen() + if not doc: + return [] + doc = nodes.literal('', doc) if not with_if: - return condlist + return [doc] nodelist = [nodes.Text(' ('), nodes.strong('', 'If: ')] - nodelist.extend(condlist) + nodelist.append(doc) nodelist.append(nodes.Text(')')) return nodelist @@ -138,7 +141,7 @@ def _nodes_for_one_member(self, member): term.append(nodes.literal('', member.type.doc_type())) if member.optional: term.append(nodes.Text(' (optional)')) - if member.ifcond: + if member.ifcond.is_present(): term.extend(self._nodes_for_ifcond(member.ifcond)) return term @@ -153,7 +156,7 @@ def _nodes_for_variant_when(self, variants, variant): nodes.literal('', variants.tag_member.name), nodes.Text(' is '), nodes.literal('', '"%s"' % variant.name)] - if variant.ifcond: + if variant.ifcond.is_present(): term.extend(self._nodes_for_ifcond(variant.ifcond)) return term @@ -208,7 +211,7 @@ def _nodes_for_enum_values(self, doc): dlnode = nodes.definition_list() for section in doc.args.values(): termtext = [nodes.literal('', section.member.name)] - if section.member.ifcond: + if section.member.ifcond.is_present(): termtext.extend(self._nodes_for_ifcond(section.member.ifcond)) # TODO drop fallbacks when undocumented members are outlawed if section.text: @@ -276,7 +279,7 @@ def _nodes_for_sections(self, doc): def _nodes_for_if_section(self, ifcond): """Return list of doctree nodes for the "If" section""" nodelist = [] - if ifcond: + if ifcond.is_present(): snode = self._make_section('If') snode += nodes.paragraph( '', '', *self._nodes_for_ifcond(ifcond, with_if=False) diff --git a/docs/system/_templates/editpage.html b/docs/system/_templates/editpage.html deleted file mode 100644 index 6586b2e2579..00000000000 --- a/docs/system/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index d1fb8f25b39..cec87e3743d 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -5,7 +5,7 @@ The QEMU Aspeed machines model BMCs of various OpenPOWER systems and Aspeed evaluation boards. They are based on different releases of the Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600 -with dual cores ARM Cortex A7 CPUs (1.2GHz). +with dual cores ARM Cortex-A7 CPUs (1.2GHz). The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C, etc. @@ -13,6 +13,7 @@ etc. AST2400 SoC based machines : - ``palmetto-bmc`` OpenPOWER Palmetto POWER8 BMC +- ``quanta-q71l-bmc`` OpenBMC Quanta BMC AST2500 SoC based machines : @@ -24,7 +25,7 @@ AST2500 SoC based machines : AST2600 SoC based machines : -- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex A7) +- ``ast2600-evb`` Aspeed AST2600 Evaluation board (Cortex-A7) - ``tacoma-bmc`` OpenPOWER Witherspoon POWER9 AST2600 BMC Supported devices @@ -49,6 +50,7 @@ Supported devices * Ethernet controllers * Front LEDs (PCA9552 on I2C bus) * LPC Peripheral Controller (a subset of subdevices are supported) + * Hash/Crypto Engine (HACE) - Hash support only. TODO: HMAC and RSA Missing devices @@ -59,7 +61,6 @@ Missing devices * PWM and Fan Controller * Slave GPIO Controller * Super I/O Controller - * Hash/Crypto Engine * PCI-Express 1 Controller * Graphic Display Controller * PECI Controller diff --git a/docs/system/arm/cpu-features.rst b/docs/system/arm/cpu-features.rst index c455442eaf5..584eb170974 100644 --- a/docs/system/arm/cpu-features.rst +++ b/docs/system/arm/cpu-features.rst @@ -10,22 +10,22 @@ is the Performance Monitoring Unit (PMU). CPU types such as the Cortex-A15 and the Cortex-A57, which respectively implement Arm architecture reference manuals ARMv7-A and ARMv8-A, may both optionally implement PMUs. For example, if a user wants to use a Cortex-A15 without -a PMU, then the `-cpu` parameter should contain `pmu=off` on the QEMU -command line, i.e. `-cpu cortex-a15,pmu=off`. +a PMU, then the ``-cpu`` parameter should contain ``pmu=off`` on the QEMU +command line, i.e. ``-cpu cortex-a15,pmu=off``. As not all CPU types support all optional CPU features, then whether or not a CPU property exists depends on the CPU type. For example, CPUs that implement the ARMv8-A architecture reference manual may optionally support the AArch32 CPU feature, which may be enabled by disabling the -`aarch64` CPU property. A CPU type such as the Cortex-A15, which does -not implement ARMv8-A, will not have the `aarch64` CPU property. +``aarch64`` CPU property. A CPU type such as the Cortex-A15, which does +not implement ARMv8-A, will not have the ``aarch64`` CPU property. QEMU's support may be limited for some CPU features, only partially supporting the feature or only supporting the feature under certain -configurations. For example, the `aarch64` CPU feature, which, when +configurations. For example, the ``aarch64`` CPU feature, which, when disabled, enables the optional AArch32 CPU feature, is only supported when using the KVM accelerator and when running on a host CPU type that -supports the feature. While `aarch64` currently only works with KVM, +supports the feature. While ``aarch64`` currently only works with KVM, it could work with TCG. CPU features that are specific to KVM are prefixed with "kvm-" and are described in "KVM VCPU Features". 
@@ -33,12 +33,12 @@ CPU Feature Probing =================== Determining which CPU features are available and functional for a given -CPU type is possible with the `query-cpu-model-expansion` QMP command. -Below are some examples where `scripts/qmp/qmp-shell` (see the top comment +CPU type is possible with the ``query-cpu-model-expansion`` QMP command. +Below are some examples where ``scripts/qmp/qmp-shell`` (see the top comment block in the script for usage) is used to issue the QMP commands. -1. Determine which CPU features are available for the `max` CPU type - (Note, we started QEMU with qemu-system-aarch64, so `max` is +1. Determine which CPU features are available for the ``max`` CPU type + (Note, we started QEMU with qemu-system-aarch64, so ``max`` is implementing the ARMv8-A reference manual in this case):: (QEMU) query-cpu-model-expansion type=full model={"name":"max"} @@ -51,9 +51,9 @@ block in the script for usage) is used to issue the QMP commands. "sve896": true, "sve1280": true, "sve2048": true }}}} -We see that the `max` CPU type has the `pmu`, `aarch64`, `sve`, and many -`sve` CPU features. We also see that all the CPU features are -enabled, as they are all `true`. (The `sve` CPU features are all +We see that the ``max`` CPU type has the ``pmu``, ``aarch64``, ``sve``, and many +``sve`` CPU features. We also see that all the CPU features are +enabled, as they are all ``true``. (The ``sve`` CPU features are all optional SVE vector lengths (see "SVE CPU Properties"). While with TCG all SVE vector lengths can be supported, when KVM is in use it's more likely that only a few lengths will be supported, if SVE is supported at @@ -71,9 +71,9 @@ all.) "sve896": true, "sve1280": true, "sve2048": true }}}} -We see it worked, as `pmu` is now `false`. +We see it worked, as ``pmu`` is now ``false``. -(3) Let's try to disable `aarch64`, which enables the AArch32 CPU feature:: +(3) Let's try to disable ``aarch64``, which enables the AArch32 CPU feature:: (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"aarch64":false}} {"error": { @@ -84,7 +84,7 @@ We see it worked, as `pmu` is now `false`. It looks like this feature is limited to a configuration we do not currently have. -(4) Let's disable `sve` and see what happens to all the optional SVE +(4) Let's disable ``sve`` and see what happens to all the optional SVE vector lengths:: (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"sve":false}} @@ -97,14 +97,14 @@ currently have. "sve896": false, "sve1280": false, "sve2048": false }}}} -As expected they are now all `false`. +As expected they are now all ``false``. (5) Let's try probing CPU features for the Cortex-A15 CPU type:: (QEMU) query-cpu-model-expansion type=full model={"name":"cortex-a15"} {"return": {"model": {"name": "cortex-a15", "props": {"pmu": true}}}} -Only the `pmu` CPU feature is available. +Only the ``pmu`` CPU feature is available. A note about CPU feature dependencies ------------------------------------- @@ -123,29 +123,29 @@ A note about CPU models and KVM ------------------------------- Named CPU models generally do not work with KVM. There are a few cases -that do work, e.g. using the named CPU model `cortex-a57` with KVM on a -seattle host, but mostly if KVM is enabled the `host` CPU type must be +that do work, e.g. using the named CPU model ``cortex-a57`` with KVM on a +seattle host, but mostly if KVM is enabled the ``host`` CPU type must be used. 
This means the guest is provided all the same CPU features as the -host CPU type has. And, for this reason, the `host` CPU type should +host CPU type has. And, for this reason, the ``host`` CPU type should enable all CPU features that the host has by default. Indeed it's even a bit strange to allow disabling CPU features that the host has when using -the `host` CPU type, but in the absence of CPU models it's the best we can +the ``host`` CPU type, but in the absence of CPU models it's the best we can do if we want to launch guests without all the host's CPU features enabled. -Enabling KVM also affects the `query-cpu-model-expansion` QMP command. The +Enabling KVM also affects the ``query-cpu-model-expansion`` QMP command. The affect is not only limited to specific features, as pointed out in example (3) of "CPU Feature Probing", but also to which CPU types may be expanded. -When KVM is enabled, only the `max`, `host`, and current CPU type may be +When KVM is enabled, only the ``max``, ``host``, and current CPU type may be expanded. This restriction is necessary as it's not possible to know all CPU types that may work with KVM, but it does impose a small risk of users experiencing unexpected errors. For example on a seattle, as mentioned -above, the `cortex-a57` CPU type is also valid when KVM is enabled. -Therefore a user could use the `host` CPU type for the current type, but -then attempt to query `cortex-a57`, however that query will fail with our +above, the ``cortex-a57`` CPU type is also valid when KVM is enabled. +Therefore a user could use the ``host`` CPU type for the current type, but +then attempt to query ``cortex-a57``, however that query will fail with our restrictions. This shouldn't be an issue though as management layers and -users have been preferring the `host` CPU type for use with KVM for quite +users have been preferring the ``host`` CPU type for use with KVM for quite some time. Additionally, if the KVM-enabled QEMU instance running on a -seattle host is using the `cortex-a57` CPU type, then querying `cortex-a57` +seattle host is using the ``cortex-a57`` CPU type, then querying ``cortex-a57`` will work. Using CPU Features @@ -158,12 +158,12 @@ QEMU command line with that CPU type:: $ qemu-system-aarch64 -M virt -cpu max,pmu=off,sve=on,sve128=on,sve256=on The example above disables the PMU and enables the first two SVE vector -lengths for the `max` CPU type. Note, the `sve=on` isn't actually -necessary, because, as we observed above with our probe of the `max` CPU -type, `sve` is already on by default. Also, based on our probe of +lengths for the ``max`` CPU type. Note, the ``sve=on`` isn't actually +necessary, because, as we observed above with our probe of the ``max`` CPU +type, ``sve`` is already on by default. Also, based on our probe of defaults, it would seem we need to disable many SVE vector lengths, rather than only enabling the two we want. This isn't the case, because, as -disabling many SVE vector lengths would be quite verbose, the `sve` CPU +disabling many SVE vector lengths would be quite verbose, the ``sve`` CPU properties have special semantics (see "SVE CPU Property Parsing Semantics"). @@ -217,11 +217,11 @@ TCG VCPU Features TCG VCPU features are CPU features that are specific to TCG. Below is the list of TCG VCPU features and their descriptions. - pauth Enable or disable `FEAT_Pauth`, pointer + pauth Enable or disable ``FEAT_Pauth``, pointer authentication. By default, the feature is - enabled with `-cpu max`. + enabled with ``-cpu max``. 
- pauth-impdef When `FEAT_Pauth` is enabled, either the + pauth-impdef When ``FEAT_Pauth`` is enabled, either the *impdef* (Implementation Defined) algorithm is enabled or the *architected* QARMA algorithm is enabled. By default the impdef algorithm @@ -235,49 +235,49 @@ Below is the list of TCG VCPU features and their descriptions. SVE CPU Properties ================== -There are two types of SVE CPU properties: `sve` and `sve`. The first -is used to enable or disable the entire SVE feature, just as the `pmu` +There are two types of SVE CPU properties: ``sve`` and ``sve``. The first +is used to enable or disable the entire SVE feature, just as the ``pmu`` CPU property completely enables or disables the PMU. The second type -is used to enable or disable specific vector lengths, where `N` is the -number of bits of the length. The `sve` CPU properties have special +is used to enable or disable specific vector lengths, where ``N`` is the +number of bits of the length. The ``sve`` CPU properties have special dependencies and constraints, see "SVE CPU Property Dependencies and Constraints" below. Additionally, as we want all supported vector lengths to be enabled by default, then, in order to avoid overly verbose command -lines (command lines full of `sve=off`, for all `N` not wanted), we +lines (command lines full of ``sve=off``, for all ``N`` not wanted), we provide the parsing semantics listed in "SVE CPU Property Parsing Semantics". SVE CPU Property Dependencies and Constraints --------------------------------------------- - 1) At least one vector length must be enabled when `sve` is enabled. + 1) At least one vector length must be enabled when ``sve`` is enabled. - 2) If a vector length `N` is enabled, then, when KVM is enabled, all + 2) If a vector length ``N`` is enabled, then, when KVM is enabled, all smaller, host supported vector lengths must also be enabled. If KVM is not enabled, then only all the smaller, power-of-two vector lengths must be enabled. E.g. with KVM if the host supports all - vector lengths up to 512-bits (128, 256, 384, 512), then if `sve512` + vector lengths up to 512-bits (128, 256, 384, 512), then if ``sve512`` is enabled, the 128-bit vector length, 256-bit vector length, and 384-bit vector length must also be enabled. Without KVM, the 384-bit vector length would not be required. 3) If KVM is enabled then only vector lengths that the host CPU type support may be enabled. If SVE is not supported by the host, then - no `sve*` properties may be enabled. + no ``sve*`` properties may be enabled. SVE CPU Property Parsing Semantics ---------------------------------- - 1) If SVE is disabled (`sve=off`), then which SVE vector lengths + 1) If SVE is disabled (``sve=off``), then which SVE vector lengths are enabled or disabled is irrelevant to the guest, as the entire SVE feature is disabled and that disables all vector lengths for - the guest. However QEMU will still track any `sve` CPU - properties provided by the user. If later an `sve=on` is provided, - then the guest will get only the enabled lengths. If no `sve=on` + the guest. However QEMU will still track any ``sve`` CPU + properties provided by the user. If later an ``sve=on`` is provided, + then the guest will get only the enabled lengths. If no ``sve=on`` is provided and there are explicitly enabled vector lengths, then an error is generated. 
- 2) If SVE is enabled (`sve=on`), but no `sve` CPU properties are + 2) If SVE is enabled (``sve=on``), but no ``sve`` CPU properties are provided, then all supported vector lengths are enabled, which when KVM is not in use means including the non-power-of-two lengths, and, when KVM is in use, it means all vector lengths supported by the host @@ -293,7 +293,7 @@ SVE CPU Property Parsing Semantics constraint (2) of "SVE CPU Property Dependencies and Constraints"). 5) When KVM is enabled, if the host does not support SVE, then an error - is generated when attempting to enable any `sve*` properties (see + is generated when attempting to enable any ``sve*`` properties (see constraint (3) of "SVE CPU Property Dependencies and Constraints"). 6) When KVM is enabled, if the host does support SVE, then an error is @@ -301,8 +301,8 @@ SVE CPU Property Parsing Semantics by the host (see constraint (3) of "SVE CPU Property Dependencies and Constraints"). - 7) If one or more `sve` CPU properties are set `off`, but no `sve`, - CPU properties are set `on`, then the specified vector lengths are + 7) If one or more ``sve`` CPU properties are set ``off``, but no ``sve``, + CPU properties are set ``on``, then the specified vector lengths are disabled but the default for any unspecified lengths remains enabled. When KVM is not enabled, disabling a power-of-two vector length also disables all vector lengths larger than the power-of-two length. @@ -310,15 +310,15 @@ SVE CPU Property Parsing Semantics disables all larger vector lengths (see constraint (2) of "SVE CPU Property Dependencies and Constraints"). - 8) If one or more `sve` CPU properties are set to `on`, then they + 8) If one or more ``sve`` CPU properties are set to ``on``, then they are enabled and all unspecified lengths default to disabled, except for the required lengths per constraint (2) of "SVE CPU Property Dependencies and Constraints", which will even be auto-enabled if they were not explicitly enabled. - 9) If SVE was disabled (`sve=off`), allowing all vector lengths to be + 9) If SVE was disabled (``sve=off``), allowing all vector lengths to be explicitly disabled (i.e. avoiding the error specified in (3) of - "SVE CPU Property Parsing Semantics"), then if later an `sve=on` is + "SVE CPU Property Parsing Semantics"), then if later an ``sve=on`` is provided an error will be generated. To avoid this error, one must enable at least one vector length prior to enabling SVE. @@ -329,12 +329,12 @@ SVE CPU Property Examples $ qemu-system-aarch64 -M virt -cpu max,sve=off - 2) Implicitly enable all vector lengths for the `max` CPU type:: + 2) Implicitly enable all vector lengths for the ``max`` CPU type:: $ qemu-system-aarch64 -M virt -cpu max 3) When KVM is enabled, implicitly enable all host CPU supported vector - lengths with the `host` CPU type:: + lengths with the ``host`` CPU type:: $ qemu-system-aarch64 -M virt,accel=kvm -cpu host @@ -376,3 +376,18 @@ verbose command lines. However, the recommended way to select vector lengths is to explicitly enable each desired length. Therefore only example's (1), (4), and (6) exhibit recommended uses of the properties. +SVE User-mode Default Vector Length Property +-------------------------------------------- + +For qemu-aarch64, the cpu property ``sve-default-vector-length=N`` is +defined to mirror the Linux kernel parameter file +``/proc/sys/abi/sve_default_vector_length``. The default length, ``N``, +is in units of bytes and must be between 16 and 8192. 
+If not specified, the default vector length is 64. + +If the default length is larger than the maximum vector length enabled, +the actual vector length will be reduced. Note that the maximum vector +length supported by QEMU is 256. + +If this property is set to ``-1`` then the default vector length +is set to the maximum possible length. diff --git a/docs/system/arm/cubieboard.rst b/docs/system/arm/cubieboard.rst new file mode 100644 index 00000000000..344ff8cef99 --- /dev/null +++ b/docs/system/arm/cubieboard.rst @@ -0,0 +1,16 @@ +Cubietech Cubieboard (``cubieboard``) +===================================== + +The ``cubieboard`` model emulates the Cubietech Cubieboard, +which is a Cortex-A8 based single-board computer using +the AllWinner A10 SoC. + +Emulated devices: + +- Timer +- UART +- RTC +- EMAC +- SDHCI +- USB controller +- SATA controller diff --git a/docs/system/arm/emcraft-sf2.rst b/docs/system/arm/emcraft-sf2.rst new file mode 100644 index 00000000000..377e2487206 --- /dev/null +++ b/docs/system/arm/emcraft-sf2.rst @@ -0,0 +1,15 @@ +Emcraft SmartFusion2 SOM kit (``emcraft-sf2``) +============================================== + +The ``emcraft-sf2`` board emulates the SmartFusion2 SOM kit from +Emcraft (M2S010). This is a System-on-Module from EmCraft systems, +based on the SmartFusion2 SoC FPGA from Microsemi Corporation. +The SoC is based on a Cortex-M4 processor. + +Emulated devices: + +- System timer +- System registers +- SPI controller +- UART +- EMAC diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst new file mode 100644 index 00000000000..144dc491d95 --- /dev/null +++ b/docs/system/arm/emulation.rst @@ -0,0 +1,103 @@ +A-profile CPU architecture support +================================== + +QEMU's TCG emulation includes support for the Armv5, Armv6, Armv7 and +Armv8 versions of the A-profile architecture. 
It also has support for
+the following architecture extensions:
+
+- FEAT_AA32BF16 (AArch32 BFloat16 instructions)
+- FEAT_AA32HPD (AArch32 hierarchical permission disables)
+- FEAT_AA32I8MM (AArch32 Int8 matrix multiplication instructions)
+- FEAT_AES (AESD and AESE instructions)
+- FEAT_BF16 (AArch64 BFloat16 instructions)
+- FEAT_BTI (Branch Target Identification)
+- FEAT_DIT (Data Independent Timing instructions)
+- FEAT_DPB (DC CVAP instruction)
+- FEAT_DotProd (Advanced SIMD dot product instructions)
+- FEAT_FCMA (Floating-point complex number instructions)
+- FEAT_FHM (Floating-point half-precision multiplication instructions)
+- FEAT_FP16 (Half-precision floating-point data processing)
+- FEAT_FRINTTS (Floating-point to integer instructions)
+- FEAT_FlagM (Flag manipulation instructions v2)
+- FEAT_FlagM2 (Enhancements to flag manipulation instructions)
+- FEAT_HPDS (Hierarchical permission disables)
+- FEAT_I8MM (AArch64 Int8 matrix multiplication instructions)
+- FEAT_JSCVT (JavaScript conversion instructions)
+- FEAT_LOR (Limited ordering regions)
+- FEAT_LRCPC (Load-acquire RCpc instructions)
+- FEAT_LRCPC2 (Load-acquire RCpc instructions v2)
+- FEAT_LSE (Large System Extensions)
+- FEAT_MTE (Memory Tagging Extension)
+- FEAT_MTE2 (Memory Tagging Extension)
+- FEAT_MTE3 (MTE Asymmetric Fault Handling)
+- FEAT_PAN (Privileged access never)
+- FEAT_PAN2 (AT S1E1R and AT S1E1W instruction variants affected by PSTATE.PAN)
+- FEAT_PAuth (Pointer authentication)
+- FEAT_PMULL (PMULL, PMULL2 instructions)
+- FEAT_PMUv3p1 (PMU Extensions v3.1)
+- FEAT_PMUv3p4 (PMU Extensions v3.4)
+- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
+- FEAT_RNG (Random number generator)
+- FEAT_SB (Speculation Barrier)
+- FEAT_SEL2 (Secure EL2)
+- FEAT_SHA1 (SHA1 instructions)
+- FEAT_SHA256 (SHA256 instructions)
+- FEAT_SHA3 (Advanced SIMD SHA3 instructions)
+- FEAT_SHA512 (Advanced SIMD SHA512 instructions)
+- FEAT_SM3 (Advanced SIMD SM3 instructions)
+- FEAT_SM4 (Advanced SIMD SM4 instructions)
+- FEAT_SPECRES (Speculation restriction instructions)
+- FEAT_SSBS (Speculative Store Bypass Safe)
+- FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
+- FEAT_TLBIRANGE (TLB invalidate range instructions)
+- FEAT_TTCNP (Translation table Common not private translations)
+- FEAT_TTST (Small translation tables)
+- FEAT_UAO (Unprivileged Access Override control)
+- FEAT_VHE (Virtualization Host Extensions)
+- FEAT_VMID16 (16-bit VMID)
+- FEAT_XNX (Translation table stage 2 Unprivileged Execute-never)
+- SVE (The Scalable Vector Extension)
+- SVE2 (The Scalable Vector Extension v2)
+
+For information on the specifics of these extensions, please refer
+to the `Armv8-A Arm Architecture Reference Manual
+`_.
+
+When a specific named CPU is being emulated, only those features which
+are present in hardware for that CPU are emulated. (If a feature is
+not in the list above then it is not supported, even if the real
+hardware should have it.) The ``max`` CPU enables all features.
+
+R-profile CPU architecture support
+==================================
+
+QEMU's TCG emulation support for R-profile CPUs is currently limited.
+We emulate only the Cortex-R5 and Cortex-R5F CPUs.
+
+M-profile CPU architecture support
+==================================
+
+QEMU's TCG emulation includes support for Armv6-M, Armv7-M, Armv8-M, and
+Armv8.1-M versions of the M-profile architecture.
It also has support
+for the following architecture extensions:
+
+- FP (Floating-point Extension)
+- FPCXT (FPCXT access instructions)
+- HP (Half-precision floating-point instructions)
+- LOB (Low Overhead loops and Branch future)
+- M (Main Extension)
+- MPU (Memory Protection Unit Extension)
+- PXN (Privileged Execute Never)
+- RAS (Reliability, Serviceability and Availability): "minimum RAS Extension" only
+- S (Security Extension)
+- ST (System Timer Extension)
+
+For information on the specifics of these extensions, please refer
+to the `Armv8-M Arm Architecture Reference Manual
+`_.
+
+When a specific named CPU is being emulated, only those features which
+are present in hardware for that CPU are emulated. (If a feature is
+not in the list above then it is not supported, even if the real
+hardware should have it.) There is no equivalent of the ``max`` CPU for
+M-profile.
diff --git a/docs/system/arm/highbank.rst b/docs/system/arm/highbank.rst
new file mode 100644
index 00000000000..bb4965b367f
--- /dev/null
+++ b/docs/system/arm/highbank.rst
@@ -0,0 +1,19 @@
+Calxeda Highbank and Midway (``highbank``, ``midway``)
+======================================================
+
+``highbank`` is a model of the Calxeda Highbank (ECX-1000) system,
+which has four Cortex-A9 cores.
+
+``midway`` is a model of the Calxeda Midway (ECX-2000) system,
+which has four Cortex-A15 cores.
+
+Emulated devices:
+
+- L2x0 cache controller
+- SP804 dual timer
+- PL011 UART
+- PL061 GPIOs
+- PL031 RTC
+- PL022 synchronous serial port controller
+- AHCI
+- XGMAC ethernet controllers
diff --git a/docs/system/arm/imx25-pdk.rst b/docs/system/arm/imx25-pdk.rst
new file mode 100644
index 00000000000..2a9711e8a79
--- /dev/null
+++ b/docs/system/arm/imx25-pdk.rst
@@ -0,0 +1,19 @@
+NXP i.MX25 PDK board (``imx25-pdk``)
+====================================
+
+The ``imx25-pdk`` board emulates the NXP i.MX25 Product Development Kit
+board, which is based on an i.MX25 SoC which uses an ARM926 CPU.
+
+Emulated devices:
+
+- SD controller
+- AVIC
+- CCM
+- GPT
+- EPIT timers
+- FEC
+- RNGC
+- I2C
+- GPIO controllers
+- Watchdog timer
+- USB controllers
diff --git a/docs/system/arm/kzm.rst b/docs/system/arm/kzm.rst
new file mode 100644
index 00000000000..bb018fbdf7c
--- /dev/null
+++ b/docs/system/arm/kzm.rst
@@ -0,0 +1,18 @@
+Kyoto Microcomputer KZM-ARM11-01 (``kzm``)
+==========================================
+
+The ``kzm`` board emulates the Kyoto Microcomputer KZM-ARM11-01
+evaluation board, which is based on an NXP i.MX31 SoC
+which uses an ARM1136 CPU.
+
+Emulated devices:
+
+- UARTs
+- LAN9118 ethernet
+- AVIC
+- CCM
+- GPT
+- EPIT timers
+- I2C
+- GPIO controllers
+- Watchdog timer
diff --git a/docs/system/arm/mainstone.rst b/docs/system/arm/mainstone.rst
new file mode 100644
index 00000000000..05310f42c7f
--- /dev/null
+++ b/docs/system/arm/mainstone.rst
@@ -0,0 +1,25 @@
+Intel Mainstone II board (``mainstone``)
+========================================
+
+The ``mainstone`` board emulates the Intel Mainstone II development
+board, which uses a PXA270 CPU.
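For a quick smoke test (a sketch only; ``zImage.mainstone`` is a placeholder name for a PXA270 Linux kernel image), the board can be started by passing a kernel directly::

   $ qemu-system-arm -M mainstone -kernel zImage.mainstone -nographic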
+ +Emulated devices: + +- Flash memory +- Keypad +- MMC controller +- 91C111 ethernet +- PIC +- Timer +- DMA +- GPIO +- FIR +- Serial +- LCD controller +- SSP +- USB controller +- RTC +- PCMCIA +- I2C +- I2S diff --git a/docs/system/arm/mps2.rst b/docs/system/arm/mps2.rst index f83b1517871..8a75beb3a08 100644 --- a/docs/system/arm/mps2.rst +++ b/docs/system/arm/mps2.rst @@ -45,3 +45,13 @@ Differences between QEMU and real hardware: flash, but only as simple ROM, so attempting to rewrite the flash from the guest will fail - QEMU does not model the USB controller in MPS3 boards + +Machine-specific options +"""""""""""""""""""""""" + +The following machine-specific options are supported: + +remap + Supported for ``mps3-an524`` only. + Set ``BRAM``/``QSPI`` to select the initial memory mapping. The + default is ``BRAM``. diff --git a/docs/system/arm/nrf.rst b/docs/system/arm/nrf.rst new file mode 100644 index 00000000000..eda87bd7602 --- /dev/null +++ b/docs/system/arm/nrf.rst @@ -0,0 +1,51 @@ +Nordic nRF boards (``microbit``) +================================ + +The `Nordic nRF`_ chips are a family of ARM-based System-on-Chip that +are designed to be used for low-power and short-range wireless solutions. + +.. _Nordic nRF: https://www.nordicsemi.com/Products + +The nRF51 series is the first series for short range wireless applications. +It is superseded by the nRF52 series. +The following machines are based on this chip : + +- ``microbit`` BBC micro:bit board with nRF51822 SoC + +There are other series such as nRF52, nRF53 and nRF91 which are currently not +supported by QEMU. + +Supported devices +----------------- + + * ARM Cortex-M0 (ARMv6-M) + * Serial ports (UART) + * Clock controller + * Timers + * Random Number Generator (RNG) + * GPIO controller + * NVMC + * SWI + +Missing devices +--------------- + + * Watchdog + * Real-Time Clock (RTC) controller + * TWI (i2c) + * SPI controller + * Analog to Digital Converter (ADC) + * Quadrature decoder + * Radio + +Boot options +------------ + +The Micro:bit machine can be started using the ``-device`` option to load a +firmware in `ihex format`_. Example: + +.. _ihex format: https://en.wikipedia.org/wiki/Intel_HEX + +.. code-block:: bash + + $ qemu-system-arm -M microbit -device loader,file=test.hex diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst index d3cf2d9cd7e..adf497e6791 100644 --- a/docs/system/arm/nuvoton.rst +++ b/docs/system/arm/nuvoton.rst @@ -1,24 +1,26 @@ -Nuvoton iBMC boards (``npcm750-evb``, ``quanta-gsj``) -===================================================== +Nuvoton iBMC boards (``*-bmc``, ``npcm750-evb``, ``quanta-gsj``) +================================================================ The `Nuvoton iBMC`_ chips (NPCM7xx) are a family of ARM-based SoCs that are designed to be used as Baseboard Management Controllers (BMCs) in various -servers. They all feature one or two ARM Cortex A9 CPU cores, as well as an +servers. They all feature one or two ARM Cortex-A9 CPU cores, as well as an assortment of peripherals targeted for either Enterprise or Data Center / Hyperscale applications. The former is a superset of the latter, so NPCM750 has all the peripherals of NPCM730 and more. .. _Nuvoton iBMC: https://www.nuvoton.com/products/cloud-computing/ibmc/ -The NPCM750 SoC has two Cortex A9 cores and is targeted for the Enterprise +The NPCM750 SoC has two Cortex-A9 cores and is targeted for the Enterprise segment. 
The following machines are based on this chip : - ``npcm750-evb`` Nuvoton NPCM750 Evaluation board -The NPCM730 SoC has two Cortex A9 cores and is targeted for Data Center and +The NPCM730 SoC has two Cortex-A9 cores and is targeted for Data Center and Hyperscale applications. The following machines are based on this chip : +- ``quanta-gbs-bmc`` Quanta GBS server BMC - ``quanta-gsj`` Quanta GSJ server BMC +- ``kudo-bmc`` Fii USA Kudo server BMC There are also two more SoCs, NPCM710 and NPCM705, which are single-core variants of NPCM750 and NPCM730, respectively. These are currently not @@ -78,7 +80,7 @@ Boot options ------------ The Nuvoton machines can boot from an OpenBMC firmware image, or directly into -a kernel using the ``-kernel`` option. OpenBMC images for `quanta-gsj` and +a kernel using the ``-kernel`` option. OpenBMC images for ``quanta-gsj`` and possibly others can be downloaded from the OpenPOWER jenkins : https://openpower.xyz/ diff --git a/docs/system/arm/orangepi.rst b/docs/system/arm/orangepi.rst index 6f23907fb69..83c7445197b 100644 --- a/docs/system/arm/orangepi.rst +++ b/docs/system/arm/orangepi.rst @@ -128,7 +128,7 @@ Alternatively, you can also choose to build you own image with buildroot using the orangepi_pc_defconfig. Also see https://buildroot.org for more information. When using an image as an SD card, it must be resized to a power of two. This can be -done with the qemu-img command. It is recommended to only increase the image size +done with the ``qemu-img`` command. It is recommended to only increase the image size instead of shrinking it to a power of two, to avoid loss of data. For example, to prepare a downloaded Armbian image, first extract it and then increase its size to one gigabyte as follows: @@ -250,14 +250,14 @@ and set the following environment variables before booting: Optionally you may save the environment variables to SD card with 'saveenv'. To continue booting simply give the 'boot' command and NetBSD boots. -Orange Pi PC acceptance tests -""""""""""""""""""""""""""""" +Orange Pi PC integration tests +"""""""""""""""""""""""""""""" -The Orange Pi PC machine has several acceptance tests included. +The Orange Pi PC machine has several integration tests included. To run the whole set of tests, build QEMU from source and simply provide the following command: .. 
code-block:: bash $ AVOCADO_ALLOW_LARGE_STORAGE=yes avocado --show=app,console run \ - -t machine:orangepi-pc tests/acceptance/boot_linux_console.py + -t machine:orangepi-pc tests/avocado/boot_linux_console.py diff --git a/docs/system/arm/sabrelite.rst b/docs/system/arm/sabrelite.rst index 71713310e3a..4ccb0560afe 100644 --- a/docs/system/arm/sabrelite.rst +++ b/docs/system/arm/sabrelite.rst @@ -10,7 +10,7 @@ Supported devices The SABRE Lite machine supports the following devices: - * Up to 4 Cortex A9 cores + * Up to 4 Cortex-A9 cores * Generic Interrupt Controller * 1 Clock Controller Module * 1 System Reset Controller diff --git a/docs/system/arm/sbsa.rst b/docs/system/arm/sbsa.rst index b8ecfdb62fd..b499d7e9272 100644 --- a/docs/system/arm/sbsa.rst +++ b/docs/system/arm/sbsa.rst @@ -1,10 +1,10 @@ Arm Server Base System Architecture Reference board (``sbsa-ref``) ================================================================== -While the `virt` board is a generic board platform that doesn't match -any real hardware the `sbsa-ref` board intends to look like real +While the ``virt`` board is a generic board platform that doesn't match +any real hardware the ``sbsa-ref`` board intends to look like real hardware. The `Server Base System Architecture -` defines a +`_ defines a minimum base line of hardware support and importantly how the firmware reports that to any operating system. It is a static system that reports a very minimal DT to the firmware for non-discoverable diff --git a/docs/system/arm/stm32.rst b/docs/system/arm/stm32.rst new file mode 100644 index 00000000000..508b92cf862 --- /dev/null +++ b/docs/system/arm/stm32.rst @@ -0,0 +1,66 @@ +STMicroelectronics STM32 boards (``netduino2``, ``netduinoplus2``, ``stm32vldiscovery``) +======================================================================================== + +The `STM32`_ chips are a family of 32-bit ARM-based microcontroller by +STMicroelectronics. + +.. _STM32: https://www.st.com/en/microcontrollers-microprocessors/stm32-32-bit-arm-cortex-mcus.html + +The STM32F1 series is based on ARM Cortex-M3 core. The following machines are +based on this chip : + +- ``stm32vldiscovery`` STM32VLDISCOVERY board with STM32F100RBT6 microcontroller + +The STM32F2 series is based on ARM Cortex-M3 core. The following machines are +based on this chip : + +- ``netduino2`` Netduino 2 board with STM32F205RFT6 microcontroller + +The STM32F4 series is based on ARM Cortex-M4F core. This series is pin-to-pin +compatible with STM32F2 series. The following machines are based on this chip : + +- ``netduinoplus2`` Netduino Plus 2 board with STM32F405RGT6 microcontroller + +There are many other STM32 series that are currently not supported by QEMU. 
+ +Supported devices +----------------- + + * ARM Cortex-M3, Cortex M4F + * Analog to Digital Converter (ADC) + * EXTI interrupt + * Serial ports (USART) + * SPI controller + * System configuration (SYSCFG) + * Timer controller (TIMER) + +Missing devices +--------------- + + * Camera interface (DCMI) + * Controller Area Network (CAN) + * Cycle Redundancy Check (CRC) calculation unit + * Digital to Analog Converter (DAC) + * DMA controller + * Ethernet controller + * Flash Interface Unit + * GPIO controller + * I2C controller + * Inter-Integrated Sound (I2S) controller + * Power supply configuration (PWR) + * Random Number Generator (RNG) + * Real-Time Clock (RTC) controller + * Reset and Clock Controller (RCC) + * Secure Digital Input/Output (SDIO) interface + * USB OTG + * Watchdog controller (IWDG, WWDG) + +Boot options +------------ + +The STM32 machines can be started using the ``-kernel`` option to load a +firmware. Example: + +.. code-block:: bash + + $ qemu-system-arm -M stm32vldiscovery -kernel firmware.bin diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 27652adfae1..850787495be 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -1,7 +1,7 @@ 'virt' generic virtual platform (``virt``) ========================================== -The `virt` board is a platform which does not correspond to any +The ``virt`` board is a platform which does not correspond to any real hardware; it is designed for use in virtual machines. It is the recommended board type if you simply want to run a guest such as Linux and do not care about reproducing the @@ -55,6 +55,7 @@ Supported guest CPU types: - ``cortex-a53`` (64-bit) - ``cortex-a57`` (64-bit) - ``cortex-a72`` (64-bit) +- ``a64fx`` (64-bit) - ``host`` (with KVM only) - ``max`` (same as ``host`` for KVM; best possible emulation with TCG) diff --git a/docs/system/arm/xlnx-versal-virt.rst b/docs/system/arm/xlnx-versal-virt.rst index 27f73500d95..92ad10d2da4 100644 --- a/docs/system/arm/xlnx-versal-virt.rst +++ b/docs/system/arm/xlnx-versal-virt.rst @@ -32,6 +32,8 @@ Implemented devices: - OCM (256KB of On Chip Memory) - XRAM (4MB of on chip Accelerator RAM) - DDR memory +- BBRAM (36 bytes of Battery-backed RAM) +- eFUSE (3072 bytes of one-time field-programmable bit array) QEMU does not yet model any other devices, including the PL and the AI Engine. @@ -175,3 +177,50 @@ Run the following at the U-Boot prompt: fdt set /chosen/dom0 reg <0x00000000 0x40000000 0x0 0x03100000> booti 30000000 - 20000000 +BBRAM File Backend +"""""""""""""""""" +BBRAM can have an optional file backend, which must be a seekable +binary file with a size of 36 bytes or larger. A file with all +binary 0s is a 'blank'. + +To add a file-backend for the BBRAM: + +.. code-block:: bash + + -drive if=pflash,index=0,file=versal-bbram.bin,format=raw + +To use a different index value, N, from default of 0, add: + +.. code-block:: bash + + -global xlnx,bbram-ctrl.drive-index=N + +eFUSE File Backend +"""""""""""""""""" +eFUSE can have an optional file backend, which must be a seekable +binary file with a size of 3072 bytes or larger. A file with all +binary 0s is a 'blank'. + +To add a file-backend for the eFUSE: + +.. code-block:: bash + + -drive if=pflash,index=1,file=versal-efuse.bin,format=raw + +To use a different index value, N, from default of 1, add: + +.. code-block:: bash + + -global xlnx,efuse.drive-index=N + +.. warning:: + In actual physical Versal, BBRAM and eFUSE contain sensitive data. 
+ The QEMU device models do **not** encrypt nor obfuscate any data + when holding them in models' memory or when writing them to their + file backends. + + Thus, a file backend should be used with caution, and 'format=luks' + is highly recommended (albeit with usage complexity). + + Better yet, do not use actual product data when running guest image + on this Xilinx Versal Virt board. diff --git a/docs/system/authz.rst b/docs/system/authz.rst new file mode 100644 index 00000000000..55b7315e496 --- /dev/null +++ b/docs/system/authz.rst @@ -0,0 +1,257 @@ +.. _client authorization: + +Client authorization +-------------------- + +When configuring a QEMU network backend with either TLS certificates or SASL +authentication, access will be granted if the client successfully proves +their identity. If the authorization identity database is scoped to the QEMU +client this may be sufficient. It is common, however, for the identity database +to be much broader and thus authentication alone does not enable sufficient +access control. In this case QEMU provides a flexible system for enforcing +finer grained authorization on clients post-authentication. + +Identity providers +~~~~~~~~~~~~~~~~~~ + +At the time of writing there are two authentication frameworks used by QEMU +that emit an identity upon completion. + + * TLS x509 certificate distinguished name. + + When configuring the QEMU backend as a network server with TLS, there + are a choice of credentials to use. The most common scenario is to utilize + x509 certificates. The simplest configuration only involves issuing + certificates to the servers, allowing the client to avoid a MITM attack + against their intended server. + + It is possible, however, to enable mutual verification by requiring that + the client provide a certificate to the server to prove its own identity. + This is done by setting the property ``verify-peer=yes`` on the + ``tls-creds-x509`` object, which is in fact the default. + + When peer verification is enabled, client will need to be issued with a + certificate by the same certificate authority as the server. If this is + still not sufficiently strong access control the Distinguished Name of + the certificate can be used as an identity in the QEMU authorization + framework. + + * SASL username. + + When configuring the QEMU backend as a network server with SASL, upon + completion of the SASL authentication mechanism, a username will be + provided. The format of this username will vary depending on the choice + of mechanism configured for SASL. It might be a simple UNIX style user + ``joebloggs``, while if using Kerberos/GSSAPI it can have a realm + attached ``joebloggs@QEMU.ORG``. Whatever format the username is presented + in, it can be used with the QEMU authorization framework. + +Authorization drivers +~~~~~~~~~~~~~~~~~~~~~ + +The QEMU authorization framework is a general purpose design with choice of +user customizable drivers. These are provided as objects that can be +created at startup using the ``-object`` argument, or at runtime using the +``object_add`` monitor command. + +Simple +^^^^^^ + +This authorization driver provides a simple mechanism for granting access +based on an exact match against a single identity. This is useful when it is +known that only a single client is to be allowed access. + +A possible use case would be when configuring QEMU for an incoming live +migration. It is known exactly which source QEMU the migration is expected +to arrive from. 
The x509 certificate associated with this source QEMU would +thus be used as the identity to match against. Alternatively if the virtual +machine is dedicated to a specific tenant, then the VNC server would be +configured with SASL and the username of only that tenant listed. + +To create an instance of this driver via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-simple", + "id": "authz0", + "identity": "fred" + } + } + + +Or via the command line + +:: + + -object authz-simple,id=authz0,identity=fred + + +List +^^^^ + +In some network backends it will be desirable to grant access to a range of +clients. This authorization driver provides a list mechanism for granting +access by matching identities against a list of permitted one. Each match +rule has an associated policy and a catch all policy applies if no rule +matches. The match can either be done as an exact string comparison, or can +use the shell-like glob syntax, which allows for use of wildcards. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-list", + "id": "authz0", + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" + } + } + + +Due to the way this driver requires setting nested properties, creating +it on the command line will require use of the JSON syntax for ``-object``. +In most cases, however, the next driver will be more suitable. + +List file +^^^^^^^^^ + +This is a variant on the previous driver that allows for a more dynamic +access control policy by storing the match rules in a standalone file +that can be reloaded automatically upon change. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-list-file", + "id": "authz0", + "filename": "/etc/qemu/myvm-vnc.acl", + "refresh": true + } + } + + +If ``refresh`` is ``yes``, inotify is used to monitor for changes +to the file and auto-reload the rules. + +The ``myvm-vnc.acl`` file should contain the match rules in a format that +closely matches the previous driver: + +:: + + { + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" + } + + +The object can be created on the command line using + +:: + + -object authz-list-file,id=authz0,\ + filename=/etc/qemu/myvm-vnc.acl,refresh=on + + +PAM +^^^ + +In some scenarios it might be desirable to integrate with authorization +mechanisms that are implemented outside of QEMU. In order to allow maximum +flexibility, QEMU provides a driver that uses the ``PAM`` framework. + +To create an instance of this class via QMP: + +:: + + { + "execute": "object-add", + "arguments": { + "qom-type": "authz-pam", + "id": "authz0", + "parameters": { + "service": "qemu-vnc-tls" + } + } + } + + +The driver only uses the PAM "account" verification +subsystem. The above config would require a config +file /etc/pam.d/qemu-vnc-tls. For a simple file +lookup it would contain + +:: + + account requisite pam_listfile.so item=user sense=allow \ + file=/etc/qemu/vnc.allow + + +The external file would then contain a list of usernames. 
+If x509 cert was being used as the username, a suitable +entry would match the distinguished name: + +:: + + CN=laptop.berrange.com,O=Berrange Home,L=London,ST=London,C=GB + + +On the command line it can be created using + +:: + + -object authz-pam,id=authz0,service=qemu-vnc-tls + + +There are a variety of PAM plugins that can be used which are not illustrated +here, and it is possible to implement brand new plugins using the PAM API. + + +Connecting backends +~~~~~~~~~~~~~~~~~~~ + +The authorization driver is created using the ``-object`` argument and then +needs to be associated with a network service. The authorization driver object +will be given a unique ID that needs to be referenced. + +The property to set in the network service will vary depending on the type of +identity to verify. By convention, any network server backend that uses TLS +will provide ``tls-authz`` property, while any server using SASL will provide +a ``sasl-authz`` property. + +Thus an example using SASL and authorization for the VNC server would look +like: + +:: + + $QEMU --object authz-simple,id=authz0,identity=fred \ + --vnc 0.0.0.0:1,sasl,sasl-authz=authz0 + +While to validate both the x509 certificate and SASL username: + +:: + + echo "CN=laptop.qemu.org,O=QEMU Project,L=London,ST=London,C=GB" >> tls.acl + $QEMU --object authz-simple,id=authz0,identity=fred \ + --object authz-list-file,id=authz1,filename=tls.acl \ + --object tls-creds-x509,id=tls0,dir=/etc/qemu/tls,verify-peer=yes \ + --vnc 0.0.0.0:1,sasl,sasl-authz=auth0,tls-creds=tls0,tls-authz=authz1 diff --git a/docs/system/barrier.rst b/docs/system/barrier.rst new file mode 100644 index 00000000000..155d7d29013 --- /dev/null +++ b/docs/system/barrier.rst @@ -0,0 +1,44 @@ +QEMU Barrier Client +=================== + +Generally, mouse and keyboard are grabbed through the QEMU video +interface emulation. + +But when we want to use a video graphic adapter via a PCI passthrough +there is no way to provide the keyboard and mouse inputs to the VM +except by plugging a second set of mouse and keyboard to the host +or by installing a KVM software in the guest OS. + +The QEMU Barrier client avoids this by implementing directly the Barrier +protocol into QEMU. + +`Barrier `__ +is a KVM (Keyboard-Video-Mouse) software forked from Symless's +synergy 1.9 codebase. + +This protocol is enabled by adding an input-barrier object to QEMU. + +Syntax:: + + input-barrier,id=,name= + [,server=][,port=] + [,x-origin=][,y-origin=] + [,width=][,height=] + +The object can be added on the QEMU command line, for instance with:: + + -object input-barrier,id=barrier0,name=VM-1 + +where VM-1 is the name the display configured in the Barrier server +on the host providing the mouse and the keyboard events. + +by default ```` is ``localhost``, +```` is ``24800``, ```` and ```` are set to ``0``, +```` and ```` to ``1920`` and ``1080``. 
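For instance (the server address below is an assumed example; point it at whichever host runs the Barrier server), a guest that takes its keyboard and mouse input from a remote Barrier server could be started with::

   $ qemu-system-x86_64 -M q35 \
       -object input-barrier,id=barrier0,name=VM-1,server=192.168.0.2,port=24800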
+ +If the Barrier server is stopped QEMU needs to be reconnected manually, +by removing and re-adding the input-barrier object, for instance +with the help of the HMP monitor:: + + (qemu) object_del barrier0 + (qemu) object_add input-barrier,id=barrier0,name=VM-1 diff --git a/docs/system/bootindex.rst b/docs/system/bootindex.rst new file mode 100644 index 00000000000..8b057f812f2 --- /dev/null +++ b/docs/system/bootindex.rst @@ -0,0 +1,76 @@ +Managing device boot order with bootindex properties +==================================================== + +QEMU can tell QEMU-aware guest firmware (like the x86 PC BIOS) +which order it should look for a bootable OS on which devices. +A simple way to set this order is to use the ``-boot order=`` option, +but you can also do this more flexibly, by setting a ``bootindex`` +property on the individual block or net devices you specify +on the QEMU command line. + +The ``bootindex`` properties are used to determine the order in which +firmware will consider devices for booting the guest OS. If the +``bootindex`` property is not set for a device, it gets the lowest +boot priority. There is no particular order in which devices with no +``bootindex`` property set will be considered for booting, but they +will still be bootable. + +Some guest machine types (for instance the s390x machines) do +not support ``-boot order=``; on those machines you must always +use ``bootindex`` properties. + +There is no way to set a ``bootindex`` property if you are using +a short-form option like ``-hda`` or ``-cdrom``, so to use +``bootindex`` properties you will need to expand out those options +into long-form ``-drive`` and ``-device`` option pairs. + +Example +------- + +Let's assume we have a QEMU machine with two NICs (virtio, e1000) and two +disks (IDE, virtio): + +.. parsed-literal:: + + |qemu_system| -drive file=disk1.img,if=none,id=disk1 \\ + -device ide-hd,drive=disk1,bootindex=4 \\ + -drive file=disk2.img,if=none,id=disk2 \\ + -device virtio-blk-pci,drive=disk2,bootindex=3 \\ + -netdev type=user,id=net0 \\ + -device virtio-net-pci,netdev=net0,bootindex=2 \\ + -netdev type=user,id=net1 \\ + -device e1000,netdev=net1,bootindex=1 + +Given the command above, firmware should try to boot from the e1000 NIC +first. If this fails, it should try the virtio NIC next; if this fails +too, it should try the virtio disk, and then the IDE disk. + +Limitations +----------- + +Some firmware has limitations on which devices can be considered for +booting. For instance, the PC BIOS boot specification allows only one +disk to be bootable. If boot from disk fails for some reason, the BIOS +won't retry booting from other disk. It can still try to boot from +floppy or net, though. + +Sometimes, firmware cannot map the device path QEMU wants firmware to +boot from to a boot method. It doesn't happen for devices the firmware +can natively boot from, but if firmware relies on an option ROM for +booting, and the same option ROM is used for booting from more then one +device, the firmware may not be able to ask the option ROM to boot from +a particular device reliably. For instance with the PC BIOS, if a SCSI HBA +has three bootable devices target1, target3, target5 connected to it, +the option ROM will have a boot method for each of them, but it is not +possible to map from boot method back to a specific target. This is a +shortcoming of the PC BIOS boot specification. 
+ +Mixing bootindex and boot order parameters +------------------------------------------ + +Note that it does not make sense to use the bootindex property together +with the ``-boot order=...`` (or ``-boot once=...``) parameter. The guest +firmware implementations normally either support the one or the other, +but not both parameters at the same time. Mixing them will result in +undefined behavior, and thus the guest firmware will likely not boot +from the expected devices. diff --git a/docs/system/build-platforms.rst b/docs/system/build-platforms.rst deleted file mode 100644 index 692323609e7..00000000000 --- a/docs/system/build-platforms.rst +++ /dev/null @@ -1,62 +0,0 @@ -.. _Supported-build-platforms: - -Supported build platforms -========================= - -QEMU aims to support building and executing on multiple host OS -platforms. This appendix outlines which platforms are the major build -targets. These platforms are used as the basis for deciding upon the -minimum required versions of 3rd party software QEMU depends on. The -supported platforms are the targets for automated testing performed by -the project when patches are submitted for review, and tested before and -after merge. - -If a platform is not listed here, it does not imply that QEMU won't -work. If an unlisted platform has comparable software versions to a -listed platform, there is every expectation that it will work. Bug -reports are welcome for problems encountered on unlisted platforms -unless they are clearly older vintage than what is described here. - -Note that when considering software versions shipped in distros as -support targets, QEMU considers only the version number, and assumes the -features in that distro match the upstream release with the same -version. In other words, if a distro backports extra features to the -software in their distro, QEMU upstream code will not add explicit -support for those backports, unless the feature is auto-detectable in a -manner that works for the upstream releases too. - -The `Repology`_ site is a useful resource to identify -currently shipped versions of software in various operating systems, -though it does not cover all distros listed below. - -Linux OS, macOS, FreeBSD, NetBSD, OpenBSD ------------------------------------------ - -The project aims to support the most recent major version at all times. Support -for the previous major version will be dropped 2 years after the new major -version is released or when the vendor itself drops support, whichever comes -first. In this context, third-party efforts to extend the lifetime of a distro -are not considered, even when they are endorsed by the vendor (eg. Debian LTS). - -For the purposes of identifying supported software versions available on Linux, -the project will look at CentOS, Debian, Fedora, openSUSE, RHEL, SLES and -Ubuntu LTS. Other distros will be assumed to ship similar software versions. - -For FreeBSD and OpenBSD, decisions will be made based on the contents of the -respective ports repository, while NetBSD will use the pkgsrc repository. - -For macOS, `HomeBrew`_ will be used, although `MacPorts`_ is expected to carry -similar versions. - -Windows -------- - -The project supports building with current versions of the MinGW toolchain, -hosted on Linux (Debian/Fedora). - -The version of the Windows API that's currently targeted is Vista / Server -2008. - -.. _HomeBrew: https://brew.sh/ -.. _MacPorts: https://www.macports.org/ -.. 
_Repology: https://repology.org/ diff --git a/docs/system/cpu-hotplug.rst b/docs/system/cpu-hotplug.rst index bd0663616e8..015ce2b6ec3 100644 --- a/docs/system/cpu-hotplug.rst +++ b/docs/system/cpu-hotplug.rst @@ -78,7 +78,7 @@ vCPU hotplug } (QEMU) -(5) Optionally, run QMP `query-cpus-fast` for some details about the +(5) Optionally, run QMP ``query-cpus-fast`` for some details about the vCPUs:: (QEMU) query-cpus-fast diff --git a/docs/system/cpu-models-x86-abi.csv b/docs/system/cpu-models-x86-abi.csv new file mode 100644 index 00000000000..f3f3b60be10 --- /dev/null +++ b/docs/system/cpu-models-x86-abi.csv @@ -0,0 +1,67 @@ +Model,baseline,v2,v3,v4 +486-v1,,,, +Broadwell-v1,✅,✅,✅, +Broadwell-v2,✅,✅,✅, +Broadwell-v3,✅,✅,✅, +Broadwell-v4,✅,✅,✅, +Cascadelake-Server-v1,✅,✅,✅,✅ +Cascadelake-Server-v2,✅,✅,✅,✅ +Cascadelake-Server-v3,✅,✅,✅,✅ +Cascadelake-Server-v4,✅,✅,✅,✅ +Conroe-v1,✅,,, +Cooperlake-v1,✅,✅,✅,✅ +Denverton-v1,✅,✅,, +Denverton-v2,✅,✅,, +Dhyana-v1,✅,✅,✅, +EPYC-Milan-v1,✅,✅,✅, +EPYC-Rome-v1,✅,✅,✅, +EPYC-Rome-v2,✅,✅,✅, +EPYC-v1,✅,✅,✅, +EPYC-v2,✅,✅,✅, +EPYC-v3,✅,✅,✅, +Haswell-v1,✅,✅,✅, +Haswell-v2,✅,✅,✅, +Haswell-v3,✅,✅,✅, +Haswell-v4,✅,✅,✅, +Icelake-Client-v1,✅,✅,✅, +Icelake-Client-v2,✅,✅,✅, +Icelake-Server-v1,✅,✅,✅,✅ +Icelake-Server-v2,✅,✅,✅,✅ +Icelake-Server-v3,✅,✅,✅,✅ +Icelake-Server-v4,✅,✅,✅,✅ +IvyBridge-v1,✅,✅,, +IvyBridge-v2,✅,✅,, +KnightsMill-v1,✅,✅,✅, +Nehalem-v1,✅,✅,, +Nehalem-v2,✅,✅,, +Opteron_G1-v1,✅,,, +Opteron_G2-v1,✅,,, +Opteron_G3-v1,✅,,, +Opteron_G4-v1,✅,✅,, +Opteron_G5-v1,✅,✅,, +Penryn-v1,✅,,, +SandyBridge-v1,✅,✅,, +SandyBridge-v2,✅,✅,, +Skylake-Client-v1,✅,✅,✅, +Skylake-Client-v2,✅,✅,✅, +Skylake-Client-v3,✅,✅,✅, +Skylake-Server-v1,✅,✅,✅,✅ +Skylake-Server-v2,✅,✅,✅,✅ +Skylake-Server-v3,✅,✅,✅,✅ +Skylake-Server-v4,✅,✅,✅,✅ +Snowridge-v1,✅,✅,, +Snowridge-v2,✅,✅,, +Westmere-v1,✅,✅,, +Westmere-v2,✅,✅,, +athlon-v1,,,, +core2duo-v1,✅,,, +coreduo-v1,,,, +kvm32-v1,,,, +kvm64-v1,✅,,, +n270-v1,,,, +pentium-v1,,,, +pentium2-v1,,,, +pentium3-v1,,,, +phenom-v1,✅,,, +qemu32-v1,,,, +qemu64-v1,✅,,, diff --git a/docs/system/cpu-models-x86.rst.inc b/docs/system/cpu-models-x86.rst.inc index 867c8216b5a..7f6368f999b 100644 --- a/docs/system/cpu-models-x86.rst.inc +++ b/docs/system/cpu-models-x86.rst.inc @@ -1,5 +1,5 @@ Recommendations for KVM CPU model configuration on x86 hosts -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +============================================================ The information that follows provides recommendations for configuring CPU models on x86 hosts. The goals are to maximise performance, while @@ -39,6 +39,28 @@ CPU, as they would with "Host passthrough", but gives much of the benefit of passthrough, while making live migration safe. +ABI compatibility levels for CPU models +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The x86_64 architecture has a number of `ABI compatibility levels`_ +defined. Traditionally most operating systems and toolchains would +only target the original baseline ABI. It is expected that in +future OS and toolchains are likely to target newer ABIs. The +table that follows illustrates which ABI compatibility levels +can be satisfied by the QEMU CPU models. Note that the table only +lists the long term stable CPU model versions (eg Haswell-v4). +In addition to what is listed, there are also many CPU model +aliases which resolve to a different CPU model version, +depending on the machine type is in use. + +.. _ABI compatibility levels: https://gitlab.com/x86-psABIs/x86-64-ABI/ + +.. 
csv-table:: x86-64 ABI compatibility levels + :file: cpu-models-x86-abi.csv + :widths: 40,15,15,15,15 + :header-rows: 2 + + Preferred CPU models for Intel x86 hosts ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -205,7 +227,7 @@ features are included if using "Host passthrough" or "Host model". Preferred CPU models for AMD x86 hosts ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The following CPU models are preferred for use on Intel hosts. +The following CPU models are preferred for use on AMD hosts. Administrators / applications are recommended to use the CPU model that matches the generation of the host CPUs in use. In a deployment with a mixture of host CPU models between machines, if live migration @@ -346,7 +368,7 @@ featureset, which prevents guests having optimal performance. Syntax for configuring CPU models -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +================================= The examples below illustrate the approach to configuring the various CPU models / features in QEMU and libvirt. diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst deleted file mode 100644 index 80cae862528..00000000000 --- a/docs/system/deprecated.rst +++ /dev/null @@ -1,386 +0,0 @@ -Deprecated features -=================== - -In general features are intended to be supported indefinitely once -introduced into QEMU. In the event that a feature needs to be removed, -it will be listed in this section. The feature will remain functional for the -release in which it was deprecated and one further release. After these two -releases, the feature is liable to be removed. Deprecated features may also -generate warnings on the console when QEMU starts up, or if activated via a -monitor command, however, this is not a mandatory requirement. - -Prior to the 2.10.0 release there was no official policy on how -long features would be deprecated prior to their removal, nor -any documented list of which features were deprecated. Thus -any features deprecated prior to 2.10.0 will be treated as if -they were first deprecated in the 2.10.0 release. - -What follows is a list of all features currently marked as -deprecated. - -System emulator command line arguments --------------------------------------- - -``QEMU_AUDIO_`` environment variables and ``-audio-help`` (since 4.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``-audiodev`` argument is now the preferred way to specify audio -backend settings instead of environment variables. To ease migration to -the new format, the ``-audiodev-help`` option can be used to convert -the current values of the environment variables to ``-audiodev`` options. - -Creating sound card devices and vnc without ``audiodev=`` property (since 4.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -When not using the deprecated legacy audio config, each sound card -should specify an ``audiodev=`` property. Additionally, when using -vnc, you should specify an ``audiodev=`` property if you plan to -transmit audio through the VNC protocol. - -Creating sound card devices using ``-soundhw`` (since 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Sound card devices should be created using ``-device`` instead. The -names are the same for most devices. The exceptions are ``hda`` which -needs two devices (``-device intel-hda -device hda-duplex``) and -``pcspk`` which can be activated using ``-machine -pcspk-audiodev=``. 
- -``-chardev`` backend aliases ``tty`` and ``parport`` (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -``tty`` and ``parport`` are aliases that will be removed. Instead, the -actual backend names ``serial`` and ``parallel`` should be used. - -RISC-V ``-bios`` (since 5.1) -'''''''''''''''''''''''''''' - -QEMU 4.1 introduced support for the -bios option in QEMU for RISC-V for the -RISC-V virt machine and sifive_u machine. QEMU 4.1 had no changes to the -default behaviour to avoid breakages. - -QEMU 5.1 changes the default behaviour from ``-bios none`` to ``-bios default``. - -QEMU 5.1 has three options: - 1. ``-bios default`` - This is the current default behavior if no -bios option - is included. This option will load the default OpenSBI firmware automatically. - The firmware is included with the QEMU release and no user interaction is - required. All a user needs to do is specify the kernel they want to boot - with the -kernel option - 2. ``-bios none`` - QEMU will not automatically load any firmware. It is up - to the user to load all the images they need. - 3. ``-bios `` - Tells QEMU to load the specified file as the firmwrae. - -Short-form boolean options (since 6.0) -'''''''''''''''''''''''''''''''''''''' - -Boolean options such as ``share=on``/``share=off`` could be written -in short form as ``share`` and ``noshare``. This is now deprecated -and will cause a warning. - -``delay`` option for socket character devices (since 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The replacement for the ``nodelay`` short-form boolean option is ``nodelay=on`` -rather than ``delay=off``. - -``--enable-fips`` (since 6.0) -''''''''''''''''''''''''''''' - -This option restricts usage of certain cryptographic algorithms when -the host is operating in FIPS mode. - -If FIPS compliance is required, QEMU should be built with the ``libgcrypt`` -library enabled as a cryptography provider. - -Neither the ``nettle`` library, or the built-in cryptography provider are -supported on FIPS enabled hosts. - -``-writeconfig`` (since 6.0) -''''''''''''''''''''''''''''' - -The ``-writeconfig`` option is not able to serialize the entire contents -of the QEMU command line. It is thus considered a failed experiment -and deprecated, with no current replacement. - -Userspace local APIC with KVM (x86, since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''' - -Using ``-M kernel-irqchip=off`` with x86 machine types that include a local -APIC is deprecated. The ``split`` setting is supported, as is using -``-M kernel-irqchip=off`` with the ISA PC machine type. - -hexadecimal sizes with scaling multipliers (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Input parameters that take a size value should only use a size suffix -(such as 'k' or 'M') when the base is written in decimal, and not when -the value is hexadecimal. That is, '0x20M' is deprecated, and should -be written either as '32M' or as '0x2000000'. - -``-spice password=string`` (since 6.0) -'''''''''''''''''''''''''''''''''''''' - -This option is insecure because the SPICE password remains visible in -the process listing. This is replaced by the new ``password-secret`` -option which lets the password be securely provided on the command -line using a ``secret`` object instance. 
- -``opened`` property of ``rng-*`` objects (since 6.0.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The only effect of specifying ``opened=on`` in the command line or QMP -``object-add`` is that the device is opened immediately, possibly before all -other options have been processed. This will either have no effect (if -``opened`` was the last option) or cause errors. The property is therefore -useless and should not be specified. - -``loaded`` property of ``secret`` and ``secret_keyring`` objects (since 6.0.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The only effect of specifying ``loaded=on`` in the command line or QMP -``object-add`` is that the secret is loaded immediately, possibly before all -other options have been processed. This will either have no effect (if -``loaded`` was the last option) or cause options to be effectively ignored as -if they were not given. The property is therefore useless and should not be -specified. - - -QEMU Machine Protocol (QMP) commands ------------------------------------- - -``blockdev-open-tray``, ``blockdev-close-tray`` argument ``device`` (since 2.8.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``eject`` argument ``device`` (since 2.8.0) -''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``blockdev-change-medium`` argument ``device`` (since 2.8.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``block_set_io_throttle`` argument ``device`` (since 2.8.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument ``id`` instead. - -``blockdev-add`` empty string argument ``backing`` (since 2.10.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use argument value ``null`` instead. - -``block-commit`` arguments ``base`` and ``top`` (since 3.1.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use arguments ``base-node`` and ``top-node`` instead. - -``nbd-server-add`` and ``nbd-server-remove`` (since 5.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use the more generic commands ``block-export-add`` and ``block-export-del`` -instead. As part of this deprecation, where ``nbd-server-add`` used a -single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``. - -System accelerators -------------------- - -MIPS ``Trap-and-Emul`` KVM support (since 6.0) -'''''''''''''''''''''''''''''''''''''''''''''' - -The MIPS ``Trap-and-Emul`` KVM host and guest support has been removed -from Linux upstream kernel, declare it deprecated. - -System emulator CPUS --------------------- - -``moxie`` CPU (since 5.2.0) -''''''''''''''''''''''''''' - -The ``moxie`` guest CPU support is deprecated and will be removed in -a future version of QEMU. It's unclear whether anybody is still using -CPU emulation in QEMU, and there are no test images available to make -sure that the code is still working. - -``lm32`` CPUs (since 5.2.0) -''''''''''''''''''''''''''' - -The ``lm32`` guest CPU support is deprecated and will be removed in -a future version of QEMU. The only public user of this architecture -was the milkymist project, which has been dead for years; there was -never an upstream Linux port. - -``unicore32`` CPUs (since 5.2.0) -'''''''''''''''''''''''''''''''' - -The ``unicore32`` guest CPU support is deprecated and will be removed in -a future version of QEMU. 
Support for this CPU was removed from the -upstream Linux kernel, and there is no available upstream toolchain -to build binaries for it. - -``Icelake-Client`` CPU Model (since 5.2.0) -'''''''''''''''''''''''''''''''''''''''''' - -``Icelake-Client`` CPU Models are deprecated. Use ``Icelake-Server`` CPU -Models instead. - -MIPS ``I7200`` CPU Model (since 5.2) -'''''''''''''''''''''''''''''''''''' - -The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated -(the ISA has never been upstreamed to a compiler toolchain). Therefore -this CPU is also deprecated. - -System emulator machines ------------------------- - -Raspberry Pi ``raspi2`` and ``raspi3`` machines (since 5.2) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The Raspberry Pi machines come in various models (A, A+, B, B+). To be able -to distinguish which model QEMU is implementing, the ``raspi2`` and ``raspi3`` -machines have been renamed ``raspi2b`` and ``raspi3b``. - -Device options --------------- - -Emulated device options -''''''''''''''''''''''' - -``-device virtio-blk,scsi=on|off`` (since 5.0.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0 -and later do not support it because the virtio-scsi device was introduced for -full SCSI support. Use virtio-scsi instead when SCSI passthrough is required. - -Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an -alias. - -Block device options -'''''''''''''''''''' - -``"backing": ""`` (since 2.12.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In order to prevent QEMU from automatically opening an image's backing -chain, use ``"backing": null`` instead. - -``rbd`` keyvalue pair encoded filenames: ``""`` (since 3.1.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Options for ``rbd`` should be specified according to its runtime options, -like other block drivers. Legacy parsing of keyvalue pair encoded -filenames is useful to open images with the old format for backing files; -These image files should be updated to use the current format. - -Example of legacy encoding:: - - json:{"file.driver":"rbd", "file.filename":"rbd:rbd/name"} - -The above, converted to the current supported format:: - - json:{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"} - -``sheepdog`` driver (since 5.2.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``sheepdog`` block device driver is deprecated. The corresponding upstream -server project is no longer actively maintained. Users are recommended to switch -to an alternative distributed block device driver such as RBD. The -``qemu-img convert`` command can be used to liberate existing data by moving -it out of sheepdog volumes into an alternative storage backend. - -linux-user mode CPUs --------------------- - -``ppc64abi32`` CPUs (since 5.2.0) -''''''''''''''''''''''''''''''''' - -The ``ppc64abi32`` architecture has a number of issues which regularly -trip up our CI testing and is suspected to be quite broken. For that -reason the maintainers strongly suspect no one actually uses it. - -MIPS ``I7200`` CPU (since 5.2) -'''''''''''''''''''''''''''''' - -The ``I7200`` guest CPU relies on the nanoMIPS ISA, which is deprecated -(the ISA has never been upstreamed to a compiler toolchain). Therefore -this CPU is also deprecated. 
- -Related binaries ----------------- - -qemu-img amend to adjust backing file (since 5.1) -''''''''''''''''''''''''''''''''''''''''''''''''' - -The use of ``qemu-img amend`` to modify the name or format of a qcow2 -backing image is deprecated; this functionality was never fully -documented or tested, and interferes with other amend operations that -need access to the original backing image (such as deciding whether a -v3 zero cluster may be left unallocated when converting to a v2 -image). Rather, any changes to the backing chain should be performed -with ``qemu-img rebase -u`` either before or after the remaining -changes being performed by amend, as appropriate. - -qemu-img backing file without format (since 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''' - -The use of ``qemu-img create``, ``qemu-img rebase``, or ``qemu-img -convert`` to create or modify an image that depends on a backing file -now recommends that an explicit backing format be provided. This is -for safety: if QEMU probes a different format than what you thought, -the data presented to the guest will be corrupt; similarly, presenting -a raw image to a guest allows a potential security exploit if a future -probe sees a non-raw image based on guest writes. - -To avoid the warning message, or even future refusal to create an -unsafe image, you must pass ``-o backing_fmt=`` (or the shorthand -``-F`` during create) to specify the intended backing format. You may -use ``qemu-img rebase -u`` to retroactively add a backing format to an -existing image. However, be aware that there are already potential -security risks to blindly using ``qemu-img info`` to probe the format -of an untrusted backing image, when deciding what format to add into -an existing image. - -Backwards compatibility ------------------------ - -Runnability guarantee of CPU models (since 4.1.0) -''''''''''''''''''''''''''''''''''''''''''''''''' - -Previous versions of QEMU never changed existing CPU models in -ways that introduced additional host software or hardware -requirements to the VM. This allowed management software to -safely change the machine type of an existing VM without -introducing new requirements ("runnability guarantee"). This -prevented CPU models from being updated to include CPU -vulnerability mitigations, leaving guests vulnerable in the -default configuration. - -The CPU model runnability guarantee won't apply anymore to -existing CPU models. Management software that needs runnability -guarantees must resolve the CPU model aliases using the -``alias-of`` field returned by the ``query-cpu-definitions`` QMP -command. - -While those guarantees are kept, the return value of -``query-cpu-definitions`` will have existing CPU model aliases -point to a version that doesn't break runnability guarantees -(specifically, version 1 of those CPU models). In future QEMU -versions, aliases will point to newer CPU model versions -depending on the machine type, so management software must -resolve CPU model aliases before starting a virtual machine. - -Guest Emulator ISAs -------------------- - -nanoMIPS ISA -'''''''''''' - -The ``nanoMIPS`` ISA has never been upstreamed to any compiler toolchain. -As it is hard to generate binaries for it, declare it deprecated. diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst new file mode 100644 index 00000000000..19944f526ce --- /dev/null +++ b/docs/system/device-emulation.rst @@ -0,0 +1,91 @@ +.. 
_device-emulation: + +Device Emulation +---------------- + +QEMU supports the emulation of a large number of devices from +peripherals such network cards and USB devices to integrated systems +on a chip (SoCs). Configuration of these is often a source of +confusion so it helps to have an understanding of some of the terms +used to describes devices within QEMU. + +Common Terms +~~~~~~~~~~~~ + +Device Front End +================ + +A device front end is how a device is presented to the guest. The type +of device presented should match the hardware that the guest operating +system is expecting to see. All devices can be specified with the +``--device`` command line option. Running QEMU with the command line +options ``--device help`` will list all devices it is aware of. Using +the command line ``--device foo,help`` will list the additional +configuration options available for that device. + +A front end is often paired with a back end, which describes how the +host's resources are used in the emulation. + +Device Buses +============ + +Most devices will exist on a BUS of some sort. Depending on the +machine model you choose (``-M foo``) a number of buses will have been +automatically created. In most cases the BUS a device is attached to +can be inferred, for example PCI devices are generally automatically +allocated to the next free address of first PCI bus found. However in +complicated configurations you can explicitly specify what bus +(``bus=ID``) a device is attached to along with its address +(``addr=N``). + +Some devices, for example a PCI SCSI host controller, will add an +additional buses to the system that other devices can be attached to. +A hypothetical chain of devices might look like: + + --device foo,bus=pci.0,addr=0,id=foo + --device bar,bus=foo.0,addr=1,id=baz + +which would be a bar device (with the ID of baz) which is attached to +the first foo bus (foo.0) at address 1. The foo device which provides +that bus is itself is attached to the first PCI bus (pci.0). + + +Device Back End +=============== + +The back end describes how the data from the emulated device will be +processed by QEMU. The configuration of the back end is usually +specific to the class of device being emulated. For example serial +devices will be backed by a ``--chardev`` which can redirect the data +to a file or socket or some other system. Storage devices are handled +by ``--blockdev`` which will specify how blocks are handled, for +example being stored in a qcow2 file or accessing a raw host disk +partition. Back ends can sometimes be stacked to implement features +like snapshots. + +While the choice of back end is generally transparent to the guest, +there are cases where features will not be reported to the guest if +the back end is unable to support it. + +Device Pass Through +=================== + +Device pass through is where the device is actually given access to +the underlying hardware. This can be as simple as exposing a single +USB device on the host system to the guest or dedicating a video card +in a PCI slot to the exclusive use of the guest. + + +Emulated Devices +~~~~~~~~~~~~~~~~ + +.. 
toctree:: + :maxdepth: 1 + + devices/ivshmem.rst + devices/net.rst + devices/nvme.rst + devices/usb.rst + devices/vhost-user.rst + devices/virtio-pmem.rst + devices/vhost-user-rng.rst diff --git a/docs/system/device-url-syntax.rst.inc b/docs/system/device-url-syntax.rst.inc index 6f6ec8366b7..7dbc525fa80 100644 --- a/docs/system/device-url-syntax.rst.inc +++ b/docs/system/device-url-syntax.rst.inc @@ -15,7 +15,7 @@ These are specified using a special URL syntax. 'iqn.2008-11.org.linux-kvm[:]' but this can also be set from the command line or a configuration file. - Since version Qemu 2.4 it is possible to specify a iSCSI request + Since version QEMU 2.4 it is possible to specify a iSCSI request timeout to detect stalled requests and force a reestablishment of the session. The timeout is specified in seconds. The default is 0 which means no timeout. Libiscsi 1.15.0 or greater is required for this @@ -85,24 +85,6 @@ These are specified using a special URL syntax. Currently authentication must be done using ssh-agent. Other authentication methods may be supported in future. -``Sheepdog`` - Sheepdog is a distributed storage system for QEMU. QEMU supports - using either local sheepdog devices or remote networked devices. - - Syntax for specifying a sheepdog device - - :: - - sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] - - Example - - .. parsed-literal:: - - |qemu_system| --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine - - See also https://sheepdog.github.io/sheepdog/. - ``GlusterFS`` GlusterFS is a user space distributed file system. QEMU supports the use of GlusterFS volumes for hosting VM disk images using TCP, Unix diff --git a/docs/system/ivshmem.rst b/docs/system/devices/ivshmem.rst similarity index 100% rename from docs/system/ivshmem.rst rename to docs/system/devices/ivshmem.rst diff --git a/docs/system/net.rst b/docs/system/devices/net.rst similarity index 100% rename from docs/system/net.rst rename to docs/system/devices/net.rst diff --git a/docs/system/nvme.rst b/docs/system/devices/nvme.rst similarity index 82% rename from docs/system/nvme.rst rename to docs/system/devices/nvme.rst index f7f63d6bf61..b5acb2a9c19 100644 --- a/docs/system/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -70,7 +70,7 @@ namespaces and additional features, the ``nvme-ns`` device must be used. The namespaces defined by the ``nvme-ns`` device will attach to the most recently defined ``nvme-bus`` that is created by the ``nvme`` device. Namespace -identifers are allocated automatically, starting from ``1``. +identifiers are allocated automatically, starting from ``1``. There are a number of parameters available: @@ -81,6 +81,12 @@ There are a number of parameters available: Set the UUID of the namespace. This will be reported as a "Namespace UUID" descriptor in the Namespace Identification Descriptor List. +``eui64`` + Set the EUI-64 of the namespace. This will be reported as a "IEEE Extended + Unique Identifier" descriptor in the Namespace Identification Descriptor List. + Since machine type 6.1 a non-zero default value is used if the parameter + is not provided. For earlier machine types the field defaults to 0. + ``bus`` If there are more ``nvme`` devices defined, this parameter may be used to attach the namespace to a specific ``nvme`` device (identified by an ``id`` @@ -104,28 +110,32 @@ multipath I/O. This will create an NVM subsystem with two controllers. 
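+For reference, a two-controller subsystem of this kind might be set up with
+``-device`` options along these lines (a sketch only; the ``id``, ``nqn`` and
+``serial`` values are placeholders):
+
+.. code-block:: console
+
+   -device nvme-subsys,id=nvme-subsys-0,nqn=subsys0
+   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
+   -device nvme,serial=deadc0de,subsys=nvme-subsys-0
+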
Having controllers linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters: -``shared`` (default: ``off``) +``shared`` (default: ``on`` since 6.2) Specifies that the namespace will be attached to all controllers in the - subsystem. If set to ``off`` (the default), the namespace will remain a - private namespace and may only be attached to a single controller at a time. + subsystem. If set to ``off``, the namespace will remain a private namespace + and may only be attached to a single controller at a time. Shared namespaces + are always automatically attached to all controllers (also when controllers + are hotplugged). ``detached`` (default: ``off``) If set to ``on``, the namespace will be be available in the subsystem, but - not attached to any controllers initially. + not attached to any controllers initially. A shared namespace with this set + to ``on`` will never be automatically attached to controllers. Thus, adding .. code-block:: console -drive file=nvm-1.img,if=none,id=nvm-1 - -device nvme-ns,drive=nvm-1,nsid=1,shared=on + -device nvme-ns,drive=nvm-1,nsid=1 -drive file=nvm-2.img,if=none,id=nvm-2 - -device nvme-ns,drive=nvm-2,nsid=3,detached=on + -device nvme-ns,drive=nvm-2,nsid=3,shared=off,detached=on -will cause NSID 1 will be a shared namespace (due to ``shared=on``) that is -initially attached to both controllers. NSID 3 will be a private namespace -(i.e. only attachable to a single controller at a time) and will not be -attached to any controller initially (due to ``detached=on``). +will cause NSID 1 will be a shared namespace that is initially attached to both +controllers. NSID 3 will be a private namespace due to ``shared=off`` and only +attachable to a single controller at a time. Additionally it will not be +attached to any controller initially (due to ``detached=on``) or to hotplugged +controllers. Optional Features ================= @@ -196,6 +206,12 @@ The namespace may be configured with additional parameters allows all zones to be open. If ``zoned.max_active`` is specified, this value must be less than or equal to that. +``zoned.zasl=UINT8`` (default: ``0``) + Set the maximum data transfer size for the Zone Append command. Like + ``mdts``, the value is specified as a power of two (2^n) and is in units of + the minimum memory page size (CAP.MPSMIN). The default value (``0``) + has this property inherit the ``mdts`` value. + Metadata -------- diff --git a/docs/system/devices/usb.rst b/docs/system/devices/usb.rst new file mode 100644 index 00000000000..afb7d6c2268 --- /dev/null +++ b/docs/system/devices/usb.rst @@ -0,0 +1,351 @@ +.. _pcsys_005fusb: + +USB emulation +------------- + +QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can +plug virtual USB devices or real host USB devices (only works with +certain host operating systems). QEMU will automatically create and +connect virtual USB hubs as necessary to connect multiple USB devices. + +USB controllers +~~~~~~~~~~~~~~~ + +XHCI controller support +^^^^^^^^^^^^^^^^^^^^^^^ + +QEMU has XHCI host adapter support. The XHCI hardware design is much +more virtualization-friendly when compared to EHCI and UHCI, thus XHCI +emulation uses less resources (especially CPU). So if your guest +supports XHCI (which should be the case for any operating system +released around 2010 or later) we recommend using it: + + qemu -device qemu-xhci + +XHCI supports USB 1.1, USB 2.0 and USB 3.0 devices, so this is the +only controller you need. 
With only a single USB controller (and +therefore only a single USB bus) present in the system there is no +need to use the bus= parameter when adding USB devices. + + +EHCI controller support +^^^^^^^^^^^^^^^^^^^^^^^ + +The QEMU EHCI Adapter supports USB 2.0 devices. It can be used either +standalone or with companion controllers (UHCI, OHCI) for USB 1.1 +devices. The companion controller setup is more convenient to use +because it provides a single USB bus supporting both USB 2.0 and USB +1.1 devices. See next section for details. + +When running EHCI in standalone mode you can add UHCI or OHCI +controllers for USB 1.1 devices too. Each controller creates its own +bus though, so there are two completely separate USB buses: One USB +1.1 bus driven by the UHCI controller and one USB 2.0 bus driven by +the EHCI controller. Devices must be attached to the correct +controller manually. + +The easiest way to add a UHCI controller to a ``pc`` machine is the +``-usb`` switch. QEMU will create the UHCI controller as function of +the PIIX3 chipset. The USB 1.1 bus will carry the name ``usb-bus.0``. + +You can use the standard ``-device`` switch to add a EHCI controller to +your virtual machine. It is strongly recommended to specify an ID for +the controller so the USB 2.0 bus gets an individual name, for example +``-device usb-ehci,id=ehci``. This will give you a USB 2.0 bus named +``ehci.0``. + +When adding USB devices using the ``-device`` switch you can specify the +bus they should be attached to. Here is a complete example: + +.. parsed-literal:: + + |qemu_system| -M pc ${otheroptions} \\ + -drive if=none,id=usbstick,format=raw,file=/path/to/image \\ + -usb \\ + -device usb-ehci,id=ehci \\ + -device usb-tablet,bus=usb-bus.0 \\ + -device usb-storage,bus=ehci.0,drive=usbstick + +This attaches a USB tablet to the UHCI adapter and a USB mass storage +device to the EHCI adapter. + + +Companion controller support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The UHCI and OHCI controllers can attach to a USB bus created by EHCI +as companion controllers. This is done by specifying the ``masterbus`` +and ``firstport`` properties. ``masterbus`` specifies the bus name the +controller should attach to. ``firstport`` specifies the first port the +controller should attach to, which is needed as usually one EHCI +controller with six ports has three UHCI companion controllers with +two ports each. + +There is a config file in docs which will do all this for +you, which you can use like this: + +.. parsed-literal:: + + |qemu_system| -readconfig docs/config/ich9-ehci-uhci.cfg + +Then use ``bus=ehci.0`` to assign your USB devices to that bus. + +Using the ``-usb`` switch for ``q35`` machines will create a similar +USB controller configuration. + + +.. _Connecting USB devices: + +Connecting USB devices +~~~~~~~~~~~~~~~~~~~~~~ + +USB devices can be connected with the ``-device usb-...`` command line +option or the ``device_add`` monitor command. Available devices are: + +``usb-mouse`` + Virtual Mouse. This will override the PS/2 mouse emulation when + activated. + +``usb-tablet`` + Pointer device that uses absolute coordinates (like a touchscreen). + This means QEMU is able to report the mouse position without having + to grab the mouse. Also overrides the PS/2 mouse emulation when + activated. + +``usb-storage,drive=drive_id`` + Mass storage device backed by drive_id (see the :ref:`disk images` + chapter in the System Emulation Users Guide). This is the classic + bulk-only transport protocol used by 99% of USB sticks. 
This + example shows it connected to an XHCI USB controller and with + a drive backed by a raw format disk image: + + .. parsed-literal:: + + |qemu_system| [...] \\ + -drive if=none,id=stick,format=raw,file=/path/to/file.img \\ + -device nec-usb-xhci,id=xhci \\ + -device usb-storage,bus=xhci.0,drive=stick + +``usb-uas`` + USB attached SCSI device. This does not create a SCSI disk, so + you need to explicitly create a ``scsi-hd`` or ``scsi-cd`` device + on the command line, as well as using the ``-drive`` option to + specify what those disks are backed by. One ``usb-uas`` device can + handle multiple logical units (disks). This example creates three + logical units: two disks and one cdrom drive: + + .. parsed-literal:: + + |qemu_system| [...] \\ + -drive if=none,id=uas-disk1,format=raw,file=/path/to/file1.img \\ + -drive if=none,id=uas-disk2,format=raw,file=/path/to/file2.img \\ + -drive if=none,id=uas-cdrom,media=cdrom,format=raw,file=/path/to/image.iso \\ + -device nec-usb-xhci,id=xhci \\ + -device usb-uas,id=uas,bus=xhci.0 \\ + -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=uas-disk1 \\ + -device scsi-hd,bus=uas.0,scsi-id=0,lun=1,drive=uas-disk2 \\ + -device scsi-cd,bus=uas.0,scsi-id=0,lun=5,drive=uas-cdrom + +``usb-bot`` + Bulk-only transport storage device. This presents the guest with the + same USB bulk-only transport protocol interface as ``usb-storage``, but + the QEMU command line option works like ``usb-uas`` and does not + automatically create SCSI disks for you. ``usb-bot`` supports up to + 16 LUNs. Unlike ``usb-uas``, the LUN numbers must be continuous, + i.e. for three devices you must use 0+1+2. The 0+1+5 numbering from the + ``usb-uas`` example above won't work with ``usb-bot``. + +``usb-mtp,rootdir=dir`` + Media transfer protocol device, using dir as root of the file tree + that is presented to the guest. + +``usb-host,hostbus=bus,hostaddr=addr`` + Pass through the host device identified by bus and addr + +``usb-host,vendorid=vendor,productid=product`` + Pass through the host device identified by vendor and product ID + +``usb-wacom-tablet`` + Virtual Wacom PenPartner tablet. This device is similar to the + ``tablet`` above but it can be used with the tslib library because in + addition to touch coordinates it reports touch pressure. + +``usb-kbd`` + Standard USB keyboard. Will override the PS/2 keyboard (if present). + +``usb-serial,chardev=id`` + Serial converter. This emulates an FTDI FT232BM chip connected to + host character device id. + +``usb-braille,chardev=id`` + Braille device. This will use BrlAPI to display the braille output on + a real or fake device referenced by id. + +``usb-net[,netdev=id]`` + Network adapter that supports CDC ethernet and RNDIS protocols. id + specifies a netdev defined with ``-netdev …,id=id``. For instance, + user-mode networking can be used with + + .. parsed-literal:: + + |qemu_system| [...] -netdev user,id=net0 -device usb-net,netdev=net0 + +``usb-ccid`` + Smartcard reader device + +``usb-audio`` + USB audio device + +``u2f-{emulated,passthru}`` + Universal Second Factor device + +Physical port addressing +^^^^^^^^^^^^^^^^^^^^^^^^ + +For all the above USB devices, by default QEMU will plug the device +into the next available port on the specified USB bus, or onto +some available USB bus if you didn't specify one explicitly. +If you need to, you can also specify the physical port where +the device will show up in the guest. This can be done using the +``port`` property. UHCI has two root ports (1,2). 
EHCI has six root +ports (1-6), and the emulated (1.1) USB hub has eight ports. + +Plugging a tablet into UHCI port 1 works like this:: + + -device usb-tablet,bus=usb-bus.0,port=1 + +Plugging a hub into UHCI port 2 works like this:: + + -device usb-hub,bus=usb-bus.0,port=2 + +Plugging a virtual USB stick into port 4 of the hub just plugged works +this way:: + + -device usb-storage,bus=usb-bus.0,port=2.4,drive=... + +In the monitor, the ``device_add` command also accepts a ``port`` +property specification. If you want to unplug devices too you should +specify some unique id which you can use to refer to the device. +You can then use ``device_del`` to unplug the device later. +For example:: + + (qemu) device_add usb-tablet,bus=usb-bus.0,port=1,id=my-tablet + (qemu) device_del my-tablet + +Hotplugging USB storage +~~~~~~~~~~~~~~~~~~~~~~~ + +The ``usb-bot`` and ``usb-uas`` devices can be hotplugged. In the hotplug +case they are added with ``attached = false`` so the guest will not see +the device until the ``attached`` property is explicitly set to true. +That allows you to attach one or more scsi devices before making the +device visible to the guest. The workflow looks like this: + +#. ``device-add usb-bot,id=foo`` +#. ``device-add scsi-{hd,cd},bus=foo.0,lun=0`` +#. optionally add more devices (luns 1 ... 15) +#. ``scripts/qmp/qom-set foo.attached = true`` + +.. _host_005fusb_005fdevices: + +Using host USB devices on a Linux host +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +WARNING: this is an experimental feature. QEMU will slow down when using +it. USB devices requiring real time streaming (i.e. USB Video Cameras) +are not supported yet. + +1. If you use an early Linux 2.4 kernel, verify that no Linux driver is + actually using the USB device. A simple way to do that is simply to + disable the corresponding kernel module by renaming it from + ``mydriver.o`` to ``mydriver.o.disabled``. + +2. Verify that ``/proc/bus/usb`` is working (most Linux distributions + should enable it by default). You should see something like that: + + :: + + ls /proc/bus/usb + 001 devices drivers + +3. Since only root can access to the USB devices directly, you can + either launch QEMU as root or change the permissions of the USB + devices you want to use. For testing, the following suffices: + + :: + + chown -R myuid /proc/bus/usb + +4. Launch QEMU and do in the monitor: + + :: + + info usbhost + Device 1.2, speed 480 Mb/s + Class 00: USB device 1234:5678, USB DISK + + You should see the list of the devices you can use (Never try to use + hubs, it won't work). + +5. Add the device in QEMU by using: + + :: + + device_add usb-host,vendorid=0x1234,productid=0x5678 + + Normally the guest OS should report that a new USB device is plugged. + You can use the option ``-device usb-host,...`` to do the same. + +6. Now you can try to use the host USB device in QEMU. + +When relaunching QEMU, you may have to unplug and plug again the USB +device to make it work again (this is a bug). + +``usb-host`` properties for specifying the host device +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The example above uses the ``vendorid`` and ``productid`` to +specify which host device to pass through, but this is not +the only way to specify the host device. 
``usb-host`` supports +the following properties: + +``hostbus=`` + Specifies the bus number the device must be attached to +``hostaddr=`` + Specifies the device address the device got assigned by the guest os +``hostport=`` + Specifies the physical port the device is attached to +``vendorid=`` + Specifies the vendor ID of the device +``productid=`` + Specifies the product ID of the device. + +In theory you can combine all these properties as you like. In +practice only a few combinations are useful: + +- ``vendorid`` and ``productid`` -- match for a specific device, pass it to + the guest when it shows up somewhere in the host. + +- ``hostbus`` and ``hostport`` -- match for a specific physical port in the + host, any device which is plugged in there gets passed to the + guest. + +- ``hostbus`` and ``hostaddr`` -- most useful for ad-hoc pass through as the + hostaddr isn't stable. The next time you plug the device into the host it + will get a new hostaddr. + +Note that on the host USB 1.1 devices are handled by UHCI/OHCI and USB +2.0 by EHCI. That means different USB devices plugged into the very +same physical port on the host may show up on different host buses +depending on the speed. Supposing that devices plugged into a given +physical port appear as bus 1 + port 1 for 2.0 devices and bus 3 + port 1 +for 1.1 devices, you can pass through any device plugged into that port +and also assign it to the correct USB bus in QEMU like this: + +.. parsed-literal:: + + |qemu_system| -M pc [...] \\ + -usb \\ + -device usb-ehci,id=ehci \\ + -device usb-host,bus=usb-bus.0,hostbus=3,hostport=1 \\ + -device usb-host,bus=ehci.0,hostbus=1,hostport=1 diff --git a/docs/system/devices/vhost-user-rng.rst b/docs/system/devices/vhost-user-rng.rst new file mode 100644 index 00000000000..a145d4105c1 --- /dev/null +++ b/docs/system/devices/vhost-user-rng.rst @@ -0,0 +1,39 @@ +QEMU vhost-user-rng - RNG emulation +=================================== + +Background +---------- + +What follows builds on the material presented in vhost-user.rst - it should +be reviewed before moving forward with the content in this file. + +Description +----------- + +The vhost-user-rng device implementation was designed to work with a random +number generator daemon such as the one found in the vhost-device crate of +the rust-vmm project available on github [1]. + +[1]. https://github.com/rust-vmm/vhost-device + +Examples +-------- + +The daemon should be started first: + +:: + + host# vhost-device-rng --socket-path=rng.sock -c 1 -m 512 -p 1000 + +The QEMU invocation needs to create a chardev socket the device can +use to communicate as well as share the guests memory over a memfd. + +:: + + host# qemu-system \ + -chardev socket,path=$(PATH)/rng.sock,id=rng0 \ + -device vhost-user-rng-pci,chardev=rng0 \ + -m 4096 \ + -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \ + -numa node,memdev=mem \ + ... diff --git a/docs/system/devices/vhost-user.rst b/docs/system/devices/vhost-user.rst new file mode 100644 index 00000000000..86128114fa3 --- /dev/null +++ b/docs/system/devices/vhost-user.rst @@ -0,0 +1,59 @@ +.. _vhost_user: + +vhost-user back ends +-------------------- + +vhost-user back ends are way to service the request of VirtIO devices +outside of QEMU itself. To do this there are a number of things +required. + +vhost-user device +=================== + +These are simple stub devices that ensure the VirtIO device is visible +to the guest. 
The code is mostly boilerplate although each device has +a ``chardev`` option which specifies the ID of the ``--chardev`` +device that connects via a socket to the vhost-user *daemon*. + +vhost-user daemon +================= + +This is a separate process that is connected to by QEMU via a socket +following the :ref:`vhost_user_proto`. There are a number of daemons +that can be built when enabled by the project although any daemon that +meets the specification for a given device can be used. + +Shared memory object +==================== + +In order for the daemon to access the VirtIO queues to process the +requests it needs access to the guest's address space. This is +achieved via the ``memory-backend-file`` or ``memory-backend-memfd`` +objects. A reference to a file-descriptor which can access this object +will be passed via the socket as part of the protocol negotiation. + +Currently the shared memory object needs to match the size of the main +system memory as defined by the ``-m`` argument. + +Example +======= + +First start you daemon. + +.. parsed-literal:: + + $ virtio-foo --socket-path=/var/run/foo.sock $OTHER_ARGS + +The you start your QEMU instance specifying the device, chardev and +memory objects. + +.. parsed-literal:: + + $ |qemu_system| \\ + -m 4096 \\ + -chardev socket,id=ba1,path=/var/run/foo.sock \\ + -device vhost-user-foo,chardev=ba1,$OTHER_ARGS \\ + -object memory-backend-memfd,id=mem,size=4G,share=on \\ + -numa node,memdev=mem \\ + ... + diff --git a/docs/system/virtio-pmem.rst b/docs/system/devices/virtio-pmem.rst similarity index 100% rename from docs/system/virtio-pmem.rst rename to docs/system/devices/virtio-pmem.rst diff --git a/docs/system/gdb.rst b/docs/system/gdb.rst index 144d083df31..453eb73f6c4 100644 --- a/docs/system/gdb.rst +++ b/docs/system/gdb.rst @@ -15,7 +15,8 @@ The ``-s`` option will make QEMU listen for an incoming connection from gdb on TCP port 1234, and ``-S`` will make QEMU not start the guest until you tell it to from gdb. (If you want to specify which TCP port to use or to use something other than TCP for the gdbstub -connection, use the ``-gdb dev`` option instead of ``-s``.) +connection, use the ``-gdb dev`` option instead of ``-s``. See +`Using unix sockets`_ for an example.) .. parsed-literal:: @@ -55,7 +56,7 @@ machine has more than one CPU, QEMU exposes each CPU cluster as a separate "inferior", where each CPU within the cluster is a separate "thread". Most QEMU machine types have identical CPUs, so there is a single cluster which has all the CPUs in it. A few machine types are -heterogenous and have multiple clusters: for example the ``sifive_u`` +heterogeneous and have multiple clusters: for example the ``sifive_u`` machine has a cluster with one E51 core and a second cluster with four U54 cores. Here the E51 is the only thread in the first inferior, and the U54 cores are all threads in the second inferior. @@ -100,6 +101,29 @@ not just those in the cluster you are currently working on:: (gdb) set schedule-multiple on +Using unix sockets +================== + +An alternate method for connecting gdb to the QEMU gdbstub is to use +a unix socket (if supported by your operating system). This is useful when +running several tests in parallel, or if you do not have a known free TCP +port (e.g. when running automated tests). + +First create a chardev with the appropriate options, then +instruct the gdbserver to use that device: + +.. 
parsed-literal:: + + |qemu_system| -chardev socket,path=/tmp/gdb-socket,server=on,wait=off,id=gdb0 -gdb chardev:gdb0 -S ... + +Start gdb as before, but this time connect using the path to +the socket:: + + (gdb) target remote /tmp/gdb-socket + +Note that to use a unix socket for the connection you will need +gdb version 9.0 or newer. + Advanced debugging options ========================== diff --git a/docs/system/generic-loader.rst b/docs/system/generic-loader.rst index 6bf8a4eb486..4f9fb005f1d 100644 --- a/docs/system/generic-loader.rst +++ b/docs/system/generic-loader.rst @@ -1,8 +1,8 @@ .. Copyright (c) 2016, Xilinx Inc. -This work is licensed under the terms of the GNU GPL, version 2 or later. See -the COPYING file in the top-level directory. + This work is licensed under the terms of the GNU GPL, version 2 or later. See + the COPYING file in the top-level directory. Generic Loader -------------- @@ -92,9 +92,12 @@ shown below: specified in the executable format header. This option should only be used for the boot image. This will also cause the image to be written to the specified CPU's address space. If not specified, the - default is CPU 0. - Setting force-raw=on forces the file - to be treated as a raw image. This can be used to load supported - executable formats as if they were raw. + default is CPU 0. + +```` + Setting 'force-raw=on' forces the file to be treated as a raw image. + This can be used to load supported executable formats as if they + were raw. All values are parsed using the standard QemuOpts parsing. This allows the user to specify any values in any format supported. By default the values diff --git a/docs/system/guest-loader.rst b/docs/system/guest-loader.rst index 37d03cbd892..9ef9776bf07 100644 --- a/docs/system/guest-loader.rst +++ b/docs/system/guest-loader.rst @@ -4,7 +4,7 @@ Guest Loader ------------ -The guest loader is similar to the `generic-loader` although it is +The guest loader is similar to the ``generic-loader`` although it is aimed at a particular use case of loading hypervisor guests. This is useful for debugging hypervisors without having to jump through the hoops of firmware and boot-loaders. @@ -27,12 +27,12 @@ multi-boot capability. A typical example would look like: In the above example the Xen hypervisor is loaded by the -kernel parameter and passed it's boot arguments via -append. The Dom0 guest is loaded into the areas of memory. Each blob will get -`/chosen/module@` entry in the FDT to indicate it's location and +``/chosen/module@`` entry in the FDT to indicate it's location and size. Additional information can be passed with by using additional arguments. Currently the only supported machines which use FDT data to boot are -the ARM and RiscV `virt` machines. +the ARM and RiscV ``virt`` machines. Arguments ^^^^^^^^^ @@ -51,4 +51,4 @@ The full syntax of the guest-loader is:: ``bootargs=`` This is an optional field for kernel blobs which will pass command - like via the `/chosen/module@/bootargs` node. + like via the ``/chosen/module@/bootargs`` node. diff --git a/docs/system/i386/cpu.rst b/docs/system/i386/cpu.rst new file mode 100644 index 00000000000..738719da9a2 --- /dev/null +++ b/docs/system/i386/cpu.rst @@ -0,0 +1 @@ +.. 
include:: ../cpu-models-x86.rst.inc diff --git a/docs/system/i386/kvm-pv.rst b/docs/system/i386/kvm-pv.rst new file mode 100644 index 00000000000..1e5a9923ef4 --- /dev/null +++ b/docs/system/i386/kvm-pv.rst @@ -0,0 +1,100 @@ +Paravirtualized KVM features +============================ + +Description +----------- + +In some cases when implementing hardware interfaces in software is slow, ``KVM`` +implements its own paravirtualized interfaces. + +Setup +----- + +Paravirtualized ``KVM`` features are represented as CPU flags. The following +features are enabled by default for any CPU model when ``KVM`` acceleration is +enabled: + +- ``kvmclock`` +- ``kvm-nopiodelay`` +- ``kvm-asyncpf`` +- ``kvm-steal-time`` +- ``kvm-pv-eoi`` +- ``kvmclock-stable-bit`` + +``kvm-msi-ext-dest-id`` feature is enabled by default in x2apic mode with split +irqchip (e.g. "-machine ...,kernel-irqchip=split -cpu ...,x2apic"). + +Note: when CPU model ``host`` is used, QEMU passes through all supported +paravirtualized ``KVM`` features to the guest. + +Existing features +----------------- + +``kvmclock`` + Expose a ``KVM`` specific paravirtualized clocksource to the guest. Supported + since Linux v2.6.26. + +``kvm-nopiodelay`` + The guest doesn't need to perform delays on PIO operations. Supported since + Linux v2.6.26. + +``kvm-mmu`` + This feature is deprecated. + +``kvm-asyncpf`` + Enable asynchronous page fault mechanism. Supported since Linux v2.6.38. + Note: since Linux v5.10 the feature is deprecated and not enabled by ``KVM``. + Use ``kvm-asyncpf-int`` instead. + +``kvm-steal-time`` + Enable stolen (when guest vCPU is not running) time accounting. Supported + since Linux v3.1. + +``kvm-pv-eoi`` + Enable paravirtualized end-of-interrupt signaling. Supported since Linux + v3.10. + +``kvm-pv-unhalt`` + Enable paravirtualized spinlocks support. Supported since Linux v3.12. + +``kvm-pv-tlb-flush`` + Enable paravirtualized TLB flush mechanism. Supported since Linux v4.16. + +``kvm-pv-ipi`` + Enable paravirtualized IPI mechanism. Supported since Linux v4.19. + +``kvm-poll-control`` + Enable host-side polling on HLT control from the guest. Supported since Linux + v5.10. + +``kvm-pv-sched-yield`` + Enable paravirtualized sched yield feature. Supported since Linux v5.10. + +``kvm-asyncpf-int`` + Enable interrupt based asynchronous page fault mechanism. Supported since Linux + v5.10. + +``kvm-msi-ext-dest-id`` + Support 'Extended Destination ID' for external interrupts. The feature allows + to use up to 32768 CPUs without IRQ remapping (but other limits may apply making + the number of supported vCPUs for a given configuration lower). Supported since + Linux v5.10. + +``kvmclock-stable-bit`` + Tell the guest that guest visible TSC value can be fully trusted for kvmclock + computations and no warps are expected. Supported since Linux v2.6.35. + +Supplementary features +---------------------- + +``kvm-pv-enforce-cpuid`` + Limit the supported paravirtualized feature set to the exposed features only. + Note, by default, ``KVM`` allows the guest to use all currently supported + paravirtualized features even when they were not announced in guest visible + CPUIDs. Supported since Linux v5.10. + + +Useful links +------------ + +Please refer to Documentation/virt/kvm in Linux for additional details. 
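+Since the features above are ordinary CPU flags, they can be toggled per guest.
+For example, a guest that should not use stolen-time accounting could be
+started along these lines (a sketch only; the accelerator and feature
+selection are purely illustrative):
+
+.. parsed-literal::
+
+   |qemu_system_x86| -accel kvm -cpu host,-kvm-steal-time
+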
diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst new file mode 100644 index 00000000000..f8fade5ac2d --- /dev/null +++ b/docs/system/i386/sgx.rst @@ -0,0 +1,165 @@ +Software Guard eXtensions (SGX) +=============================== + +Overview +-------- + +Intel Software Guard eXtensions (SGX) is a set of instructions and mechanisms +for memory accesses that provide secure access to sensitive +applications and data. SGX allows an application to use a particular area of +its address space as an *enclave*, which is a protected area that provides confidentiality +and integrity even in the presence of privileged malware. Accesses to the +enclave memory area from any software not resident in the enclave are prevented, +including those from privileged software. + +Virtual SGX +----------- + +The SGX feature is exposed to the guest via SGX CPUID. For most of the SGX CPUID +leaves, QEMU can report the same CPUID info to the guest as on the host. By +reporting the same CPUID, the guest is able to use the full capacity of SGX, and KVM +doesn't need to emulate that information. + +The guest's EPC base and size are determined by QEMU, and KVM needs QEMU to +notify it of this information before it can initialize SGX for the guest. + +Virtual EPC +~~~~~~~~~~~ + +By default, QEMU does not assign EPC to a VM, i.e. fully enabling SGX in a VM +requires explicit allocation of EPC to the VM. Similar to other specialized +memory types, e.g. hugetlbfs, EPC is exposed as a memory backend. + +SGX EPC is enumerated through CPUID, i.e. EPC "devices" need to be realized +prior to realizing the vCPUs themselves, which occurs long before generic +devices are parsed and realized. This limitation means that EPC does not +require -maxmem as EPC is not treated as {cold,hot}plugged memory. + +QEMU does not artificially restrict the number of EPC sections exposed to a +guest, e.g. QEMU will happily allow you to create 64 1M EPC sections. Be aware +that some kernels may not recognize all EPC sections, e.g. the Linux SGX driver +is hardwired to support only 8 EPC sections. + +The following QEMU snippet creates two EPC sections, with 64M pre-allocated +to the VM and an additional 28M mapped but not allocated:: + + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \ + -object memory-backend-epc,id=mem2,size=28M \ + -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 + +Note: + +The size and location of the virtual EPC are far less restricted compared +to physical EPC. Because physical EPC is protected via range registers, +the size of the physical EPC must be a power of two (though software sees +a subset of the full EPC, e.g. 92M or 128M) and the EPC must be naturally +aligned. KVM SGX's virtual EPC is purely a software construct and only +requires the size and location to be page aligned. QEMU enforces that the EPC +size is a multiple of 4k and will ensure the base of the EPC is 4k aligned. +To simplify the implementation, EPC is always located above 4g in the guest +physical address space. + +Migration +~~~~~~~~~ + +QEMU/KVM doesn't prevent live migrating SGX VMs, although from hardware's +perspective, SGX doesn't support live migration, since both EPC and the SGX +key hierarchy are bound to the physical platform. However, live migration +can be supported if the guest software stack can recreate +enclaves when it suffers a sudden loss of EPC, and if guest enclaves can detect +SGX keys being changed and handle this gracefully. 
For instance, when ERESUME fails +with #PF.SGX, guest software can gracefully detect it and recreate enclaves; +and when enclave fails to unseal sensitive information from outside, it can +detect such error and sensitive information can be provisioned to it again. + +CPUID +~~~~~ + +Due to its myriad dependencies, SGX is currently not listed as supported +in any of QEMU's built-in CPU configuration. To expose SGX (and SGX Launch +Control) to a guest, you must either use ``-cpu host`` to pass-through the +host CPU model, or explicitly enable SGX when using a built-in CPU model, +e.g. via ``-cpu ,+sgx`` or ``-cpu ,+sgx,+sgxlc``. + +All SGX sub-features enumerated through CPUID, e.g. SGX2, MISCSELECT, +ATTRIBUTES, etc... can be restricted via CPUID flags. Be aware that enforcing +restriction of MISCSELECT, ATTRIBUTES and XFRM requires intercepting ECREATE, +i.e. may marginally reduce SGX performance in the guest. All SGX sub-features +controlled via -cpu are prefixed with "sgx", e.g.:: + + $ qemu-system-x86_64 -cpu help | xargs printf "%s\n" | grep sgx + sgx + sgx-debug + sgx-encls-c + sgx-enclv + sgx-exinfo + sgx-kss + sgx-mode64 + sgx-provisionkey + sgx-tokenkey + sgx1 + sgx2 + sgxlc + +The following QEMU snippet passes through the host CPU but restricts access to +the provision and EINIT token keys:: + + -cpu host,-sgx-provisionkey,-sgx-tokenkey + +SGX sub-features cannot be emulated, i.e. sub-features that are not present +in hardware cannot be forced on via '-cpu'. + +Virtualize SGX Launch Control +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +QEMU SGX support for Launch Control (LC) is passive, in the sense that it +does not actively change the LC configuration. QEMU SGX provides the user +the ability to set/clear the CPUID flag (and by extension the associated +IA32_FEATURE_CONTROL MSR bit in fw_cfg) and saves/restores the LE Hash MSRs +when getting/putting guest state, but QEMU does not add new controls to +directly modify the LC configuration. Similar to hardware behavior, locking +the LC configuration to a non-Intel value is left to guest firmware. Unlike +host bios setting for SGX launch control(LC), there is no special bios setting +for SGX guest by our design. If host is in locked mode, we can still allow +creating VM with SGX. + +Feature Control +~~~~~~~~~~~~~~~ + +QEMU SGX updates the ``etc/msr_feature_control`` fw_cfg entry to set the SGX +(bit 18) and SGX LC (bit 17) flags based on their respective CPUID support, +i.e. existing guest firmware will automatically set SGX and SGX LC accordingly, +assuming said firmware supports fw_cfg.msr_feature_control. + +Launching a guest +----------------- + +To launch a SGX guest: + +.. parsed-literal:: + + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ + -object memory-backend-epc,id=mem2,size=28M \\ + -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 + +Utilizing SGX in the guest requires a kernel/OS with SGX support. 
+The support can be determined in guest by:: + + $ grep sgx /proc/cpuinfo + +and SGX epc info by:: + + $ dmesg | grep sgx + [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff + [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff + +References +---------- + +- `SGX Homepage `__ + +- `SGX SDK `__ + +- SGX specification: Intel SDM Volume 3 diff --git a/docs/system/images.rst b/docs/system/images.rst index 3d9144e6258..d000bd6b6f1 100644 --- a/docs/system/images.rst +++ b/docs/system/images.rst @@ -20,7 +20,7 @@ where myimage.img is the disk image filename and mysize is its size in kilobytes. You can add an ``M`` suffix to give the size in megabytes and a ``G`` suffix for gigabytes. -See the qemu-img invocation documentation for more information. +See the ``qemu-img`` invocation documentation for more information. .. _disk_005fimages_005fsnapshot_005fmode: diff --git a/docs/system/index.rst b/docs/system/index.rst index b05af716a97..73bbedbc22d 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -1,44 +1,36 @@ -.. This is the top level page for the 'system' manual. - - +---------------- System Emulation -================ +---------------- -This manual is the overall guide for users using QEMU +This section of the manual is the overall guide for users using QEMU for full system emulation (as opposed to user-mode emulation). This includes working with hypervisors such as KVM, Xen, Hax or Hypervisor.Framework. -Contents: - .. toctree:: :maxdepth: 3 quickstart invocation + device-emulation keys mux-chardev monitor images - net virtio-net-failover - usb - nvme - ivshmem linuxboot generic-loader guest-loader + barrier vnc-security tls + secrets + authz gdb managed-startup + bootindex cpu-hotplug - virtio-pmem pr-manager targets security multi-process - deprecated - removed-features - build-platforms - license diff --git a/docs/system/multi-process.rst b/docs/system/multi-process.rst index 46bb0cafc27..210531ee17d 100644 --- a/docs/system/multi-process.rst +++ b/docs/system/multi-process.rst @@ -45,7 +45,7 @@ Following is a description of command-line used to launch mpqemu. -device lsi53c895a,id=lsi0 \ -drive id=drive_image2,file=/build/ol7-nvme-test-1.qcow2 \ -device scsi-hd,id=drive2,drive=drive_image2,bus=lsi0.0,scsi-id=0 \ - -object x-remote-object,id=robj1,devid=lsi1,fd=4, + -object x-remote-object,id=robj1,devid=lsi0,fd=4, * QEMU: diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst index 43c58bc32e7..86186b7d2cb 100644 --- a/docs/system/ppc/powernv.rst +++ b/docs/system/ppc/powernv.rst @@ -48,15 +48,14 @@ Firmware -------- The OPAL firmware (OpenPower Abstraction Layer) for OpenPower systems -includes the runtime services `skiboot` and the bootloader kernel and -initramfs `skiroot`. Source code can be found on GitHub: +includes the runtime services ``skiboot`` and the bootloader kernel and +initramfs ``skiroot``. Source code can be found on GitHub: https://github.com/open-power. -Prebuilt images of `skiboot` and `skiboot` are made available on the `OpenPOWER `__ site. To boot a POWER9 machine, use the `witherspoon `__ images. For POWER8, use -the `palmetto `__ images. +Prebuilt images of ``skiboot`` and ``skiroot`` are made available on the `OpenPOWER `__ site. -QEMU includes a prebuilt image of `skiboot` which is updated when a +QEMU includes a prebuilt image of ``skiboot`` which is updated when a more recent version is required by the models. 
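+A locally built firmware image can be used instead of the bundled one by
+passing it with ``-bios``. A rough sketch (assuming a self-built skiboot.lid;
+the file names and kernel payload are illustrative only):
+
+.. code-block:: bash
+
+   qemu-system-ppc64 -M powernv -bios /path/to/skiboot.lid \
+       -kernel zImage.epapr -initrd rootfs.cpio -nographic
+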
Boot options diff --git a/docs/system/ppc/ppce500.rst b/docs/system/ppc/ppce500.rst new file mode 100644 index 00000000000..9beef391717 --- /dev/null +++ b/docs/system/ppc/ppce500.rst @@ -0,0 +1,164 @@ +ppce500 generic platform (``ppce500``) +====================================== + +QEMU for PPC supports a special ``ppce500`` machine designed for emulation and +virtualization purposes. + +Supported devices +----------------- + +The ``ppce500`` machine supports the following devices: + +* PowerPC e500 series core (e500v2/e500mc/e5500/e6500) +* Configuration, Control, and Status Register (CCSR) +* Multicore Programmable Interrupt Controller (MPIC) with MSI support +* 1 16550A UART device +* 1 Freescale MPC8xxx I2C controller +* 1 Pericom pt7c4338 RTC via I2C +* 1 Freescale MPC8xxx GPIO controller +* Power-off functionality via one GPIO pin +* 1 Freescale MPC8xxx PCI host controller +* VirtIO devices via PCI bus +* 1 Freescale Enhanced Triple Speed Ethernet controller (eTSEC) + +Hardware configuration information +---------------------------------- + +The ``ppce500`` machine automatically generates a device tree blob ("dtb") +which it passes to the guest, if there is no ``-dtb`` option. This provides +information about the addresses, interrupt lines and other configuration of +the various devices in the system. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs should have the following requirements: + +* The number of subnodes under /cpus node should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` + +Both ``qemu-system-ppc`` and ``qemu-system-ppc64`` provide emulation for the +following 32-bit PowerPC CPUs: + +* e500v2 +* e500mc + +Additionally ``qemu-system-ppc64`` provides support for the following 64-bit +PowerPC CPUs: + +* e5500 +* e6500 + +The CPU type can be specified via the ``-cpu`` command line. If not specified, +it creates a machine with e500v2 core. The following example shows an e6500 +based machine creation: + +.. code-block:: bash + + $ qemu-system-ppc64 -nographic -M ppce500 -cpu e6500 + +Boot options +------------ + +The ``ppce500`` machine can start using the standard -kernel functionality +for loading a payload like an OS kernel (e.g.: Linux), or U-Boot firmware. + +When -bios is omitted, the default pc-bios/u-boot.e500 firmware image is used +as the BIOS. QEMU follows below truth table to select which payload to execute: + +===== ========== ======= +-bios -kernel payload +===== ========== ======= + N N u-boot + N Y kernel + Y don't care u-boot +===== ========== ======= + +When both -bios and -kernel are present, QEMU loads U-Boot and U-Boot in turns +automatically loads the kernel image specified by the -kernel parameter via +U-Boot's built-in "bootm" command, hence a legacy uImage format is required in +such scenario. + +Running Linux kernel +-------------------- + +Linux mainline v5.11 release is tested at the time of writing. To build a +Linux mainline kernel that can be booted by the ``ppce500`` machine in +64-bit mode, simply configure the kernel using the defconfig configuration: + +.. code-block:: bash + + $ export ARCH=powerpc + $ export CROSS_COMPILE=powerpc-linux- + $ make corenet64_smp_defconfig + $ make menuconfig + +then manually select the following configuration: + + Platform support > Freescale Book-E Machine Type > QEMU generic e500 platform + +To boot the newly built Linux kernel in QEMU with the ``ppce500`` machine: + +.. 
code-block:: bash + + $ qemu-system-ppc64 -M ppce500 -cpu e5500 -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel vmlinux \ + -initrd /path/to/rootfs.cpio \ + -append "root=/dev/ram" + +To build a Linux mainline kernel that can be booted by the ``ppce500`` machine +in 32-bit mode, use the same 64-bit configuration steps except the defconfig +file should use corenet32_smp_defconfig. + +To boot the 32-bit Linux kernel: + +.. code-block:: bash + + $ qemu-system-ppc{64|32} -M ppce500 -cpu e500mc -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel vmlinux \ + -initrd /path/to/rootfs.cpio \ + -append "root=/dev/ram" + +Running U-Boot +-------------- + +U-Boot mainline v2021.07 release is tested at the time of writing. To build a +U-Boot mainline bootloader that can be booted by the ``ppce500`` machine, use +the qemu-ppce500_defconfig with similar commands as described above for Linux: + +.. code-block:: bash + + $ export CROSS_COMPILE=powerpc-linux- + $ make qemu-ppce500_defconfig + +You will get u-boot file in the build tree. + +When U-Boot boots, you will notice the following if using with ``-cpu e6500``: + +.. code-block:: none + + CPU: Unknown, Version: 0.0, (0x00000000) + Core: e6500, Version: 2.0, (0x80400020) + +This is because we only specified a core name to QEMU and it does not have a +meaningful SVR value which represents an actual SoC that integrates such core. +You can specify a real world SoC device that QEMU has built-in support but all +these SoCs are e500v2 based MPC85xx series, hence you cannot test anything +built for P4080 (e500mc), P5020 (e5500) and T2080 (e6500). + +By default a VirtIO standard PCI networking device is connected as an ethernet +interface at PCI address 0.1.0, but we can switch that to an e1000 NIC by: + +.. code-block:: bash + + $ qemu-system-ppc -M ppce500 -smp 4 -m 2G \ + -display none -serial stdio \ + -bios u-boot \ + -nic tap,ifname=tap0,script=no,downscript=no,model=e1000 + +The QEMU ``ppce500`` machine can also dynamically instantiate an eTSEC device +if “-device eTSEC” is given to QEMU: + +.. code-block:: bash + + -netdev tap,ifname=tap0,script=no,downscript=no,id=net0 -device eTSEC,netdev=net0 diff --git a/docs/system/qemu-block-drivers.rst b/docs/system/qemu-block-drivers.rst index bd99d4fa8eb..c2c0114cec4 100644 --- a/docs/system/qemu-block-drivers.rst +++ b/docs/system/qemu-block-drivers.rst @@ -1,18 +1,22 @@ :orphan: +============================ QEMU block drivers reference ============================ +-------- Synopsis -------- QEMU block driver reference manual +----------- Description ----------- .. include:: qemu-block-drivers.rst.inc +-------- See also -------- diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc index 60a064b2327..e313784426d 100644 --- a/docs/system/qemu-block-drivers.rst.inc +++ b/docs/system/qemu-block-drivers.rst.inc @@ -511,13 +511,13 @@ of an inet socket: |qemu_system| linux.img -hdb nbd+unix://?socket=/tmp/my_socket -In this case, the block device must be exported using qemu-nbd: +In this case, the block device must be exported using ``qemu-nbd``: .. parsed-literal:: qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 -The use of qemu-nbd allows sharing of a disk between several guests: +The use of ``qemu-nbd`` allows sharing of a disk between several guests: .. 
parsed-literal:: @@ -530,7 +530,7 @@ and then you can use it with two guests: |qemu_system| linux1.img -hdb nbd+unix://?socket=/tmp/my_socket |qemu_system| linux2.img -hdb nbd+unix://?socket=/tmp/my_socket -If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's +If the ``nbd-server`` uses named exports (supported since NBD 2.9.18, or with QEMU's own embedded NBD server), you must specify an export name in the URI: .. parsed-literal:: @@ -547,75 +547,6 @@ also available. Here are some example of the older syntax: |qemu_system| linux2.img -hdb nbd:unix:/tmp/my_socket |qemu_system| -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst - - -Sheepdog disk images -~~~~~~~~~~~~~~~~~~~~ - -Sheepdog is a distributed storage system for QEMU. It provides highly -available block level storage volumes that can be attached to -QEMU-based virtual machines. - -You can create a Sheepdog disk image with the command: - -.. parsed-literal:: - - qemu-img create sheepdog:///IMAGE SIZE - -where *IMAGE* is the Sheepdog image name and *SIZE* is its -size. - -To import the existing *FILENAME* to Sheepdog, you can use a -convert command. - -.. parsed-literal:: - - qemu-img convert FILENAME sheepdog:///IMAGE - -You can boot from the Sheepdog disk image with the command: - -.. parsed-literal:: - - |qemu_system| sheepdog:///IMAGE - -You can also create a snapshot of the Sheepdog image like qcow2. - -.. parsed-literal:: - - qemu-img snapshot -c TAG sheepdog:///IMAGE - -where *TAG* is a tag name of the newly created snapshot. - -To boot from the Sheepdog snapshot, specify the tag name of the -snapshot. - -.. parsed-literal:: - - |qemu_system| sheepdog:///IMAGE#TAG - -You can create a cloned image from the existing snapshot. - -.. parsed-literal:: - - qemu-img create -b sheepdog:///BASE#TAG sheepdog:///IMAGE - -where *BASE* is an image name of the source snapshot and *TAG* -is its tag name. - -You can use an unix socket instead of an inet socket: - -.. parsed-literal:: - - |qemu_system| sheepdog+unix:///IMAGE?socket=PATH - -If the Sheepdog daemon doesn't run on the local host, you need to -specify one of the Sheepdog servers to connect to. - -.. parsed-literal:: - - qemu-img create sheepdog://HOSTNAME:PORT/IMAGE SIZE - |qemu_system| sheepdog://HOSTNAME:PORT/IMAGE - iSCSI LUNs ~~~~~~~~~~ diff --git a/docs/system/qemu-cpu-models.rst b/docs/system/qemu-cpu-models.rst index 53d7538c473..5cf6e46f8ae 100644 --- a/docs/system/qemu-cpu-models.rst +++ b/docs/system/qemu-cpu-models.rst @@ -1,20 +1,24 @@ :orphan: +================================== QEMU / KVM CPU model configuration ================================== +-------- Synopsis -'''''''' +-------- QEMU CPU Modelling Infrastructure manual +----------- Description -''''''''''' +----------- .. include:: cpu-models-x86.rst.inc .. include:: cpu-models-mips.rst.inc +-------- See also -'''''''' +-------- The HTML documentation of QEMU for more precise information and Linux user mode emulator invocation. diff --git a/docs/system/qemu-manpage.rst b/docs/system/qemu-manpage.rst index e9a25d0680f..c47a4127582 100644 --- a/docs/system/qemu-manpage.rst +++ b/docs/system/qemu-manpage.rst @@ -6,9 +6,11 @@ parts of the documentation that go in the manpage as well as the HTML manual. -Title -===== +======================= +QEMU User Documentation +======================= +-------- Synopsis -------- @@ -16,11 +18,13 @@ Synopsis |qemu_system| [options] [disk_image] +----------- Description ----------- .. 
include:: target-i386-desc.rst.inc +------- Options ------- @@ -33,11 +37,13 @@ not need a disk image. .. include:: mux-chardev.rst.inc +----- Notes ----- .. include:: device-url-syntax.rst.inc +-------- See also -------- diff --git a/docs/system/removed-features.rst b/docs/system/removed-features.rst deleted file mode 100644 index 29e90601a51..00000000000 --- a/docs/system/removed-features.rst +++ /dev/null @@ -1,463 +0,0 @@ - -Removed features -================ - -What follows is a record of recently removed, formerly deprecated -features that serves as a record for users who have encountered -trouble after a recent upgrade. - -System emulator command line arguments --------------------------------------- - -``-net ...,name=``\ *name* (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''''' - -The ``name`` parameter of the ``-net`` option was a synonym -for the ``id`` parameter, which should now be used instead. - -``-no-kvm`` (removed in 5.2) -'''''''''''''''''''''''''''' - -The ``-no-kvm`` argument was a synonym for setting ``-machine accel=tcg``. - -``-realtime`` (removed in 6.0) -'''''''''''''''''''''''''''''' - -The ``-realtime mlock=on|off`` argument has been replaced by the -``-overcommit mem-lock=on|off`` argument. - -``-show-cursor`` option (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''' - -Use ``-display sdl,show-cursor=on``, ``-display gtk,show-cursor=on`` -or ``-display default,show-cursor=on`` instead. - -``-tb-size`` option (removed in 6.0) -'''''''''''''''''''''''''''''''''''' - -QEMU 5.0 introduced an alternative syntax to specify the size of the translation -block cache, ``-accel tcg,tb-size=``. - -``-usbdevice audio`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''' - -This option lacked the possibility to specify an audio backend device. -Use ``-device usb-audio`` now instead (and specify a corresponding USB -host controller or ``-usb`` if necessary). - -``-vnc acl`` (removed in 6.0) -''''''''''''''''''''''''''''' - -The ``acl`` option to the ``-vnc`` argument has been replaced -by the ``tls-authz`` and ``sasl-authz`` options. - -``-mon ...,control=readline,pretty=on|off`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``pretty=on|off`` switch has no effect for HMP monitors and -its use is rejected. - -``-drive file=json:{...{'driver':'file'}}`` (removed 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The 'file' driver for drives is no longer appropriate for character or host -devices and will only accept regular files (S_IFREG). The correct driver -for these file types is 'host_cdrom' or 'host_device' as appropriate. - -Floppy controllers' drive properties (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``-device floppy,...`` instead. When configuring onboard floppy -controllers -:: - - -global isa-fdc.driveA=... - -global sysbus-fdc.driveA=... - -global SUNW,fdtwo.drive=... - -become -:: - - -device floppy,unit=0,drive=... - -and -:: - - -global isa-fdc.driveB=... - -global sysbus-fdc.driveB=... - -become -:: - - -device floppy,unit=1,drive=... - -When plugging in a floppy controller -:: - - -device isa-fdc,...,driveA=... - -becomes -:: - - -device isa-fdc,... - -device floppy,unit=0,drive=... - -and -:: - - -device isa-fdc,...,driveB=... - -becomes -:: - - -device isa-fdc,... - -device floppy,unit=1,drive=... 
- -``-drive`` with bogus interface type (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Drives with interface types other than ``if=none`` are for onboard -devices. Drives the board doesn't pick up can no longer be used with --device. Use ``if=none`` instead. - -``-usbdevice ccid`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''' - -This option was undocumented and not used in the field. -Use `-device usb-ccid`` instead. - - -QEMU Machine Protocol (QMP) commands ------------------------------------- - -``block-dirty-bitmap-add`` "autoload" parameter (removed in 4.2.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The "autoload" parameter has been ignored since 2.12.0. All bitmaps -are automatically loaded from qcow2 images. - -``cpu-add`` (removed in 5.2) -'''''''''''''''''''''''''''' - -Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See -documentation of ``query-hotpluggable-cpus`` for additional details. - -``change`` (removed in 6.0) -''''''''''''''''''''''''''' - -Use ``blockdev-change-medium`` or ``change-vnc-password`` instead. - -``query-events`` (removed in 6.0) -''''''''''''''''''''''''''''''''' - -The ``query-events`` command has been superseded by the more powerful -and accurate ``query-qmp-schema`` command. - -``migrate_set_cache_size`` and ``query-migrate-cache-size`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate_set_parameter`` and ``info migrate_parameters`` instead. - -``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate_set_parameter`` instead. - -``query-cpus`` (removed in 6.0) -''''''''''''''''''''''''''''''' - -The ``query-cpus`` command is replaced by the ``query-cpus-fast`` command. - -``query-cpus-fast`` ``arch`` output member (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``arch`` output member of the ``query-cpus-fast`` command is -replaced by the ``target`` output member. - -chardev client socket with ``wait`` option (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Character devices creating sockets in client mode should not specify -the 'wait' field, which is only applicable to sockets in server mode - -``query-named-block-nodes`` result ``encryption_key_missing`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Removed with no replacement. - -``query-block`` result ``inserted.encryption_key_missing`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Removed with no replacement. - -``query-named-block-nodes`` and ``query-block`` result dirty-bitmaps[i].status (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``status`` field of the ``BlockDirtyInfo`` structure, returned by -these commands is removed. Two new boolean fields, ``recording`` and -``busy`` effectively replace it. - -``query-block`` result field ``dirty-bitmaps`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``dirty-bitmaps`` field of the ``BlockInfo`` structure, returned by -the query-block command is itself now removed. 
The ``dirty-bitmaps`` -field of the ``BlockDeviceInfo`` struct should be used instead, which is the -type of the ``inserted`` field in query-block replies, as well as the -type of array items in query-named-block-nodes. - -``object-add`` option ``props`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''' - -Specify the properties for the object as top-level arguments instead. - -Human Monitor Protocol (HMP) commands -------------------------------------- - -The ``hub_id`` parameter of ``hostfwd_add`` / ``hostfwd_remove`` (removed in 5.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``[hub_id name]`` parameter tuple of the 'hostfwd_add' and -'hostfwd_remove' HMP commands has been replaced by ``netdev_id``. - -``cpu-add`` (removed in 5.2) -'''''''''''''''''''''''''''' - -Use ``device_add`` for hotplugging vCPUs instead of ``cpu-add``. See -documentation of ``query-hotpluggable-cpus`` for additional details. - -``change vnc TARGET`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''' - -No replacement. The ``change vnc password`` and ``change DEVICE MEDIUM`` -commands are not affected. - -``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, ``acl_remove`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``acl_show``, ``acl_reset``, ``acl_policy``, ``acl_add``, and -``acl_remove`` commands were removed with no replacement. Authorization -for VNC should be performed using the pluggable QAuthZ objects. - -``migrate-set-cache-size`` and ``info migrate-cache-size`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate-set-parameters`` and ``info migrate-parameters`` instead. - -``migrate_set_downtime`` and ``migrate_set_speed`` (removed in 6.0) -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Use ``migrate-set-parameters`` instead. - -Guest Emulator ISAs -------------------- - -RISC-V ISA privilege specification version 1.09.1 (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The RISC-V ISA privilege specification version 1.09.1 has been removed. -QEMU supports both the newer version 1.10.0 and the ratified version 1.11.0, these -should be used instead of the 1.09.1 version. - -System emulator CPUS --------------------- - -KVM guest support on 32-bit Arm hosts (removed in 5.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The Linux kernel has dropped support for allowing 32-bit Arm systems -to host KVM guests as of the 5.7 kernel. Accordingly, QEMU is deprecating -its support for this configuration and will remove it in a future version. -Running 32-bit guests on a 64-bit Arm host remains supported. - -RISC-V ISA Specific CPUs (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''' - -The RISC-V cpus with the ISA version in the CPU name have been removed. The -four CPUs are: ``rv32gcsu-v1.9.1``, ``rv32gcsu-v1.10.0``, ``rv64gcsu-v1.9.1`` and -``rv64gcsu-v1.10.0``. Instead the version can be specified via the CPU ``priv_spec`` -option when using the ``rv32`` or ``rv64`` CPUs. - -RISC-V no MMU CPUs (removed in 5.1) -''''''''''''''''''''''''''''''''''' - -The RISC-V no MMU cpus have been removed. The two CPUs: ``rv32imacu-nommu`` and -``rv64imacu-nommu`` can no longer be used. Instead the MMU status can be specified -via the CPU ``mmu`` option when using the ``rv32`` or ``rv64`` CPUs. 
- -System emulator machines ------------------------- - -``spike_v1.9.1`` and ``spike_v1.10`` (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The version specific Spike machines have been removed in favour of the -generic ``spike`` machine. If you need to specify an older version of the RISC-V -spec you can use the ``-cpu rv64gcsu,priv_spec=v1.10.0`` command line argument. - -mips ``r4k`` platform (removed in 5.2) -'''''''''''''''''''''''''''''''''''''' - -This machine type was very old and unmaintained. Users should use the ``malta`` -machine type instead. - -mips ``fulong2e`` machine alias (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''' - -This machine has been renamed ``fuloong2e``. - -``pc-1.0``, ``pc-1.1``, ``pc-1.2`` and ``pc-1.3`` (removed in 6.0) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -These machine types were very old and likely could not be used for live -migration from old QEMU versions anymore. Use a newer machine type instead. - - -linux-user mode CPUs --------------------- - -``tilegx`` CPUs (removed in 6.0) -'''''''''''''''''''''''''''''''' - -The ``tilegx`` guest CPU support has been removed without replacement. It was -only implemented in linux-user mode, but support for this CPU was removed from -the upstream Linux kernel in 2018, and it has also been dropped from glibc, so -there is no new Linux development taking place with this architecture. For -running the old binaries, you can use older versions of QEMU. - -System emulator devices ------------------------ - -``ide-drive`` (removed in 6.0) -'''''''''''''''''''''''''''''' - -The 'ide-drive' device has been removed. Users should use 'ide-hd' or -'ide-cd' as appropriate to get an IDE hard disk or CD-ROM as needed. - -``scsi-disk`` (removed in 6.0) -'''''''''''''''''''''''''''''' - -The 'scsi-disk' device has been removed. Users should use 'scsi-hd' or -'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed. - -Related binaries ----------------- - -``qemu-nbd --partition`` (removed in 5.0) -''''''''''''''''''''''''''''''''''''''''' - -The ``qemu-nbd --partition $digit`` code (also spelled ``-P``) -could only handle MBR partitions, and never correctly handled logical -partitions beyond partition 5. Exporting a partition can still be -done by utilizing the ``--image-opts`` option with a raw blockdev -using the ``offset`` and ``size`` parameters layered on top of -any other existing blockdev. For example, if partition 1 is 100MiB -long starting at 1MiB, the old command:: - - qemu-nbd -t -P 1 -f qcow2 file.qcow2 - -can be rewritten as:: - - qemu-nbd -t --image-opts driver=raw,offset=1M,size=100M,file.driver=qcow2,file.file.driver=file,file.file.filename=file.qcow2 - -``qemu-img convert -n -o`` (removed in 5.1) -''''''''''''''''''''''''''''''''''''''''''' - -All options specified in ``-o`` are image creation options, so -they are now rejected when used with ``-n`` to skip image creation. - - -``qemu-img create -b bad file $size`` (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -When creating an image with a backing file that could not be opened, -``qemu-img create`` used to issue a warning about the failure but -proceed with the image creation if an explicit size was provided. 
-However, as the ``-u`` option exists for this purpose, it is safer to -enforce that any failure to open the backing image (including if the -backing file is missing or an incorrect format was specified) is an -error when ``-u`` is not used. - -Command line options --------------------- - -``-smp`` (invalid topologies) (removed 5.2) -''''''''''''''''''''''''''''''''''''''''''' - -CPU topology properties should describe whole machine topology including -possible CPUs. - -However, historically it was possible to start QEMU with an incorrect topology -where *n* <= *sockets* * *cores* * *threads* < *maxcpus*, -which could lead to an incorrect topology enumeration by the guest. -Support for invalid topologies is removed, the user must ensure -topologies described with -smp include all possible cpus, i.e. -*sockets* * *cores* * *threads* = *maxcpus*. - -``-numa`` node (without memory specified) (removed 5.2) -''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Splitting RAM by default between NUMA nodes had the same issues as ``mem`` -parameter with the difference that the role of the user plays QEMU using -implicit generic or board specific splitting rule. -Use ``memdev`` with *memory-backend-ram* backend or ``mem`` (if -it's supported by used machine type) to define mapping explicitly instead. -Users of existing VMs, wishing to preserve the same RAM distribution, should -configure it explicitly using ``-numa node,memdev`` options. Current RAM -distribution can be retrieved using HMP command ``info numa`` and if separate -memory devices (pc|nv-dimm) are present use ``info memory-device`` and subtract -device memory from output of ``info numa``. - -``-numa node,mem=``\ *size* (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''' - -The parameter ``mem`` of ``-numa node`` was used to assign a part of -guest RAM to a NUMA node. But when using it, it's impossible to manage a specified -RAM chunk on the host side (like bind it to a host node, setting bind policy, ...), -so the guest ends up with the fake NUMA configuration with suboptiomal performance. -However since 2014 there is an alternative way to assign RAM to a NUMA node -using parameter ``memdev``, which does the same as ``mem`` and adds -means to actually manage node RAM on the host side. Use parameter ``memdev`` -with *memory-backend-ram* backend as replacement for parameter ``mem`` -to achieve the same fake NUMA effect or a properly configured -*memory-backend-file* backend to actually benefit from NUMA configuration. -New machine versions (since 5.1) will not accept the option but it will still -work with old machine types. User can check the QAPI schema to see if the legacy -option is supported by looking at MachineInfo::numa-mem-supported property. - -``-mem-path`` fallback to RAM (removed in 5.0) -'''''''''''''''''''''''''''''''''''''''''''''' - -If guest RAM allocation from file pointed by ``mem-path`` failed, -QEMU was falling back to allocating from RAM, which might have resulted -in unpredictable behavior since the backing file specified by the user -as ignored. Currently, users are responsible for making sure the backing storage -specified with ``-mem-path`` can actually provide the guest RAM configured with -``-m`` and QEMU fails to start up if RAM allocation is unsuccessful. - -``-smp`` (invalid topologies) (removed 5.2) -''''''''''''''''''''''''''''''''''''''''''' - -CPU topology properties should describe whole machine topology including -possible CPUs. 
- -However, historically it was possible to start QEMU with an incorrect topology -where *n* <= *sockets* * *cores* * *threads* < *maxcpus*, -which could lead to an incorrect topology enumeration by the guest. -Support for invalid topologies is removed, the user must ensure -topologies described with -smp include all possible cpus, i.e. -*sockets* * *cores* * *threads* = *maxcpus*. - -``-machine enforce-config-section=on|off`` (removed 5.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -The ``enforce-config-section`` property was replaced by the -``-global migration.send-configuration={on|off}`` option. - -Block devices -------------- - -VXHS backend (removed in 5.1) -''''''''''''''''''''''''''''' - -The VXHS code did not compile since v2.12.0. It was removed in 5.1. diff --git a/docs/system/riscv/microchip-icicle-kit.rst b/docs/system/riscv/microchip-icicle-kit.rst index 4fe97bce3f0..40798b1aae5 100644 --- a/docs/system/riscv/microchip-icicle-kit.rst +++ b/docs/system/riscv/microchip-icicle-kit.rst @@ -15,33 +15,53 @@ Supported devices The ``microchip-icicle-kit`` machine supports the following devices: - * 1 E51 core - * 4 U54 cores - * Core Level Interruptor (CLINT) - * Platform-Level Interrupt Controller (PLIC) - * L2 Loosely Integrated Memory (L2-LIM) - * DDR memory controller - * 5 MMUARTs - * 1 DMA controller - * 2 GEM Ethernet controllers - * 1 SDHC storage controller +* 1 E51 core +* 4 U54 cores +* Core Level Interruptor (CLINT) +* Platform-Level Interrupt Controller (PLIC) +* L2 Loosely Integrated Memory (L2-LIM) +* DDR memory controller +* 5 MMUARTs +* 1 DMA controller +* 2 GEM Ethernet controllers +* 1 SDHC storage controller Boot options ------------ The ``microchip-icicle-kit`` machine can start using the standard -bios functionality for loading its BIOS image, aka Hart Software Services (HSS_). -HSS loads the second stage bootloader U-Boot from an SD card. It does not -support direct kernel loading via the -kernel option. One has to load kernel -from U-Boot. +HSS loads the second stage bootloader U-Boot from an SD card. Then a kernel +can be loaded from U-Boot. It also supports direct kernel booting via the +-kernel option along with the device tree blob via -dtb. When direct kernel +boot is used, the OpenSBI fw_dynamic BIOS image is used to boot a payload +like U-Boot or OS kernel directly. + +The user provided DTB should have the following requirements: + +* The /cpus node should contain at least one subnode for E51 and the number + of subnodes should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` +* Should contain a node for the CLINT device with a compatible string + "riscv,clint0" + +QEMU follows below truth table to select which payload to execute: + +===== ========== ========== ======= +-bios -kernel -dtb payload +===== ========== ========== ======= + N N don't care HSS + Y don't care don't care HSS + N Y Y kernel +===== ========== ========== ======= The memory is set to 1537 MiB by default which is the minimum required high memory size by HSS. A sanity check on ram size is performed in the machine init routine to prompt user to increase the RAM size to > 1537 MiB when less than 1537 MiB ram is detected. -Boot the machine ----------------- +Running HSS +----------- HSS 2020.12 release is tested at the time of writing. 
To build an HSS image that can be booted by the ``microchip-icicle-kit`` machine, type the following @@ -75,7 +95,7 @@ Then we can boot the machine by: -serial chardev:serial1 With above command line, current terminal session will be used for the first -serial port. Open another terminal window, and use `minicom` to connect the +serial port. Open another terminal window, and use ``minicom`` to connect the second serial port. .. code-block:: bash @@ -86,4 +106,44 @@ HSS output is on the first serial port (stdio) and U-Boot outputs on the second serial port. U-Boot will automatically load the Linux kernel from the SD card image. +Direct Kernel Boot +------------------ + +Sometimes we just want to test booting a new kernel, and transforming the +kernel image to the format required by the HSS bootflow is tedious. We can +use '-kernel' for direct kernel booting just like other RISC-V machines do. + +In this mode, the OpenSBI fw_dynamic BIOS image for 'generic' platform is +used to boot an S-mode payload like U-Boot or OS kernel directly. + +For example, the following commands show building a U-Boot image from U-Boot +mainline v2021.07 for the Microchip Icicle Kit board: + +.. code-block:: bash + + $ export CROSS_COMPILE=riscv64-linux- + $ make microchip_mpfs_icicle_defconfig + +Then we can boot the machine by: + +.. code-block:: bash + + $ qemu-system-riscv64 -M microchip-icicle-kit -smp 5 -m 2G \ + -sd path/to/sdcard.img \ + -nic user,model=cadence_gem \ + -nic tap,ifname=tap,model=cadence_gem,script=no \ + -display none -serial stdio \ + -kernel path/to/u-boot/build/dir/u-boot.bin \ + -dtb path/to/u-boot/build/dir/u-boot.dtb + +CAVEATS: + +* Check the "stdout-path" property in the /chosen node in the DTB to determine + which serial port is used for the serial console, e.g.: if the console is set + to the second serial port, change to use "-serial null -serial stdio". +* The default U-Boot configuration uses CONFIG_OF_SEPARATE hence the ELF image + ``u-boot`` cannot be passed to "-kernel" as it does not contain the DTB hence + ``u-boot.bin`` has to be used which does contain one. To use the ELF image, + we need to change to CONFIG_OF_EMBED or CONFIG_OF_PRIOR_STAGE. + .. _HSS: https://github.com/polarfire-soc/hart-software-services diff --git a/docs/system/riscv/shakti-c.rst b/docs/system/riscv/shakti-c.rst new file mode 100644 index 00000000000..fea57f7b6ba --- /dev/null +++ b/docs/system/riscv/shakti-c.rst @@ -0,0 +1,82 @@ +Shakti C Reference Platform (``shakti_c``) +========================================== + +Shakti C Reference Platform is a reference platform based on arty a7 100t +for the Shakti SoC. + +Shakti SoC is a SoC based on the Shakti C-class processor core. Shakti C +is a 64bit RV64GCSUN processor core. + +For more details on Shakti SoC, please see: +https://gitlab.com/shaktiproject/cores/shakti-soc/-/blob/master/fpga/boards/artya7-100t/c-class/README.rst + +For more info on the Shakti C-class core, please see: +https://c-class.readthedocs.io/en/latest/ + +Supported devices +----------------- + +The ``shakti_c`` machine supports the following devices: + + * 1 C-class core + * Core Level Interruptor (CLINT) + * Platform-Level Interrupt Controller (PLIC) + * 1 UART + +Boot options +------------ + +The ``shakti_c`` machine can start using the standard -bios +functionality for loading the baremetal application or opensbi. + +Boot the machine +---------------- + +Shakti SDK +~~~~~~~~~~ +Shakti SDK can be used to generate the baremetal example UART applications. + +.. 
code-block:: bash + + $ git clone https://gitlab.com/behindbytes/shakti-sdk.git + $ cd shakti-sdk + $ make software PROGRAM=loopback TARGET=artix7_100t + +Binary would be generated in: + software/examples/uart_applns/loopback/output/loopback.shakti + +You could also download the precompiled example applications using below +commands. + +.. code-block:: bash + + $ wget -c https://gitlab.com/behindbytes/shakti-binaries/-/raw/master/sdk/shakti_sdk_qemu.zip + $ unzip shakti_sdk_qemu.zip + +Then we can run the UART example using: + +.. code-block:: bash + + $ qemu-system-riscv64 -M shakti_c -nographic \ + -bios path/to/shakti_sdk_qemu/loopback.shakti + +OpenSBI +~~~~~~~ +We can also run OpenSBI with Test Payload. + +.. code-block:: bash + + $ git clone https://github.com/riscv/opensbi.git -b v0.9 + $ cd opensbi + $ wget -c https://gitlab.com/behindbytes/shakti-binaries/-/raw/master/dts/shakti.dtb + $ export CROSS_COMPILE=riscv64-unknown-elf- + $ export FW_FDT_PATH=./shakti.dtb + $ make PLATFORM=generic + +fw_payload.elf would be generated in build/platform/generic/firmware/fw_payload.elf. +Boot it using the below qemu command. + +.. code-block:: bash + + $ qemu-system-riscv64 -M shakti_c -nographic \ + -bios path/to/fw_payload.elf diff --git a/docs/system/riscv/sifive_u.rst b/docs/system/riscv/sifive_u.rst index 98e7562848f..7b166567f97 100644 --- a/docs/system/riscv/sifive_u.rst +++ b/docs/system/riscv/sifive_u.rst @@ -9,21 +9,22 @@ Supported devices The ``sifive_u`` machine supports the following devices: - * 1 E51 / E31 core - * Up to 4 U54 / U34 cores - * Core Level Interruptor (CLINT) - * Platform-Level Interrupt Controller (PLIC) - * Power, Reset, Clock, Interrupt (PRCI) - * L2 Loosely Integrated Memory (L2-LIM) - * DDR memory controller - * 2 UARTs - * 1 GEM Ethernet controller - * 1 GPIO controller - * 1 One-Time Programmable (OTP) memory with stored serial number - * 1 DMA controller - * 2 QSPI controllers - * 1 ISSI 25WP256 flash - * 1 SD card in SPI mode +* 1 E51 / E31 core +* Up to 4 U54 / U34 cores +* Core Local Interruptor (CLINT) +* Platform-Level Interrupt Controller (PLIC) +* Power, Reset, Clock, Interrupt (PRCI) +* L2 Loosely Integrated Memory (L2-LIM) +* DDR memory controller +* 2 UARTs +* 1 GEM Ethernet controller +* 1 GPIO controller +* 1 One-Time Programmable (OTP) memory with stored serial number +* 1 DMA controller +* 2 QSPI controllers +* 1 ISSI 25WP256 flash +* 1 SD card in SPI mode +* PWM0 and PWM1 Please note the real world HiFive Unleashed board has a fixed configuration of 1 E51 core and 4 U54 core combination and the RISC-V core boots in 64-bit mode. @@ -36,12 +37,21 @@ Hardware configuration information ---------------------------------- The ``sifive_u`` machine automatically generates a device tree blob ("dtb") -which it passes to the guest. This provides information about the addresses, -interrupt lines and other configuration of the various devices in the system. -Guest software should discover the devices that are present in the generated -DTB instead of using a DTB for the real hardware, as some of the devices are -not modeled by QEMU and trying to access these devices may cause unexpected -behavior. +which it passes to the guest, if there is no ``-dtb`` option. This provides +information about the addresses, interrupt lines and other configuration of +the various devices in the system. 
Guest software should discover the devices +that are present in the generated DTB instead of using a DTB for the real +hardware, as some of the devices are not modeled by QEMU and trying to access +these devices may cause unexpected behavior. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs should have the following requirements: + +* The /cpus node should contain at least one subnode for E51 and the number + of subnodes should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` +* Should contain a node for the CLINT device with a compatible string + "riscv,clint0" if using with OpenSBI BIOS images Boot options ------------ @@ -122,6 +132,32 @@ To boot the newly built Linux kernel in QEMU with the ``sifive_u`` machine: -initrd /path/to/rootfs.ext4 \ -append "root=/dev/ram" +Alternatively, we can use a custom DTB to boot the machine by inserting a CLINT +node in fu540-c000.dtsi in the Linux kernel, + +.. code-block:: none + + clint: clint@2000000 { + compatible = "riscv,clint0"; + interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 + &cpu1_intc 3 &cpu1_intc 7 + &cpu2_intc 3 &cpu2_intc 7 + &cpu3_intc 3 &cpu3_intc 7 + &cpu4_intc 3 &cpu4_intc 7>; + reg = <0x00 0x2000000 0x00 0x10000>; + }; + +with the following command line options: + +.. code-block:: bash + + $ qemu-system-riscv64 -M sifive_u -smp 5 -m 8G \ + -display none -serial stdio \ + -kernel arch/riscv/boot/Image \ + -dtb arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dtb \ + -initrd /path/to/rootfs.ext4 \ + -append "root=/dev/ram" + To build a Linux mainline kernel that can be booted by the ``sifive_u`` machine in 32-bit mode, use the rv32_defconfig configuration. A patch is required to fix the 32-bit boot issue for Linux kernel v5.10. @@ -174,15 +210,16 @@ command line options with ``qemu-system-riscv32``. Running U-Boot -------------- -U-Boot mainline v2021.01 release is tested at the time of writing. To build a +U-Boot mainline v2021.07 release is tested at the time of writing. To build a U-Boot mainline bootloader that can be booted by the ``sifive_u`` machine, use -the sifive_fu540_defconfig with similar commands as described above for Linux: +the sifive_unleashed_defconfig with similar commands as described above for +Linux: .. code-block:: bash $ export CROSS_COMPILE=riscv64-linux- $ export OPENSBI=/path/to/opensbi-riscv64-generic-fw_dynamic.bin - $ make sifive_fu540_defconfig + $ make sifive_unleashed_defconfig You will get spl/u-boot-spl.bin and u-boot.itb file in the build tree. @@ -277,31 +314,29 @@ board on QEMU ``sifive_u`` machine out of the box. This allows users to develop and test the recommended RISC-V boot flow with a real world use case: ZSBL (in QEMU) loads U-Boot SPL from SD card or SPI flash to L2LIM, then U-Boot SPL loads the combined payload image of OpenSBI fw_dynamic -firmware and U-Boot proper. However sometimes we want to have a quick test -of booting U-Boot on QEMU without the needs of preparing the SPI flash or -SD card images, an alternate way can be used, which is to create a U-Boot -S-mode image by modifying the configuration of U-Boot: +firmware and U-Boot proper. + +However sometimes we want to have a quick test of booting U-Boot on QEMU +without the needs of preparing the SPI flash or SD card images, an alternate +way can be used, which is to create a U-Boot S-mode image by modifying the +configuration of U-Boot: .. 
code-block:: bash + $ export CROSS_COMPILE=riscv64-linux- + $ make sifive_unleashed_defconfig $ make menuconfig -then manually select the following configuration in U-Boot: +then manually select the following configuration: - Device Tree Control > Provider of DTB for DT Control > Prior Stage bootloader DTB + * Device Tree Control ---> Provider of DTB for DT Control ---> Prior Stage bootloader DTB -This lets U-Boot to use the QEMU generated device tree blob. During the build, -a build error will be seen below: +and unselect the following configuration: -.. code-block:: none - - MKIMAGE u-boot.img - ./tools/mkimage: Can't open arch/riscv/dts/hifive-unleashed-a00.dtb: No such file or directory - ./tools/mkimage: failed to build FIT - make: *** [Makefile:1440: u-boot.img] Error 1 + * Library routines ---> Allow access to binman information in the device tree -The above errors can be safely ignored as we don't run U-Boot SPL under QEMU -in this alternate configuration. +This changes U-Boot to use the QEMU generated device tree blob, and bypass +running the U-Boot SPL stage. Boot the 64-bit U-Boot S-mode image directly: @@ -316,14 +351,18 @@ It's possible to create a 32-bit U-Boot S-mode image as well. .. code-block:: bash $ export CROSS_COMPILE=riscv64-linux- - $ make sifive_fu540_defconfig + $ make sifive_unleashed_defconfig $ make menuconfig then manually update the following configuration in U-Boot: - Device Tree Control > Provider of DTB for DT Control > Prior Stage bootloader DTB - RISC-V architecture > Base ISA > RV32I - Boot images > Text Base > 0x80400000 + * Device Tree Control ---> Provider of DTB for DT Control ---> Prior Stage bootloader DTB + * RISC-V architecture ---> Base ISA ---> RV32I + * Boot options ---> Boot images ---> Text Base ---> 0x80400000 + +and unselect the following configuration: + + * Library routines ---> Allow access to binman information in the device tree Use the same command line options to boot the 32-bit U-Boot S-mode image: diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst new file mode 100644 index 00000000000..fa016584bf5 --- /dev/null +++ b/docs/system/riscv/virt.rst @@ -0,0 +1,148 @@ +'virt' Generic Virtual Platform (``virt``) +========================================== + +The ``virt`` board is a platform which does not correspond to any real hardware; +it is designed for use in virtual machines. It is the recommended board type +if you simply want to run a guest such as Linux and do not care about +reproducing the idiosyncrasies and limitations of a particular bit of +real-world hardware. + +Supported devices +----------------- + +The ``virt`` machine supports the following devices: + +* Up to 8 generic RV32GC/RV64GC cores, with optional extensions +* Core Local Interruptor (CLINT) +* Platform-Level Interrupt Controller (PLIC) +* CFI parallel NOR flash memory +* 1 NS16550 compatible UART +* 1 Google Goldfish RTC +* 1 SiFive Test device +* 8 virtio-mmio transport devices +* 1 generic PCIe host bridge +* The fw_cfg device that allows a guest to obtain data from QEMU + +Note that the default CPU is a generic RV32GC/RV64GC. Optional extensions +can be enabled via command line parameters, e.g.: ``-cpu rv64,x-h=true`` +enables the hypervisor extension for RV64. + +Hardware configuration information +---------------------------------- + +The ``virt`` machine automatically generates a device tree blob ("dtb") +which it passes to the guest, if there is no ``-dtb`` option. 
This provides +information about the addresses, interrupt lines and other configuration of +the various devices in the system. Guest software should discover the devices +that are present in the generated DTB. + +If users want to provide their own DTB, they can use the ``-dtb`` option. +These DTBs should have the following requirements: + +* The number of subnodes of the /cpus node should match QEMU's ``-smp`` option +* The /memory reg size should match QEMU’s selected ram_size via ``-m`` +* Should contain a node for the CLINT device with a compatible string + "riscv,clint0" if using with OpenSBI BIOS images + +Boot options +------------ + +The ``virt`` machine can start using the standard -kernel functionality +for loading a Linux kernel, a VxWorks kernel, an S-mode U-Boot bootloader +with the default OpenSBI firmware image as the -bios. It also supports +the recommended RISC-V bootflow: U-Boot SPL (M-mode) loads OpenSBI fw_dynamic +firmware and U-Boot proper (S-mode), using the standard -bios functionality. + +Machine-specific options +------------------------ + +The following machine-specific options are supported: + +- aclint=[on|off] + + When this option is "on", ACLINT devices will be emulated instead of + SiFive CLINT. When not specified, this option is assumed to be "off". + +Running Linux kernel +-------------------- + +Linux mainline v5.12 release is tested at the time of writing. To build a +Linux mainline kernel that can be booted by the ``virt`` machine in +64-bit mode, simply configure the kernel using the defconfig configuration: + +.. code-block:: bash + + $ export ARCH=riscv + $ export CROSS_COMPILE=riscv64-linux- + $ make defconfig + $ make + +To boot the newly built Linux kernel in QEMU with the ``virt`` machine: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel arch/riscv/boot/Image \ + -initrd /path/to/rootfs.cpio \ + -append "root=/dev/ram" + +To build a Linux mainline kernel that can be booted by the ``virt`` machine +in 32-bit mode, use the rv32_defconfig configuration. A patch is required to +fix the 32-bit boot issue for Linux kernel v5.12. + +.. code-block:: bash + + $ export ARCH=riscv + $ export CROSS_COMPILE=riscv64-linux- + $ curl https://patchwork.kernel.org/project/linux-riscv/patch/20210627135117.28641-1-bmeng.cn@gmail.com/mbox/ > riscv.patch + $ git am riscv.patch + $ make rv32_defconfig + $ make + +Replace ``qemu-system-riscv64`` with ``qemu-system-riscv32`` in the command +line above to boot the 32-bit Linux kernel. A rootfs image containing 32-bit +applications shall be used in order for kernel to boot to user space. + +Running U-Boot +-------------- + +U-Boot mainline v2021.04 release is tested at the time of writing. To build an +S-mode U-Boot bootloader that can be booted by the ``virt`` machine, use +the qemu-riscv64_smode_defconfig with similar commands as described above for Linux: + +.. code-block:: bash + + $ export CROSS_COMPILE=riscv64-linux- + $ make qemu-riscv64_smode_defconfig + +Boot the 64-bit U-Boot S-mode image directly: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -smp 4 -m 2G \ + -display none -serial stdio \ + -kernel /path/to/u-boot.bin + +To test booting U-Boot SPL which in M-mode, which in turn loads a FIT image +that bundles OpenSBI fw_dynamic firmware and U-Boot proper (S-mode) together, +build the U-Boot images using riscv64_spl_defconfig: + +.. 
code-block:: bash + + $ export CROSS_COMPILE=riscv64-linux- + $ export OPENSBI=/path/to/opensbi-riscv64-generic-fw_dynamic.bin + $ make qemu-riscv64_spl_defconfig + +The minimal QEMU commands to run U-Boot SPL are: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -smp 4 -m 2G \ + -display none -serial stdio \ + -bios /path/to/u-boot-spl \ + -device loader,file=/path/to/u-boot.itb,addr=0x80200000 + +To test 32-bit U-Boot images, switch to use qemu-riscv32_smode_defconfig and +riscv32_spl_defconfig builds, and replace ``qemu-system-riscv64`` with +``qemu-system-riscv32`` in the command lines above to boot the 32-bit U-Boot. diff --git a/docs/system/s390x/protvirt.rst b/docs/system/s390x/protvirt.rst index 0f481043d99..aee63ed7ec9 100644 --- a/docs/system/s390x/protvirt.rst +++ b/docs/system/s390x/protvirt.rst @@ -14,11 +14,11 @@ Prerequisites To run PVMs, a machine with the Protected Virtualization feature, as indicated by the Ultravisor Call facility (stfle bit 158), is required. The Ultravisor needs to be initialized at boot by setting -`prot_virt=1` on the host's kernel command line. +``prot_virt=1`` on the host's kernel command line. Running PVMs requires using the KVM hypervisor. -If those requirements are met, the capability `KVM_CAP_S390_PROTECTED` +If those requirements are met, the capability ``KVM_CAP_S390_PROTECTED`` will indicate that KVM can support PVMs on that LPAR. @@ -26,15 +26,15 @@ Running a Protected Virtual Machine ----------------------------------- To run a PVM you will need to select a CPU model which includes the -`Unpack facility` (stfle bit 161 represented by the feature -`unpack`/`S390_FEAT_UNPACK`), and add these options to the command line:: +``Unpack facility`` (stfle bit 161 represented by the feature +``unpack``/``S390_FEAT_UNPACK``), and add these options to the command line:: -object s390-pv-guest,id=pv0 \ -machine confidential-guest-support=pv0 Adding these options will: -* Ensure the `unpack` facility is available +* Ensure the ``unpack`` facility is available * Enable the IOMMU by default for all I/O devices * Initialize the PV mechanism @@ -63,5 +63,5 @@ from the disk boot. This memory layout includes the encrypted components (kernel, initrd, cmdline), the stage3a loader and metadata. In case this boot method is used, the command line options -initrd and -cmdline are ineffective. The preparation of a PVM -image is done via the `genprotimg` tool from the s390-tools +image is done via the ``genprotimg`` tool from the s390-tools collection. diff --git a/docs/system/secrets.rst b/docs/system/secrets.rst new file mode 100644 index 00000000000..4a177369b69 --- /dev/null +++ b/docs/system/secrets.rst @@ -0,0 +1,162 @@ +.. _secret data: + +Providing secret data to QEMU +----------------------------- + +There are a variety of objects in QEMU which require secret data to be provided +by the administrator or management application. For example, network block +devices often require a password, LUKS block devices require a passphrase to +unlock key material, remote desktop services require an access password. +QEMU has a general purpose mechanism for providing secret data to QEMU in a +secure manner, using the ``secret`` object type. + +At startup this can be done using the ``-object secret,...`` command line +argument. At runtime this can be done using the ``object_add`` QMP / HMP +monitor commands. The examples that follow will illustrate use of ``-object`` +command lines, but they all apply equivalentely in QMP / HMP. 
When creating +a ``secret`` object it must be given a unique ID string. This ID is then +used to identify the object when configuring the thing which needs the data. + + +INSECURE: Passing secrets as clear text inline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**The following should never be done in a production environment or on a +multi-user host. Command line arguments are usually visible in the process +listings and are often collected in log files by system monitoring agents +or bug reporting tools. QMP/HMP commands and their arguments are also often +logged and attached to bug reports. This all risks compromising secrets that +are passed inline.** + +For the convenience of people debugging / developing with QEMU, it is possible +to pass secret data inline on the command line. + +:: + + -object secret,id=secvnc0,data=87539319 + + +Again it is possible to provide the data in base64 encoded format, which is +particularly useful if the data contains binary characters that would clash +with argument parsing. + +:: + + -object secret,id=secvnc0,data=ODc1MzkzMTk=,format=base64 + + +**Note: base64 encoding does not provide any security benefit.** + +Passing secrets as clear text via a file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The simplest approach to providing data securely is to use a file to store +the secret: + +:: + + -object secret,id=secvnc0,file=vnc-password.txt + + +In this example the file ``vnc-password.txt`` contains the plain text secret +data. It is important to note that the contents of the file are treated as an +opaque blob. The entire raw file contents is used as the value, thus it is +important not to mistakenly add any trailing newline character in the file if +this newline is not intended to be part of the secret data. + +In some cases it might be more convenient to pass the secret data in base64 +format and have QEMU decode to get the raw bytes before use: + +:: + + -object secret,id=sec0,file=vnc-password.txt,format=base64 + + +The file should generally be given mode ``0600`` or ``0400`` permissions, and +have its user/group ownership set to the same account that the QEMU process +will be launched under. If using mandatory access control such as SELinux, then +the file should be labelled to only grant access to the specific QEMU process +that needs access. This will prevent other processes/users from compromising the +secret data. + + +Passing secrets as cipher text inline +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To address the insecurity of passing secrets inline as clear text, it is +possible to configure a second secret as an AES key to use for decrypting +the data. + +The secret used as the AES key must always be configured using the file based +storage mechanism: + +:: + + -object secret,id=secmaster,file=masterkey.data,format=base64 + + +In this case the ``masterkey.data`` file would be initialized with 32 +cryptographically secure random bytes, which are then base64 encoded. +The contents of this file will be used as an AES-256 key to encrypt the +real secret that can now be safely passed to QEMU inline as cipher text + +:: + + -object secret,id=secvnc0,keyid=secmaster,data=BASE64-CIPHERTEXT,iv=BASE64-IV,format=base64 + + +In this example ``BASE64-CIPHERTEXT`` is the result of AES-256-CBC encrypting +the secret with ``masterkey.data`` and then base64 encoding the ciphertext. +The ``BASE64-IV`` data is 16 random bytes which have been base64 encoded. +These bytes are used as the initialization vector for the AES-256-CBC value.
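As an illustration of the encoding steps just described (a sketch only, not part of the patch above; it assumes the ``openssl``, ``base64`` and ``hexdump`` utilities are available, and the file names and the example password ``letmein`` are placeholders), the master key, IV and ciphertext could be prepared like this:

::

   # 32 byte AES-256 master key, stored base64 encoded
   openssl rand -base64 32 > masterkey.data

   # a fresh 16 byte initialization vector for this one secret
   openssl rand -base64 16 > sec-iv.b64

   # encrypt the real secret with the master key and the IV
   KEY=$(base64 -d masterkey.data | hexdump -v -e '/1 "%02X"')
   IV=$(base64 -d sec-iv.b64 | hexdump -v -e '/1 "%02X"')
   SECRET=$(printf letmein | openssl enc -aes-256-cbc -a -K $KEY -iv $IV)

   # the resulting values would then be passed as
   #   -object secret,id=secmaster,file=masterkey.data,format=base64
   #   -object secret,id=secvnc0,keyid=secmaster,format=base64,data=$SECRET,iv=$(<sec-iv.b64)

Any equivalent tooling can be used to perform these steps.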
+ +A single master key can be used to encrypt all subsequent secrets, **but it is +critical that a different initialization vector is used for every secret**. + +Passing secrets via the Linux keyring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The earlier mechanisms described are platform agnostic. If using QEMU on a Linux +host, it is further possible to pass secrets to QEMU using the Linux keyring: + +:: + + -object secret_keyring,id=secvnc0,serial=1729 + + +This instructs QEMU to load data from the Linux keyring secret identified by +the serial number ``1729``. It is possible to combine use of the keyring with +other features mentioned earlier such as base64 encoding: + +:: + + -object secret_keyring,id=secvnc0,serial=1729,format=base64 + + +and also encryption with a master key: + +:: + + -object secret_keyring,id=secvnc0,keyid=secmaster,serial=1729,iv=BASE64-IV + + +Best practice +~~~~~~~~~~~~~ + +It is recommended for production deployments to use a master key secret, and +then pass all subsequent inline secrets encrypted with the master key. + +Each QEMU instance must have a distinct master key, and that must be generated +from a cryptographically secure random data source. The master key should be +deleted immediately upon QEMU shutdown. If passing the master key as a file, +the key file must have access control rules applied that restrict access to +just the one QEMU process that is intended to use it. Alternatively the Linux +keyring can be used to pass the master key to QEMU. + +The secrets for individual QEMU device backends must all then be encrypted +with this master key. + +This procedure helps ensure that the individual secrets for QEMU backends will +not be compromised, even if ``-object`` CLI args or ``object_add`` monitor +commands are collected in log files and attached to public bug support tickets. +The only item that needs strongly protecting is the master key file. diff --git a/docs/system/target-arm.rst b/docs/system/target-arm.rst index edd013c7bbd..91ebc26c6db 100644 --- a/docs/system/target-arm.rst +++ b/docs/system/target-arm.rst @@ -85,10 +85,17 @@ undocumented; you can get a complete list by running arm/aspeed arm/sabrelite arm/digic + arm/cubieboard + arm/emcraft-sf2 + arm/highbank arm/musicpal arm/gumstix + arm/mainstone + arm/kzm + arm/nrf arm/nseries arm/nuvoton + arm/imx25-pdk arm/orangepi arm/palm arm/raspi @@ -96,9 +103,16 @@ undocumented; you can get a complete list by running arm/collie arm/sx1 arm/stellaris + arm/stm32 arm/virt arm/xlnx-versal-virt +Emulated CPU architecture support +================================= + +.. toctree:: + arm/emulation + Arm CPU features ================ diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index 22ba5ce2c0f..4daa53c35d8 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -19,7 +19,15 @@ Board-specific documentation i386/microvm i386/pc -.. include:: cpu-models-x86.rst.inc +Architectural features +~~~~~~~~~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 1 + + i386/cpu + i386/kvm-pv + i386/sgx .. _pcsys_005freq: diff --git a/docs/system/target-ppc.rst b/docs/system/target-ppc.rst index 67905b8f2a6..4f6eb93b177 100644 --- a/docs/system/target-ppc.rst +++ b/docs/system/target-ppc.rst @@ -20,5 +20,6 @@ help``. 
ppc/embedded ppc/powermac ppc/powernv + ppc/ppce500 ppc/prep ppc/pseries diff --git a/docs/system/target-riscv.rst b/docs/system/target-riscv.rst index 8d5946fbbbc..89a866e4f4f 100644 --- a/docs/system/target-riscv.rst +++ b/docs/system/target-riscv.rst @@ -67,7 +67,20 @@ undocumented; you can get a complete list by running :maxdepth: 1 riscv/microchip-icicle-kit + riscv/shakti-c riscv/sifive_u + riscv/virt -RISC-V CPU features +RISC-V CPU firmware ------------------- + +When using the ``sifive_u`` or ``virt`` machine there are three different +firmware boot options: +1. ``-bios default`` - This is the default behaviour if no -bios option +is included. This option will load the default OpenSBI firmware automatically. +The firmware is included with the QEMU release and no user interaction is +required. All a user needs to do is specify the kernel they want to boot +with the -kernel option +2. ``-bios none`` - QEMU will not automatically load any firmware. It is up +to the user to load all the images they need. +3. ``-bios `` - Tells QEMU to load the specified file as the firmware. diff --git a/docs/system/tls.rst b/docs/system/tls.rst index b0973afe1bf..1a04674362e 100644 --- a/docs/system/tls.rst +++ b/docs/system/tls.rst @@ -311,7 +311,7 @@ containing one or more usernames and random keys:: mkdir -m 0700 /tmp/keys psktool -u rich -p /tmp/keys/keys.psk -TLS-enabled servers such as qemu-nbd can use this directory like so:: +TLS-enabled servers such as ``qemu-nbd`` can use this directory like so:: qemu-nbd \ -t -x / \ diff --git a/docs/system/usb.rst b/docs/system/usb.rst deleted file mode 100644 index eeab78dcfbe..00000000000 --- a/docs/system/usb.rst +++ /dev/null @@ -1,140 +0,0 @@ -.. _pcsys_005fusb: - -USB emulation -------------- - -QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can -plug virtual USB devices or real host USB devices (only works with -certain host operating systems). QEMU will automatically create and -connect virtual USB hubs as necessary to connect multiple USB devices. - -.. _Connecting USB devices: - -Connecting USB devices -~~~~~~~~~~~~~~~~~~~~~~ - -USB devices can be connected with the ``-device usb-...`` command line -option or the ``device_add`` monitor command. Available devices are: - -``usb-mouse`` - Virtual Mouse. This will override the PS/2 mouse emulation when - activated. - -``usb-tablet`` - Pointer device that uses absolute coordinates (like a touchscreen). - This means QEMU is able to report the mouse position without having - to grab the mouse. Also overrides the PS/2 mouse emulation when - activated. - -``usb-storage,drive=drive_id`` - Mass storage device backed by drive_id (see the :ref:`disk images` - chapter in the System Emulation Users Guide) - -``usb-uas`` - USB attached SCSI device, see - `usb-storage.txt `__ - for details - -``usb-bot`` - Bulk-only transport storage device, see - `usb-storage.txt `__ - for details here, too - -``usb-mtp,rootdir=dir`` - Media transfer protocol device, using dir as root of the file tree - that is presented to the guest. - -``usb-host,hostbus=bus,hostaddr=addr`` - Pass through the host device identified by bus and addr - -``usb-host,vendorid=vendor,productid=product`` - Pass through the host device identified by vendor and product ID - -``usb-wacom-tablet`` - Virtual Wacom PenPartner tablet. This device is similar to the - ``tablet`` above but it can be used with the tslib library because in - addition to touch coordinates it reports touch pressure. - -``usb-kbd`` - Standard USB keyboard. 
Will override the PS/2 keyboard (if present). - -``usb-serial,chardev=id`` - Serial converter. This emulates an FTDI FT232BM chip connected to - host character device id. - -``usb-braille,chardev=id`` - Braille device. This will use BrlAPI to display the braille output on - a real or fake device referenced by id. - -``usb-net[,netdev=id]`` - Network adapter that supports CDC ethernet and RNDIS protocols. id - specifies a netdev defined with ``-netdev …,id=id``. For instance, - user-mode networking can be used with - - .. parsed-literal:: - - |qemu_system| [...] -netdev user,id=net0 -device usb-net,netdev=net0 - -``usb-ccid`` - Smartcard reader device - -``usb-audio`` - USB audio device - -``u2f-{emulated,passthru}`` - Universal Second Factor device - -.. _host_005fusb_005fdevices: - -Using host USB devices on a Linux host -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -WARNING: this is an experimental feature. QEMU will slow down when using -it. USB devices requiring real time streaming (i.e. USB Video Cameras) -are not supported yet. - -1. If you use an early Linux 2.4 kernel, verify that no Linux driver is - actually using the USB device. A simple way to do that is simply to - disable the corresponding kernel module by renaming it from - ``mydriver.o`` to ``mydriver.o.disabled``. - -2. Verify that ``/proc/bus/usb`` is working (most Linux distributions - should enable it by default). You should see something like that: - - :: - - ls /proc/bus/usb - 001 devices drivers - -3. Since only root can access to the USB devices directly, you can - either launch QEMU as root or change the permissions of the USB - devices you want to use. For testing, the following suffices: - - :: - - chown -R myuid /proc/bus/usb - -4. Launch QEMU and do in the monitor: - - :: - - info usbhost - Device 1.2, speed 480 Mb/s - Class 00: USB device 1234:5678, USB DISK - - You should see the list of the devices you can use (Never try to use - hubs, it won't work). - -5. Add the device in QEMU by using: - - :: - - device_add usb-host,vendorid=0x1234,productid=0x5678 - - Normally the guest OS should report that a new USB device is plugged. - You can use the option ``-device usb-host,...`` to do the same. - -6. Now you can try to use the host USB device in QEMU. - -When relaunching QEMU, you may have to unplug and plug again the USB -device to make it work again (this is a bug). diff --git a/docs/system/vnc-security.rst b/docs/system/vnc-security.rst index 830f6acc738..4c1769eeb86 100644 --- a/docs/system/vnc-security.rst +++ b/docs/system/vnc-security.rst @@ -168,7 +168,7 @@ used is drastically reduced. In fact only the GSSAPI SASL mechanism provides an acceptable level of security by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 mechanism, however, it has multiple serious flaws described in detail in RFC 6331 and thus should -never be used any more. The SCRAM-SHA-1 mechanism provides a simple +never be used any more. The SCRAM-SHA-256 mechanism provides a simple username/password auth facility similar to DIGEST-MD5, but does not support session encryption, so can only be used in combination with TLS. @@ -191,11 +191,12 @@ reasonable configuration is :: - mech_list: scram-sha-1 + mech_list: scram-sha-256 sasldb_path: /etc/qemu/passwd.db The ``saslpasswd2`` program can be used to populate the ``passwd.db`` -file with accounts. +file with accounts. Note that the ``passwd.db`` file stores passwords +in clear text. Other SASL configurations will be left as an exercise for the reader. 
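For reference, accounts for the configuration shown above could be added with ``saslpasswd2`` along these lines (a sketch only; the user name ``fred`` is a placeholder and option spellings may differ between Cyrus SASL releases):

::

   # create (or update) an entry in the database named by sasldb_path
   saslpasswd2 -f /etc/qemu/passwd.db -c fred

   # list the accounts currently held in the database
   sasldblistusers2 -f /etc/qemu/passwd.db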
Note that all mechanisms, except GSSAPI, should be combined with use of diff --git a/docs/throttle.txt b/docs/throttle.txt index b5b78b7326d..0a0453a5ee6 100644 --- a/docs/throttle.txt +++ b/docs/throttle.txt @@ -273,11 +273,9 @@ A group can be created using the object-add QMP function: "arguments": { "qom-type": "throttle-group", "id": "group0", - "props": { - "limits" : { - "iops-total": 1000 - "bps-write": 2097152 - } + "limits" : { + "iops-total": 1000, + "bps-write": 2097152 } } } diff --git a/docs/tools/_templates/editpage.html b/docs/tools/_templates/editpage.html deleted file mode 100644 index 2a9c8fc92b3..00000000000 --- a/docs/tools/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
diff --git a/docs/tools/index.rst b/docs/tools/index.rst index d923834a739..1edd5a8054a 100644 --- a/docs/tools/index.rst +++ b/docs/tools/index.rst @@ -1,11 +1,9 @@ -.. This is the top level page for the 'tools' manual - - +----- Tools -===== - +----- -Contents: +This section of the manual documents QEMU's "tools": its +command line utilities and other standalone programs. .. toctree:: :maxdepth: 2 diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index c9efcfaefc4..d663dd92bd7 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -1,3 +1,4 @@ +======================= QEMU disk image utility ======================= @@ -126,9 +127,9 @@ by the used format or see the format descriptions below for details. .. option:: -S SIZE Indicates the consecutive number of bytes that must contain only zeros - for qemu-img to create a sparse image during conversion. This value is rounded - down to the nearest 512 bytes. You may use the common size suffixes like - ``k`` for kilobytes. + for ``qemu-img`` to create a sparse image during conversion. This value is + rounded down to the nearest 512 bytes. You may use the common size suffixes + like ``k`` for kilobytes. .. option:: -t CACHE @@ -414,7 +415,7 @@ Command description: 4 Error on reading data -.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME +.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE [-F BACKING_FMT]] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can @@ -430,7 +431,7 @@ Command description: suppressed from the destination image. *SPARSE_SIZE* indicates the consecutive number of bytes (defaults to 4k) - that must contain only zeros for qemu-img to create a sparse image during + that must contain only zeros for ``qemu-img`` to create a sparse image during conversion. If *SPARSE_SIZE* is 0, the source will not be scanned for unallocated or zero sectors, and the destination image will always be fully allocated. @@ -438,7 +439,7 @@ Command description: You can use the *BACKING_FILE* option to force the output image to be created as a copy on write image of the specified base image; the *BACKING_FILE* should have the same content as the input's base image, - however the path, image format, etc may differ. + however the path, image format (as given by *BACKING_FMT*), etc may differ. If a relative path name is given, the backing file is looked up relative to the directory containing *OUTPUT_FILENAME*. @@ -446,7 +447,7 @@ Command description: If the ``-n`` option is specified, the target volume creation will be skipped. This is useful for formats such as ``rbd`` if the target volume has already been created with site specific options that cannot - be supplied through qemu-img. + be supplied through ``qemu-img``. Out of order writes can be enabled with ``-W`` to improve performance. 
This is only recommended for preallocated devices like host devices or other @@ -456,6 +457,12 @@ Command description: *NUM_COROUTINES* specifies how many coroutines work in parallel during the convert process (defaults to 8). + Use of ``--bitmaps`` requests that any persistent bitmaps present in + the original are also copied to the destination. If any bitmap is + inconsistent in the source, the conversion will fail unless + ``--skip-broken-bitmaps`` is also specified to copy only the + consistent bitmaps. + .. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] Create the new disk image *FILENAME* of size *SIZE* and format @@ -465,7 +472,7 @@ Command description: If the option *BACKING_FILE* is specified, then the image will record only the differences from *BACKING_FILE*. No size needs to be specified in this case. *BACKING_FILE* will never be modified unless you use the - ``commit`` monitor command (or qemu-img commit). + ``commit`` monitor command (or ``qemu-img commit``). If a relative path name is given, the backing file is looked up relative to the directory containing *FILENAME*. @@ -593,13 +600,16 @@ Command description: the ``start``, ``length``, ``offset`` fields; it will also include other more specific information: - - whether the sectors contain actual data or not (boolean field ``data``; - if false, the sectors are either unallocated or stored as optimized - all-zero clusters); - - whether the data is known to read as zero (boolean field ``zero``); - - in order to make the output shorter, the target file is expressed as - a ``depth``; for example, a depth of 2 refers to the backing file - of the backing file of *FILENAME*. + - boolean field ``data``: true if the sectors contain actual data, + false if the sectors are either unallocated or stored as optimized + all-zero clusters + - boolean field ``zero``: true if the data is known to read as zero + - boolean field ``present``: true if the data belongs to the backing + chain, false if rebasing the backing chain onto a deeper file + would pick up data from the deeper file; + - integer field ``depth``: the depth within the backing chain at + which the data was resolved; for example, a depth of 2 refers to + the backing file of the backing file of *FILENAME*. In JSON format, the ``offset`` field is optional; it is absent in cases where ``human`` format would omit the entry or exit with an error. @@ -674,7 +684,7 @@ Command description: Safe mode This is the default mode and performs a real rebase operation. The - new backing file may differ from the old one and qemu-img rebase + new backing file may differ from the old one and ``qemu-img rebase`` will take care of keeping the guest-visible content of *FILENAME* unchanged. @@ -687,7 +697,7 @@ Command description: exists. Unsafe mode - qemu-img uses the unsafe mode if ``-u`` is specified. In this + ``qemu-img`` uses the unsafe mode if ``-u`` is specified. In this mode, only the backing file name and format of *FILENAME* is changed without any checks on the file contents. The user must take care of specifying the correct new backing file, or the guest-visible @@ -725,7 +735,7 @@ Command description: sizes accordingly. Failure to do so will result in data loss! When shrinking images, the ``--shrink`` option must be given. This informs - qemu-img that the user acknowledges all loss of data beyond the truncated + ``qemu-img`` that the user acknowledges all loss of data beyond the truncated image's end. 
After using this command to grow a disk image, you must use file system and @@ -866,6 +876,37 @@ Supported image file formats: issue ``lsattr filename`` to check if the NOCOW flag is set or not (Capital 'C' is NOCOW flag). + ``data_file`` + Filename where all guest data will be stored. If this option is used, + the qcow2 file will only contain the image's metadata. + + Note: Data loss will occur if the given filename already exists when + using this option with ``qemu-img create`` since ``qemu-img`` will create + the data file anew, overwriting the file's original contents. To simply + update the reference to point to the given pre-existing file, use + ``qemu-img amend``. + + ``data_file_raw`` + If this option is set to ``on``, QEMU will always keep the external data + file consistent as a standalone read-only raw image. + + It does this by forwarding all write accesses to the qcow2 file through to + the raw data file, including their offsets. Therefore, data that is visible + on the qcow2 node (i.e., to the guest) at some offset is visible at the same + offset in the raw data file. This results in a read-only raw image. Writes + that bypass the qcow2 metadata may corrupt the qcow2 metadata because the + out-of-band writes may result in the metadata falling out of sync with the + raw image. + + If this option is ``off``, QEMU will use the data file to store data in an + arbitrary manner. The file’s content will not make sense without the + accompanying qcow2 metadata. Where data is written will have no relation to + its offset as seen by the guest, and some writes (specifically zero writes) + may not be forwarded to the data file at all, but will only be handled by + modifying qcow2 metadata. + + This option can only be enabled if ``data_file`` is set. + ``Other`` QEMU also supports various other image file formats for diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst index ee862fa0bc0..6031f968931 100644 --- a/docs/tools/qemu-nbd.rst +++ b/docs/tools/qemu-nbd.rst @@ -1,3 +1,4 @@ +===================================== QEMU Disk Network Block Device Server ===================================== @@ -30,14 +31,14 @@ driver options if ``--image-opts`` is specified. *dev* is an NBD device. -.. option:: --object type,id=ID,...props... +.. option:: --object type,id=ID,... Define a new instance of the *type* object class identified by *ID*. See the :manpage:`qemu(1)` manual page for full details of the properties supported. The common object types that it makes sense to define are the ``secret`` object, which is used to supply passwords and/or encryption keys, and the ``tls-creds`` object, which is used to supply TLS - credentials for the qemu-nbd server or client. + credentials for the ``qemu-nbd`` server or client. .. option:: -p, --port=PORT @@ -98,8 +99,10 @@ driver options if ``--image-opts`` is specified. .. option:: --cache=CACHE - The cache mode to be used with the file. See the documentation of - the emulator's ``-drive cache=...`` option for allowed values. + The cache mode to be used with the file. Valid values are: + ``none``, ``writeback`` (the default), ``writethrough``, + ``directsync`` and ``unsafe``. See the documentation of + the emulator's ``-drive cache=...`` option for more info. .. option:: -n, --nocache @@ -235,7 +238,7 @@ daemon: Expose the guest-visible contents of a qcow2 file via a block device /dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for partitions found within), then disconnect the device when done. 
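The commands for the example described here are not part of this hunk; a minimal sketch of the sequence being described might look like the following (the image name, device node and mount point are placeholders):

::

   modprobe nbd
   qemu-nbd -c /dev/nbd0 --read-only disk.qcow2
   mount -o ro /dev/nbd0p1 /mnt
   # inspect the files, then clean up
   umount /mnt
   qemu-nbd -d /dev/nbd0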
-Access to bind qemu-nbd to an /dev/nbd device generally requires root +Access to bind ``qemu-nbd`` to a /dev/nbd device generally requires root privileges, and may also require the execution of ``modprobe nbd`` to enable the kernel NBD client module. *CAUTION*: Do not use this method to mount filesystems from an untrusted guest image - a diff --git a/docs/tools/qemu-pr-helper.rst b/docs/tools/qemu-pr-helper.rst index ac036180ac1..eaebe40da0e 100644 --- a/docs/tools/qemu-pr-helper.rst +++ b/docs/tools/qemu-pr-helper.rst @@ -1,3 +1,4 @@ +================================== QEMU persistent reservation helper ================================== diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index 3ec4bdd9145..3e5a9dc0320 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -1,3 +1,4 @@ +=================== QEMU Storage Daemon =================== @@ -9,9 +10,10 @@ Synopsis Description ----------- -qemu-storage-daemon provides disk image functionality from QEMU, qemu-img, and -qemu-nbd in a long-running process controlled via QMP commands without running -a virtual machine. It can export disk images, run block job operations, and +``qemu-storage-daemon`` provides disk image functionality from QEMU, +``qemu-img``, and ``qemu-nbd`` in a long-running process controlled via QMP +commands without running a virtual machine. +It can export disk images, run block job operations, and perform other disk-related operations. The daemon is controlled via a QMP monitor and initial configuration from the command-line. diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst index fb70445c751..d53073b52b6 100644 --- a/docs/tools/qemu-trace-stap.rst +++ b/docs/tools/qemu-trace-stap.rst @@ -1,3 +1,4 @@ +========================= QEMU SystemTap trace tool ========================= diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 00554c75bd7..07ac0be5511 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -101,6 +101,9 @@ Options Enable/disable extended attributes (xattr) on files and directories. The default is ``no_xattr``. + * posix_acl|no_posix_acl - + Enable/disable posix acl support. Posix ACLs are disabled by default. + .. option:: --socket-path=PATH Listen on vhost-user UNIX domain socket at PATH. @@ -127,14 +130,17 @@ Options timeout. ``always`` sets a long cache lifetime at the expense of coherency. The default is ``auto``. -xattr-mapping -------------- +Extended attribute (xattr) mapping +---------------------------------- By default the name of xattr's used by the client are passed through to the server file system. This can be a problem where either those xattr names are used by something on the server (e.g. selinux client/server confusion) or if the -virtiofsd is running in a container with restricted privileges where it cannot -access some attributes. +``virtiofsd`` is running in a container with restricted privileges where it +cannot access some attributes. + +Mapping syntax +~~~~~~~~~~~~~~ A mapping of xattr names can be made using -o xattrmap=mapping where the ``mapping`` string consists of a series of rules. @@ -177,6 +183,12 @@ Using ':' as the separator a rule is of the form: 'ok' as either an explicit terminator or for special handling of certain patterns. +- 'unsupported' - If a client tries to use a name matching 'key' it's + denied using ENOTSUP; when the server passes an attribute + name matching 'prepend' it's hidden. 
In many ways it's use is very like + 'ok' as either an explicit terminator or for special handling of certain + patterns. + **key** is a string tested as a prefix on an attribute name originating on the client. It maybe empty in which case a 'client' rule will always match on client names. @@ -232,14 +244,54 @@ Note: When the 'security.capability' xattr is remapped, the daemon has to do extra work to remove it during many operations, which the host kernel normally does itself. -xattr-mapping Examples ----------------------- +Security considerations +~~~~~~~~~~~~~~~~~~~~~~~ + +Operating systems typically partition the xattr namespace using +well defined name prefixes. Each partition may have different +access controls applied. For example, on Linux there are multiple +partitions + + * ``system.*`` - access varies depending on attribute & filesystem + * ``security.*`` - only processes with CAP_SYS_ADMIN + * ``trusted.*`` - only processes with CAP_SYS_ADMIN + * ``user.*`` - any process granted by file permissions / ownership + +While other OS such as FreeBSD have different name prefixes +and access control rules. + +When remapping attributes on the host, it is important to +ensure that the remapping does not allow a guest user to +evade the guest access control rules. + +Consider if ``trusted.*`` from the guest was remapped to +``user.virtiofs.trusted*`` in the host. An unprivileged +user in a Linux guest has the ability to write to xattrs +under ``user.*``. Thus the user can evade the access +control restriction on ``trusted.*`` by instead writing +to ``user.virtiofs.trusted.*``. + +As noted above, the partitions used and access controls +applied, will vary across guest OS, so it is not wise to +try to predict what the guest OS will use. + +The simplest way to avoid an insecure configuration is +to remap all xattrs at once, to a given fixed prefix. +This is shown in example (1) below. + +If selectively mapping only a subset of xattr prefixes, +then rules must be added to explicitly block direct +access to the target of the remapping. This is shown +in example (2) below. + +Mapping examples +~~~~~~~~~~~~~~~~ 1) Prefix all attributes with 'user.virtiofs.' :: --o xattrmap=":prefix:all::user.virtiofs.::bad:all:::" + -o xattrmap=":prefix:all::user.virtiofs.::bad:all:::" This uses two rules, using : as the field separator; @@ -250,7 +302,8 @@ the host set. This is equivalent to the 'map' rule: :: --o xattrmap=":map::user.virtiofs.:" + + -o xattrmap=":map::user.virtiofs.:" 2) Prefix 'trusted.' attributes, allow others through @@ -270,14 +323,17 @@ stripping of 'user.virtiofs.'. The second rule hides unprefixed 'trusted.' attributes on the host. The third rule stops a guest from explicitly setting -the 'user.virtiofs.' path directly. +the 'user.virtiofs.' path directly to prevent access +control bypass on the target of the earlier prefix +remapping. Finally, the fourth rule lets all remaining attributes through. This is equivalent to the 'map' rule: :: --o xattrmap="/map/trusted./user.virtiofs./" + + -o xattrmap="/map/trusted./user.virtiofs./" 3) Hide 'security.' attributes, and allow everything else @@ -298,13 +354,13 @@ Examples Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket ``/var/run/vm001-vhost-fs.sock``: -:: +.. 
parsed-literal:: host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 - host# qemu-system-x86_64 \ - -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ - -device vhost-user-fs-pci,chardev=char0,tag=myfs \ - -object memory-backend-memfd,id=mem,size=4G,share=on \ - -numa node,memdev=mem \ - ... + host# |qemu_system| \\ + -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \\ + -device vhost-user-fs-pci,chardev=char0,tag=myfs \\ + -object memory-backend-memfd,id=mem,size=4G,share=on \\ + -numa node,memdev=mem \\ + ... guest# mount -t virtiofs myfs /mnt diff --git a/docs/u2f.txt b/docs/u2f.txt index 8f44994818a..7f5813a0b72 100644 --- a/docs/u2f.txt +++ b/docs/u2f.txt @@ -21,7 +21,7 @@ The second factor is materialized by a device implementing the U2F protocol. In case of a USB U2F security key, it is a USB HID device that implements the U2F protocol. -In Qemu, the USB U2F key device offers a dedicated support of U2F, allowing +In QEMU, the USB U2F key device offers a dedicated support of U2F, allowing guest USB FIDO/U2F security keys operating in two possible modes: pass-through and emulated. diff --git a/docs/usb-storage.txt b/docs/usb-storage.txt deleted file mode 100644 index 551af6f88bb..00000000000 --- a/docs/usb-storage.txt +++ /dev/null @@ -1,59 +0,0 @@ - -qemu usb storage emulation --------------------------- - -QEMU has three devices for usb storage emulation. - -Number one emulates the classic bulk-only transport protocol which is -used by 99% of the usb sticks on the market today and is called -"usb-storage". Usage (hooking up to xhci, other host controllers work -too): - - qemu ${other_vm_args} \ - -drive if=none,id=stick,file=/path/to/file.img \ - -device nec-usb-xhci,id=xhci \ - -device usb-storage,bus=xhci.0,drive=stick - - -Number two is the newer usb attached scsi transport. This one doesn't -automagically create a scsi disk, so you have to explicitly attach one -manually. Multiple logical units are supported. Here is an example -with tree logical units: - - qemu ${other_vm_args} \ - -drive if=none,id=uas-disk1,file=/path/to/file1.img \ - -drive if=none,id=uas-disk2,file=/path/to/file2.img \ - -drive if=none,id=uas-cdrom,media=cdrom,file=/path/to/image.iso \ - -device nec-usb-xhci,id=xhci \ - -device usb-uas,id=uas,bus=xhci.0 \ - -device scsi-hd,bus=uas.0,scsi-id=0,lun=0,drive=uas-disk1 \ - -device scsi-hd,bus=uas.0,scsi-id=0,lun=1,drive=uas-disk2 \ - -device scsi-cd,bus=uas.0,scsi-id=0,lun=5,drive=uas-cdrom - - -Number three emulates the classic bulk-only transport protocol too. -It's called "usb-bot". It shares most code with "usb-storage", and -the guest will not be able to see the difference. The qemu command -line interface is similar to usb-uas though, i.e. no automatic scsi -disk creation. It also features support for up to 16 LUNs. The LUN -numbers must be continuous, i.e. for three devices you must use 0+1+2. -The 0+1+5 numbering from the "usb-uas" example isn't going to work -with "usb-bot". - -Starting with qemu version 2.7 usb-bot and usb-uas devices can be -hotplugged. In the hotplug case they are added with "attached = -false" so the guest will not see the device until the "attached" -property is explicitly set to true. That allows to attach one or more -scsi devices before making the device visible to the guest, i.e. the -workflow looks like this: - - (1) device-add usb-bot,id=foo - (2) device-add scsi-{hd,cd},bus=foo.0,lun=0 - (2b) optionally add more devices (luns 1 ... 15). 
- (3) scripts/qmp/qom-set foo.attached = true - -enjoy, - Gerd - --- -Gerd Hoffmann diff --git a/docs/usb2.txt b/docs/usb2.txt deleted file mode 100644 index 172614d3a7e..00000000000 --- a/docs/usb2.txt +++ /dev/null @@ -1,172 +0,0 @@ - -USB Quick Start -=============== - -XHCI controller support ------------------------ - -QEMU has XHCI host adapter support. The XHCI hardware design is much -more virtualization-friendly when compared to EHCI and UHCI, thus XHCI -emulation uses less resources (especially cpu). So if your guest -supports XHCI (which should be the case for any operating system -released around 2010 or later) we recommend using it: - - qemu -device qemu-xhci - -XHCI supports USB 1.1, USB 2.0 and USB 3.0 devices, so this is the -only controller you need. With only a single USB controller (and -therefore only a single USB bus) present in the system there is no -need to use the bus= parameter when adding USB devices. - - -EHCI controller support ------------------------ - -The QEMU EHCI Adapter supports USB 2.0 devices. It can be used either -standalone or with companion controllers (UHCI, OHCI) for USB 1.1 -devices. The companion controller setup is more convenient to use -because it provides a single USB bus supporting both USB 2.0 and USB -1.1 devices. See next section for details. - -When running EHCI in standalone mode you can add UHCI or OHCI -controllers for USB 1.1 devices too. Each controller creates its own -bus though, so there are two completely separate USB buses: One USB -1.1 bus driven by the UHCI controller and one USB 2.0 bus driven by -the EHCI controller. Devices must be attached to the correct -controller manually. - -The easiest way to add a UHCI controller to a 'pc' machine is the -'-usb' switch. QEMU will create the UHCI controller as function of -the PIIX3 chipset. The USB 1.1 bus will carry the name "usb-bus.0". - -You can use the standard -device switch to add a EHCI controller to -your virtual machine. It is strongly recommended to specify an ID for -the controller so the USB 2.0 bus gets an individual name, for example -'-device usb-ehci,id=ehci". This will give you a USB 2.0 bus named -"ehci.0". - -When adding USB devices using the -device switch you can specify the -bus they should be attached to. Here is a complete example: - - qemu -M pc ${otheroptions} \ - -drive if=none,id=usbstick,file=/path/to/image \ - -usb \ - -device usb-ehci,id=ehci \ - -device usb-tablet,bus=usb-bus.0 \ - -device usb-storage,bus=ehci.0,drive=usbstick - -This attaches a USB tablet to the UHCI adapter and a USB mass storage -device to the EHCI adapter. - - -Companion controller support ----------------------------- - -The UHCI and OHCI controllers can attach to a USB bus created by EHCI -as companion controllers. This is done by specifying the masterbus -and firstport properties. masterbus specifies the bus name the -controller should attach to. firstport specifies the first port the -controller should attach to, which is needed as usually one EHCI -controller with six ports has three UHCI companion controllers with -two ports each. - -There is a config file in docs which will do all this for -you, just try ... - - qemu -readconfig docs/config/ich9-ehci-uhci.cfg - -... then use "bus=ehci.0" to assign your USB devices to that bus. - -Using the '-usb' switch for 'q35' machines will create a similar -USB controller configuration. 
- - -More USB tips & tricks -====================== - -Recently the USB pass through driver (also known as usb-host) and the -QEMU USB subsystem gained a few capabilities which are available only -via qdev properties, i,e. when using '-device'. - - -physical port addressing ------------------------- - -First you can (for all USB devices) specify the physical port where -the device will show up in the guest. This can be done using the -"port" property. UHCI has two root ports (1,2). EHCI has six root -ports (1-6), the emulated (1.1) USB hub has eight ports. - -Plugging a tablet into UHCI port 1 works like this: - - -device usb-tablet,bus=usb-bus.0,port=1 - -Plugging a hub into UHCI port 2 works like this: - - -device usb-hub,bus=usb-bus.0,port=2 - -Plugging a virtual USB stick into port 4 of the hub just plugged works -this way: - - -device usb-storage,bus=usb-bus.0,port=2.4,drive=... - -You can do basically the same in the monitor using the device_add -command. If you want to unplug devices too you should specify some -unique id which you can use to refer to the device ... - - (qemu) device_add usb-tablet,bus=usb-bus.0,port=1,id=my-tablet - (qemu) device_del my-tablet - -... when unplugging it with device_del. - - -USB pass through hints ----------------------- - -The usb-host driver has a bunch of properties to specify the device -which should be passed to the guest: - - hostbus= -- Specifies the bus number the device must be attached - to. - - hostaddr= -- Specifies the device address the device got - assigned by the guest os. - - hostport= -- Specifies the physical port the device is attached - to. - - vendorid= -- Specifies the vendor ID of the device. - productid= -- Specifies the product ID of the device. - -In theory you can combine all these properties as you like. In -practice only a few combinations are useful: - - (1) vendorid+productid -- match for a specific device, pass it to - the guest when it shows up somewhere in the host. - - (2) hostbus+hostport -- match for a specific physical port in the - host, any device which is plugged in there gets passed to the - guest. - - (3) hostbus+hostaddr -- most useful for ad-hoc pass through as the - hostaddr isn't stable, the next time you plug in the device it - gets a new one ... - -Note that USB 1.1 devices are handled by UHCI/OHCI and USB 2.0 by -EHCI. That means a device plugged into the very same physical port -may show up on different buses depending on the speed. The port I'm -using for testing is bus 1 + port 1 for 2.0 devices and bus 3 + port 1 -for 1.1 devices. Passing through any device plugged into that port -and also assign them to the correct bus can be done this way: - - qemu -M pc ${otheroptions} \ - -usb \ - -device usb-ehci,id=ehci \ - -device usb-host,bus=usb-bus.0,hostbus=3,hostport=1 \ - -device usb-host,bus=ehci.0,hostbus=1,hostport=1 - -enjoy, - Gerd - --- -Gerd Hoffmann diff --git a/docs/user/_templates/editpage.html b/docs/user/_templates/editpage.html deleted file mode 100644 index 1f5ee01e606..00000000000 --- a/docs/user/_templates/editpage.html +++ /dev/null @@ -1,5 +0,0 @@ -
- -
diff --git a/docs/user/index.rst b/docs/user/index.rst index a5b47459ec7..2c4e29f3dbc 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -1,15 +1,11 @@ -.. This is the top level page for the 'user' manual. - - +------------------- User Mode Emulation -=================== +------------------- -This manual is the overall guide for users using QEMU +This section of the manual is the overall guide for users using QEMU for user-mode emulation. In this mode, QEMU can launch processes compiled for one CPU on another CPU. -Contents: - .. toctree:: :maxdepth: 2 diff --git a/dtc b/dtc index 85e5d839847..b6910bec116 160000 --- a/dtc +++ b/dtc @@ -1 +1 @@ -Subproject commit 85e5d839847af54efab170f2b1331b2a6421e647 +Subproject commit b6910bec11614980a21e46fbccc35934b671bd81 diff --git a/dump/dump.c b/dump/dump.c index 929138e91d0..662d0a62cd9 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -15,7 +15,6 @@ #include "qemu-common.h" #include "qemu/cutils.h" #include "elf.h" -#include "cpu.h" #include "exec/hwaddr.h" #include "monitor/monitor.h" #include "sysemu/kvm.h" @@ -30,6 +29,7 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "hw/misc/vmcoreinfo.h" +#include "migration/blocker.h" #ifdef TARGET_X86_64 #include "win_dump.h" @@ -48,6 +48,8 @@ #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */ +static Error *dump_migration_blocker; + #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ ((DIV_ROUND_UP((hdr_size), 4) + \ DIV_ROUND_UP((name_size), 4) + \ @@ -102,6 +104,7 @@ static int dump_cleanup(DumpState *s) qemu_mutex_unlock_iothread(); } } + migrate_del_blocker(dump_migration_blocker); return 0; } @@ -2006,6 +2009,21 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } + if (!dump_migration_blocker) { + error_setg(&dump_migration_blocker, + "Live migration disabled: dump-guest-memory in progress"); + } + + /* + * Allows even for -only-migratable, but forbid migration during the + * process of dump guest memory. + */ + if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { + /* Remember to release the fd before passing it over to dump state */ + close(fd); + return; + } + s = &dump_state_global; dump_state_prepare(s); diff --git a/dump/win_dump.c b/dump/win_dump.c index 652c7bad995..c5eb5a9aacd 100644 --- a/dump/win_dump.c +++ b/dump/win_dump.c @@ -12,7 +12,6 @@ #include "qemu-common.h" #include "qemu/cutils.h" #include "elf.h" -#include "cpu.h" #include "exec/hwaddr.h" #include "monitor/monitor.h" #include "sysemu/kvm.h" diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c new file mode 100644 index 00000000000..e71e229190d --- /dev/null +++ b/ebpf/ebpf_rss-stub.c @@ -0,0 +1,40 @@ +/* + * eBPF RSS stub file + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Yuri Benditovich + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "ebpf/ebpf_rss.h" + +void ebpf_rss_init(struct EBPFRSSContext *ctx) +{ + +} + +bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) +{ + return false; +} + +bool ebpf_rss_load(struct EBPFRSSContext *ctx) +{ + return false; +} + +bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, + uint16_t *indirections_table, uint8_t *toeplitz_key) +{ + return false; +} + +void ebpf_rss_unload(struct EBPFRSSContext *ctx) +{ + +} diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c new file mode 100644 index 00000000000..118c68da831 --- /dev/null +++ b/ebpf/ebpf_rss.c @@ -0,0 +1,165 @@ +/* + * eBPF RSS loader + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Andrew Melnychenko + * Yuri Benditovich + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" + +#include +#include + +#include "hw/virtio/virtio-net.h" /* VIRTIO_NET_RSS_MAX_TABLE_LEN */ + +#include "ebpf/ebpf_rss.h" +#include "ebpf/rss.bpf.skeleton.h" +#include "trace.h" + +void ebpf_rss_init(struct EBPFRSSContext *ctx) +{ + if (ctx != NULL) { + ctx->obj = NULL; + } +} + +bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) +{ + return ctx != NULL && ctx->obj != NULL; +} + +bool ebpf_rss_load(struct EBPFRSSContext *ctx) +{ + struct rss_bpf *rss_bpf_ctx; + + if (ctx == NULL) { + return false; + } + + rss_bpf_ctx = rss_bpf__open(); + if (rss_bpf_ctx == NULL) { + trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object"); + goto error; + } + + bpf_program__set_socket_filter(rss_bpf_ctx->progs.tun_rss_steering_prog); + + if (rss_bpf__load(rss_bpf_ctx)) { + trace_ebpf_error("eBPF RSS", "can not load RSS program"); + goto error; + } + + ctx->obj = rss_bpf_ctx; + ctx->program_fd = bpf_program__fd( + rss_bpf_ctx->progs.tun_rss_steering_prog); + ctx->map_configuration = bpf_map__fd( + rss_bpf_ctx->maps.tap_rss_map_configurations); + ctx->map_indirections_table = bpf_map__fd( + rss_bpf_ctx->maps.tap_rss_map_indirection_table); + ctx->map_toeplitz_key = bpf_map__fd( + rss_bpf_ctx->maps.tap_rss_map_toeplitz_key); + + return true; +error: + rss_bpf__destroy(rss_bpf_ctx); + ctx->obj = NULL; + + return false; +} + +static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx, + struct EBPFRSSConfig *config) +{ + uint32_t map_key = 0; + + if (!ebpf_rss_is_loaded(ctx)) { + return false; + } + if (bpf_map_update_elem(ctx->map_configuration, + &map_key, config, 0) < 0) { + return false; + } + return true; +} + +static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, + uint16_t *indirections_table, + size_t len) +{ + uint32_t i = 0; + + if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL || + len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + return false; + } + + for (; i < len; ++i) { + if (bpf_map_update_elem(ctx->map_indirections_table, &i, + indirections_table + i, 0) < 0) { + return false; + } + } + return true; +} + +static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, + uint8_t *toeplitz_key) +{ + uint32_t map_key = 0; + + /* prepare toeplitz key */ + uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {}; + + if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) { + return false; + } + memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE); + *(uint32_t *)toe = ntohl(*(uint32_t *)toe); + + if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe, + 0) < 0) { + return false; + } + return true; +} + +bool 
ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, + uint16_t *indirections_table, uint8_t *toeplitz_key) +{ + if (!ebpf_rss_is_loaded(ctx) || config == NULL || + indirections_table == NULL || toeplitz_key == NULL) { + return false; + } + + if (!ebpf_rss_set_config(ctx, config)) { + return false; + } + + if (!ebpf_rss_set_indirections_table(ctx, indirections_table, + config->indirections_len)) { + return false; + } + + if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) { + return false; + } + + return true; +} + +void ebpf_rss_unload(struct EBPFRSSContext *ctx) +{ + if (!ebpf_rss_is_loaded(ctx)) { + return; + } + + rss_bpf__destroy(ctx->obj); + ctx->obj = NULL; +} diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h new file mode 100644 index 00000000000..bf3f2572c7c --- /dev/null +++ b/ebpf/ebpf_rss.h @@ -0,0 +1,44 @@ +/* + * eBPF RSS header + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Andrew Melnychenko + * Yuri Benditovich + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef QEMU_EBPF_RSS_H +#define QEMU_EBPF_RSS_H + +struct EBPFRSSContext { + void *obj; + int program_fd; + int map_configuration; + int map_toeplitz_key; + int map_indirections_table; +}; + +struct EBPFRSSConfig { + uint8_t redirect; + uint8_t populate_hash; + uint32_t hash_types; + uint16_t indirections_len; + uint16_t default_queue; +} __attribute__((packed)); + +void ebpf_rss_init(struct EBPFRSSContext *ctx); + +bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx); + +bool ebpf_rss_load(struct EBPFRSSContext *ctx); + +bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config, + uint16_t *indirections_table, uint8_t *toeplitz_key); + +void ebpf_rss_unload(struct EBPFRSSContext *ctx); + +#endif /* QEMU_EBPF_RSS_H */ diff --git a/ebpf/meson.build b/ebpf/meson.build new file mode 100644 index 00000000000..2dd0fd89480 --- /dev/null +++ b/ebpf/meson.build @@ -0,0 +1 @@ +softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c')) diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h new file mode 100644 index 00000000000..126683eb878 --- /dev/null +++ b/ebpf/rss.bpf.skeleton.h @@ -0,0 +1,431 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* THIS FILE IS AUTOGENERATED! 
*/ +#ifndef __RSS_BPF_SKEL_H__ +#define __RSS_BPF_SKEL_H__ + +#include +#include + +struct rss_bpf { + struct bpf_object_skeleton *skeleton; + struct bpf_object *obj; + struct { + struct bpf_map *tap_rss_map_configurations; + struct bpf_map *tap_rss_map_indirection_table; + struct bpf_map *tap_rss_map_toeplitz_key; + } maps; + struct { + struct bpf_program *tun_rss_steering_prog; + } progs; + struct { + struct bpf_link *tun_rss_steering_prog; + } links; +}; + +static void +rss_bpf__destroy(struct rss_bpf *obj) +{ + if (!obj) + return; + if (obj->skeleton) + bpf_object__destroy_skeleton(obj->skeleton); + free(obj); +} + +static inline int +rss_bpf__create_skeleton(struct rss_bpf *obj); + +static inline struct rss_bpf * +rss_bpf__open_opts(const struct bpf_object_open_opts *opts) +{ + struct rss_bpf *obj; + + obj = (struct rss_bpf *)calloc(1, sizeof(*obj)); + if (!obj) + return NULL; + if (rss_bpf__create_skeleton(obj)) + goto err; + if (bpf_object__open_skeleton(obj->skeleton, opts)) + goto err; + + return obj; +err: + rss_bpf__destroy(obj); + return NULL; +} + +static inline struct rss_bpf * +rss_bpf__open(void) +{ + return rss_bpf__open_opts(NULL); +} + +static inline int +rss_bpf__load(struct rss_bpf *obj) +{ + return bpf_object__load_skeleton(obj->skeleton); +} + +static inline struct rss_bpf * +rss_bpf__open_and_load(void) +{ + struct rss_bpf *obj; + + obj = rss_bpf__open(); + if (!obj) + return NULL; + if (rss_bpf__load(obj)) { + rss_bpf__destroy(obj); + return NULL; + } + return obj; +} + +static inline int +rss_bpf__attach(struct rss_bpf *obj) +{ + return bpf_object__attach_skeleton(obj->skeleton); +} + +static inline void +rss_bpf__detach(struct rss_bpf *obj) +{ + return bpf_object__detach_skeleton(obj->skeleton); +} + +static inline int +rss_bpf__create_skeleton(struct rss_bpf *obj) +{ + struct bpf_object_skeleton *s; + + s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s)); + if (!s) + return -1; + obj->skeleton = s; + + s->sz = sizeof(*s); + s->name = "rss_bpf"; + s->obj = &obj->obj; + + /* maps */ + s->map_cnt = 3; + s->map_skel_sz = sizeof(*s->maps); + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz); + if (!s->maps) + goto err; + + s->maps[0].name = "tap_rss_map_configurations"; + s->maps[0].map = &obj->maps.tap_rss_map_configurations; + + s->maps[1].name = "tap_rss_map_indirection_table"; + s->maps[1].map = &obj->maps.tap_rss_map_indirection_table; + + s->maps[2].name = "tap_rss_map_toeplitz_key"; + s->maps[2].map = &obj->maps.tap_rss_map_toeplitz_key; + + /* programs */ + s->prog_cnt = 1; + s->prog_skel_sz = sizeof(*s->progs); + s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz); + if (!s->progs) + goto err; + + s->progs[0].name = "tun_rss_steering_prog"; + s->progs[0].prog = &obj->progs.tun_rss_steering_prog; + s->progs[0].link = &obj->links.tun_rss_steering_prog; + + s->data_sz = 8088; + s->data = (void *)"\ +\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x18\x1d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0a\0\ +\x01\0\xbf\x18\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\ +\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x07\0\0\0\0\0\0\ +\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x66\x02\0\0\0\0\xbf\x79\0\0\ 
+\0\0\0\0\x15\x09\x64\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\ +\0\x5d\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\ +\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\ +\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\ +\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\ +\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\ +\xff\0\0\0\0\x15\x08\x4c\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\ +\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\ +\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\ +\x77\0\0\0\x20\0\0\0\x55\0\x11\0\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\xff\ +\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\x55\ +\x03\x0c\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\ +\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\ +\x85\0\0\0\x44\0\0\0\x69\xa1\xd0\xff\0\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\ +\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x2f\x02\0\0\0\0\x15\x01\x2e\x02\0\0\0\0\x7b\ +\x9a\x30\xff\0\0\0\0\x15\x01\x57\0\x86\xdd\0\0\x55\x01\x3b\0\x08\0\0\0\x7b\x7a\ +\x20\xff\0\0\0\0\xb7\x07\0\0\x01\0\0\0\x73\x7a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\ +\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\ +\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\ +\0\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\ +\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x1a\x02\0\0\0\0\x69\xa1\xd6\xff\0\0\ +\0\0\x55\x01\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xdc\xff\0\0\0\0\x63\x1a\ +\x5c\xff\0\0\0\0\x61\xa1\xe0\xff\0\0\0\0\x63\x1a\x60\xff\0\0\0\0\x73\x7a\x56\ +\xff\0\0\0\0\x71\xa9\xd9\xff\0\0\0\0\x71\xa1\xd0\xff\0\0\0\0\x67\x01\0\0\x02\0\ +\0\0\x57\x01\0\0\x3c\0\0\0\x7b\x1a\x40\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\xbf\ +\x91\0\0\0\0\0\0\x57\x01\0\0\xff\0\0\0\x15\x01\x19\0\0\0\0\0\x71\xa1\x56\xff\0\ +\0\0\0\x55\x01\x17\0\0\0\0\0\x57\x09\0\0\xff\0\0\0\x15\x09\x7a\x01\x11\0\0\0\ +\x55\x09\x14\0\x06\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x53\xff\0\0\0\0\xb7\x01\ +\0\0\0\0\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\ +\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\ +\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\ +\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\xf4\x01\0\0\0\0\x69\xa1\ +\xd0\xff\0\0\0\0\x6b\x1a\x58\xff\0\0\0\0\x69\xa1\xd2\xff\0\0\0\0\x6b\x1a\x5a\ +\xff\0\0\0\0\x71\xa1\x50\xff\0\0\0\0\x15\x01\xd4\0\0\0\0\0\x71\x62\x03\0\0\0\0\ +\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\x12\0\0\0\0\0\0\x71\x63\x04\ +\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\0\0\x4f\x31\0\0\0\0\0\0\x67\ +\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\xff\0\0\0\0\x79\xa0\x30\xff\ +\0\0\0\0\x15\x02\x06\x01\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x02\0\0\0\x15\ +\x02\x03\x01\0\0\0\0\x61\xa1\x5c\xff\0\0\0\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\ +\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x69\xa1\x58\xff\0\0\0\0\x6b\x1a\xa8\ +\xff\0\0\0\0\x69\xa1\x5a\xff\0\0\0\0\x6b\x1a\xaa\xff\0\0\0\0\x05\0\x65\x01\0\0\ +\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x51\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\ +\xf0\xff\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\ +\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\ 
+\xff\xff\xb7\x01\0\0\x28\0\0\0\x7b\x1a\x40\xff\0\0\0\0\xbf\x81\0\0\0\0\0\0\xb7\ +\x02\0\0\0\0\0\0\xb7\x04\0\0\x28\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\ +\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x10\x01\0\0\0\0\x79\xa1\xe0\ +\xff\0\0\0\0\x63\x1a\x64\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x68\xff\0\0\ +\0\0\x79\xa1\xd8\xff\0\0\0\0\x63\x1a\x5c\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\ +\x1a\x60\xff\0\0\0\0\x79\xa1\xe8\xff\0\0\0\0\x63\x1a\x6c\xff\0\0\0\0\x77\x01\0\ +\0\x20\0\0\0\x63\x1a\x70\xff\0\0\0\0\x79\xa1\xf0\xff\0\0\0\0\x63\x1a\x74\xff\0\ +\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x78\xff\0\0\0\0\x71\xa9\xd6\xff\0\0\0\0\ +\x25\x09\xff\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\0\0\x18\x02\0\0\ +\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\0\ +\xf8\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x6b\x1a\xfe\xff\0\0\0\0\xb7\x01\0\0\x28\0\0\ +\0\x7b\x1a\x40\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x8c\xff\xff\xff\x7b\ +\x1a\x18\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x7c\xff\xff\xff\x7b\x1a\ +\x10\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\x28\xff\0\0\0\0\x7b\x7a\x20\xff\0\ +\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xfe\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\ +\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\ +\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x90\ +\x01\0\0\0\0\xbf\x91\0\0\0\0\0\0\x15\x01\x23\0\x3c\0\0\0\x15\x01\x59\0\x2c\0\0\ +\0\x55\x01\x5a\0\x2b\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xf8\xff\0\0\0\0\xbf\xa3\ +\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\ +\0\0\0\xb7\x04\0\0\x04\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\ +\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x03\x01\0\0\0\ +\0\x71\xa1\xfa\xff\0\0\0\0\x55\x01\x4b\0\x02\0\0\0\x71\xa1\xf9\xff\0\0\0\0\x55\ +\x01\x49\0\x02\0\0\0\x71\xa1\xfb\xff\0\0\0\0\x55\x01\x47\0\x01\0\0\0\x79\xa2\ +\x40\xff\0\0\0\0\x07\x02\0\0\x08\0\0\0\xbf\x81\0\0\0\0\0\0\x79\xa3\x18\xff\0\0\ +\0\0\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\ +\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\xf2\0\0\0\0\0\ +\xb7\x01\0\0\x01\0\0\0\x73\x1a\x55\xff\0\0\0\0\x05\0\x39\0\0\0\0\0\xb7\x01\0\0\ +\0\0\0\0\x6b\x1a\xf8\xff\0\0\0\0\xb7\x09\0\0\x02\0\0\0\xb7\x07\0\0\x1e\0\0\0\ +\x05\0\x0e\0\0\0\0\0\x79\xa2\x38\xff\0\0\0\0\x0f\x29\0\0\0\0\0\0\xbf\x92\0\0\0\ +\0\0\0\x07\x02\0\0\x01\0\0\0\x71\xa3\xff\xff\0\0\0\0\x67\x03\0\0\x03\0\0\0\x2d\ +\x23\x02\0\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x2b\0\0\0\0\0\x07\x07\0\0\xff\ +\xff\xff\xff\xbf\x72\0\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x77\x02\0\0\x20\0\0\0\ +\x15\x02\xf9\xff\0\0\0\0\x7b\x9a\x38\xff\0\0\0\0\x79\xa1\x40\xff\0\0\0\0\x0f\ +\x19\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\ +\0\0\0\xbf\x92\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\ +\0\x44\0\0\0\xbf\x01\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\ +\x55\x01\x94\0\0\0\0\0\x71\xa2\xf8\xff\0\0\0\0\x55\x02\x0f\0\xc9\0\0\0\x07\x09\ +\0\0\x02\0\0\0\xbf\x81\0\0\0\0\0\0\xbf\x92\0\0\0\0\0\0\x79\xa3\x10\xff\0\0\0\0\ +\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\0\0\ +\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x87\0\0\0\0\0\xb7\ +\x01\0\0\x01\0\0\0\x73\x1a\x54\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x07\0\ +\0\0\0\0\xb7\x09\0\0\x01\0\0\0\x15\x02\xd1\xff\0\0\0\0\x71\xa9\xf9\xff\0\0\0\0\ +\x07\x09\0\0\x02\0\0\0\x05\0\xce\xff\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x56\ 
+\xff\0\0\0\0\x71\xa1\xff\xff\0\0\0\0\x67\x01\0\0\x03\0\0\0\x79\xa2\x40\xff\0\0\ +\0\0\x0f\x12\0\0\0\0\0\0\x07\x02\0\0\x08\0\0\0\x7b\x2a\x40\xff\0\0\0\0\x71\xa9\ +\xfe\xff\0\0\0\0\x25\x09\x0e\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\ +\0\0\x18\x02\0\0\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\ +\0\0\0\0\0\x05\0\x07\0\0\0\0\0\x79\xa1\x28\xff\0\0\0\0\x07\x01\0\0\x01\0\0\0\ +\x7b\x1a\x28\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\ +\x82\xff\x0b\0\0\0\x05\0\x10\xff\0\0\0\0\x15\x09\xf8\xff\x87\0\0\0\x05\0\xfd\ +\xff\0\0\0\0\x71\xa1\x51\xff\0\0\0\0\x79\xa0\x30\xff\0\0\0\0\x15\x01\x17\x01\0\ +\0\0\0\x71\x62\x03\0\0\0\0\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\ +\x12\0\0\0\0\0\0\x71\x63\x04\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\ +\0\0\x4f\x31\0\0\0\0\0\0\x67\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\ +\xff\0\0\0\0\x15\x02\x3d\0\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x10\0\0\0\ +\x15\x02\x3a\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\ +\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\ +\x07\x03\0\0\x7c\xff\xff\xff\x67\x01\0\0\x38\0\0\0\xc7\x01\0\0\x38\0\0\0\x65\ +\x01\x01\0\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\ +\x6c\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\ +\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x65\x01\x01\0\xff\xff\xff\ +\xff\xbf\x43\0\0\0\0\0\0\x61\x21\x04\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\x24\0\ +\0\0\0\0\0\x4f\x41\0\0\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\0\x61\x21\x08\0\0\0\0\0\ +\x61\x22\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\xa8\ +\xff\0\0\0\0\x61\x31\0\0\0\0\0\0\x61\x32\x04\0\0\0\0\0\x61\x34\x08\0\0\0\0\0\ +\x61\x33\x0c\0\0\0\0\0\x69\xa5\x5a\xff\0\0\0\0\x6b\x5a\xc2\xff\0\0\0\0\x69\xa5\ +\x58\xff\0\0\0\0\x6b\x5a\xc0\xff\0\0\0\0\x67\x03\0\0\x20\0\0\0\x4f\x43\0\0\0\0\ +\0\0\x7b\x3a\xb8\xff\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\ +\xb0\xff\0\0\0\0\x05\0\x6b\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x04\0\0\0\ +\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x04\0\0\0\x15\x02\x01\0\0\0\0\0\x05\0\xf7\ +\xfe\0\0\0\0\x57\x01\0\0\x01\0\0\0\x15\x01\xd3\0\0\0\0\0\x61\xa1\x5c\xff\0\0\0\ +\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x05\ +\0\x5e\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x1e\0\0\0\0\0\xbf\x12\0\0\0\0\ +\0\0\x57\x02\0\0\x20\0\0\0\x15\x02\x1b\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\ +\0\x5c\xff\xff\xff\x71\xa4\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\ +\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\0\x01\0\0\ +\x15\x01\x01\0\0\0\0\0\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x6c\ +\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\0\0\ +\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x15\x01\xc3\xff\0\0\0\0\x05\0\ +\xc1\xff\0\0\0\0\xb7\x09\0\0\x3c\0\0\0\x79\xa7\x20\xff\0\0\0\0\x67\0\0\0\x20\0\ +\0\0\x77\0\0\0\x20\0\0\0\x15\0\xa5\xfe\0\0\0\0\x05\0\xb0\0\0\0\0\0\x15\x09\x07\ +\xff\x87\0\0\0\x05\0\xa2\xfe\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x08\0\0\0\ +\x15\x02\xab\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\ +\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\ +\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\x40\0\0\0\x15\x01\x01\0\0\0\0\0\xbf\ +\x32\0\0\0\0\0\0\x61\x23\x04\0\0\0\0\0\x67\x03\0\0\x20\0\0\0\x61\x24\0\0\0\0\0\ +\0\x4f\x43\0\0\0\0\0\0\x7b\x3a\xa0\xff\0\0\0\0\x61\x23\x08\0\0\0\0\0\x61\x22\ 
+\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x32\0\0\0\0\0\0\x7b\x2a\xa8\xff\0\0\0\ +\0\x15\x01\x1c\0\0\0\0\0\x71\xa1\x55\xff\0\0\0\0\x15\x01\x1a\0\0\0\0\0\x61\xa1\ +\x98\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x94\xff\0\0\0\0\x4f\x21\0\0\0\0\ +\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x90\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\ +\xa2\x8c\xff\0\0\0\0\x05\0\x19\0\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x52\xff\ +\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\ +\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\0\0\0\xb7\x04\0\ +\0\x08\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\ +\0\0\0\x20\0\0\0\x55\0\x7d\0\0\0\0\0\x05\0\x88\xfe\0\0\0\0\xb7\x09\0\0\x2b\0\0\ +\0\x05\0\xc6\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\ +\x74\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x70\xff\0\ +\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x6c\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\ +\x1a\xb0\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x07\x07\0\0\x04\0\0\0\x61\x03\0\0\0\0\ +\0\0\xb7\x05\0\0\0\0\0\0\x05\0\x4e\0\0\0\0\0\xaf\x52\0\0\0\0\0\0\xbf\x75\0\0\0\ +\0\0\0\x0f\x15\0\0\0\0\0\0\x71\x55\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\ +\0\0\0\0\0\x77\0\0\0\x07\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\ +\0\x39\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\ +\x50\0\0\0\0\0\0\x77\0\0\0\x06\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\ +\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3a\0\0\0\xc7\0\0\0\x3f\0\0\ +\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\ +\0\0\0\x77\0\0\0\x05\0\0\0\x57\0\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\ +\0\0\0\0\x67\0\0\0\x3b\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\ +\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x04\0\0\0\x57\0\ +\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3c\0\0\0\xc7\ +\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\ +\x77\0\0\0\x03\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\ +\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3d\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\ +\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x02\0\0\0\x57\0\0\0\ +\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\ +\0\0\x3e\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\ +\x50\0\0\0\0\0\0\x77\0\0\0\x01\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\ +\x4f\x03\0\0\0\0\0\0\x57\x04\0\0\x01\0\0\0\x87\x04\0\0\0\0\0\0\x5f\x34\0\0\0\0\ +\0\0\xaf\x42\0\0\0\0\0\0\x57\x05\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x53\0\ +\0\0\0\0\0\x07\x01\0\0\x01\0\0\0\xbf\x25\0\0\0\0\0\0\x15\x01\x0b\0\x24\0\0\0\ +\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\xa0\xff\xff\xff\x0f\x12\0\0\0\0\0\0\x71\x24\0\ +\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x38\0\0\0\xc7\0\0\0\x38\0\0\0\xb7\x02\ +\0\0\0\0\0\0\x65\0\xa9\xff\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\x05\0\xa7\xff\0\ +\0\0\0\xbf\x21\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x15\x01\ +\x0e\0\0\0\0\0\x71\x63\x06\0\0\0\0\0\x71\x64\x07\0\0\0\0\0\x67\x04\0\0\x08\0\0\ +\0\x4f\x34\0\0\0\0\0\0\x3f\x41\0\0\0\0\0\0\x2f\x41\0\0\0\0\0\0\x1f\x12\0\0\0\0\ +\0\0\x63\x2a\x50\xff\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x50\xff\xff\xff\ +\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\x55\0\x05\0\0\0\0\0\ +\x71\x61\x08\0\0\0\0\0\x71\x60\x09\0\0\0\0\0\x67\0\0\0\x08\0\0\0\x4f\x10\0\0\0\ +\0\0\0\x95\0\0\0\0\0\0\0\x69\0\0\0\0\0\0\0\x05\0\xfd\xff\0\0\0\0\x02\0\0\0\x04\ 
+\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x28\0\0\0\x01\0\0\0\0\0\ +\0\0\x02\0\0\0\x04\0\0\0\x02\0\0\0\x80\0\0\0\0\0\0\0\x47\x50\x4c\x20\x76\x32\0\ +\0\0\0\0\0\x10\0\0\0\0\0\0\0\x01\x7a\x52\0\x08\x7c\x0b\x01\x0c\0\0\0\x18\0\0\0\ +\x18\0\0\0\0\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\xa0\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x60\x02\0\0\0\0\x03\0\x20\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3f\x02\0\0\0\0\ +\x03\0\xd0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xed\x01\0\0\0\0\x03\0\x10\x10\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\xd4\x01\0\0\0\0\x03\0\x20\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\xa3\x01\0\0\0\0\x03\0\xb8\x12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x01\0\0\0\0\ +\x03\0\x48\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2a\x01\0\0\0\0\x03\0\x10\x13\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\xe1\0\0\0\0\0\x03\0\xa0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\x2e\x02\0\0\0\0\x03\0\x28\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x02\0\0\0\0\x03\ +\0\xc0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x36\x02\0\0\0\0\x03\0\xc8\x13\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\x22\x01\0\0\0\0\x03\0\xe8\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\x02\x01\0\0\0\0\x03\0\x40\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd9\0\0\0\0\0\x03\0\ +\xf8\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x26\x02\0\0\0\0\x03\0\x20\x0e\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\xcc\x01\0\0\0\0\x03\0\x60\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9b\ +\x01\0\0\0\0\x03\0\xc8\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5b\x01\0\0\0\0\x03\0\ +\x20\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x7c\x01\0\0\0\0\x03\0\x48\x08\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\x53\x01\0\0\0\0\x03\0\xb8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\ +\x01\0\0\0\0\x03\0\xe0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x84\x01\0\0\0\0\x03\0\ +\xb8\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1e\x02\0\0\0\0\x03\0\xd8\x09\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\xc4\x01\0\0\0\0\x03\0\x70\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x93\ +\x01\0\0\0\0\x03\0\xa8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\x01\0\0\0\0\x03\0\ +\xf0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4b\x01\0\0\0\0\x03\0\0\x0a\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x12\x01\0\0\0\0\x03\0\x10\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfa\0\ +\0\0\0\0\x03\0\xc0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x58\x02\0\0\0\0\x03\0\x88\ +\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x16\x02\0\0\0\0\x03\0\xb8\x0a\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\xe5\x01\0\0\0\0\x03\0\xc0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbc\x01\ +\0\0\0\0\x03\0\0\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x8b\x01\0\0\0\0\x03\0\x18\x0e\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd1\0\0\0\0\0\x03\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\x50\x02\0\0\0\0\x03\0\x20\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x02\0\0\0\0\ +\x03\0\x48\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6c\x01\0\0\0\0\x03\0\xb0\x04\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x43\x01\0\0\0\0\x03\0\xc8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\xc9\0\0\0\0\0\x03\0\xf8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\x02\0\0\0\0\x03\ +\0\xd0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3b\x01\0\0\0\0\x03\0\x98\x0b\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\xf2\0\0\0\0\0\x03\0\xb8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\ +\x02\0\0\0\0\x03\0\xf0\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfe\x01\0\0\0\0\x03\0\ +\xf8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xdd\x01\0\0\0\0\x03\0\0\x0c\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\xb4\x01\0\0\0\0\x03\0\x30\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0a\ +\x01\0\0\0\0\x03\0\x90\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\xa8\ +\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xd0\x01\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\xf6\x01\0\0\0\0\x03\0\xe0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xac\x01\0\ +\0\0\0\x03\0\x30\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x33\x01\0\0\0\0\x03\0\x80\x0e\ 
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xea\0\0\0\0\0\x03\0\x98\x0e\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6b\0\0\0\x11\0\x06\ +\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x25\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x14\ +\0\0\0\0\0\0\0\x82\0\0\0\x11\0\x05\0\x28\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x01\0\ +\0\0\x11\0\x05\0\x14\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x40\0\0\0\x12\0\x03\0\0\0\ +\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\x28\0\0\0\0\0\0\0\x01\0\0\0\x3a\0\0\0\x50\0\0\ +\0\0\0\0\0\x01\0\0\0\x3c\0\0\0\x80\x13\0\0\0\0\0\0\x01\0\0\0\x3b\0\0\0\x1c\0\0\ +\0\0\0\0\0\x01\0\0\0\x38\0\0\0\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\ +\x5f\x74\x6f\x65\x70\x6c\x69\x74\x7a\x5f\x6b\x65\x79\0\x2e\x74\x65\x78\x74\0\ +\x6d\x61\x70\x73\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x63\x6f\x6e\ +\x66\x69\x67\x75\x72\x61\x74\x69\x6f\x6e\x73\0\x74\x75\x6e\x5f\x72\x73\x73\x5f\ +\x73\x74\x65\x65\x72\x69\x6e\x67\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x74\x75\ +\x6e\x5f\x72\x73\x73\x5f\x73\x74\x65\x65\x72\x69\x6e\x67\0\x5f\x6c\x69\x63\x65\ +\x6e\x73\x65\0\x2e\x72\x65\x6c\x2e\x65\x68\x5f\x66\x72\x61\x6d\x65\0\x74\x61\ +\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x69\x6e\x64\x69\x72\x65\x63\x74\x69\ +\x6f\x6e\x5f\x74\x61\x62\x6c\x65\0\x72\x73\x73\x2e\x62\x70\x66\x2e\x63\0\x2e\ +\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x4c\x42\x42\x30\x5f\ +\x39\0\x4c\x42\x42\x30\x5f\x38\x39\0\x4c\x42\x42\x30\x5f\x36\x39\0\x4c\x42\x42\ +\x30\x5f\x35\x39\0\x4c\x42\x42\x30\x5f\x31\x39\0\x4c\x42\x42\x30\x5f\x31\x30\ +\x39\0\x4c\x42\x42\x30\x5f\x39\x38\0\x4c\x42\x42\x30\x5f\x37\x38\0\x4c\x42\x42\ +\x30\x5f\x34\x38\0\x4c\x42\x42\x30\x5f\x31\x38\0\x4c\x42\x42\x30\x5f\x38\x37\0\ +\x4c\x42\x42\x30\x5f\x34\x37\0\x4c\x42\x42\x30\x5f\x33\x37\0\x4c\x42\x42\x30\ +\x5f\x31\x37\0\x4c\x42\x42\x30\x5f\x31\x30\x37\0\x4c\x42\x42\x30\x5f\x39\x36\0\ +\x4c\x42\x42\x30\x5f\x37\x36\0\x4c\x42\x42\x30\x5f\x36\x36\0\x4c\x42\x42\x30\ +\x5f\x34\x36\0\x4c\x42\x42\x30\x5f\x33\x36\0\x4c\x42\x42\x30\x5f\x32\x36\0\x4c\ +\x42\x42\x30\x5f\x31\x30\x36\0\x4c\x42\x42\x30\x5f\x36\x35\0\x4c\x42\x42\x30\ +\x5f\x34\x35\0\x4c\x42\x42\x30\x5f\x33\x35\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\ +\x42\x30\x5f\x35\x34\0\x4c\x42\x42\x30\x5f\x34\x34\0\x4c\x42\x42\x30\x5f\x32\ +\x34\0\x4c\x42\x42\x30\x5f\x31\x30\x34\0\x4c\x42\x42\x30\x5f\x39\x33\0\x4c\x42\ +\x42\x30\x5f\x38\x33\0\x4c\x42\x42\x30\x5f\x35\x33\0\x4c\x42\x42\x30\x5f\x34\ +\x33\0\x4c\x42\x42\x30\x5f\x32\x33\0\x4c\x42\x42\x30\x5f\x31\x30\x33\0\x4c\x42\ +\x42\x30\x5f\x38\x32\0\x4c\x42\x42\x30\x5f\x35\x32\0\x4c\x42\x42\x30\x5f\x31\ +\x30\x32\0\x4c\x42\x42\x30\x5f\x39\x31\0\x4c\x42\x42\x30\x5f\x38\x31\0\x4c\x42\ +\x42\x30\x5f\x37\x31\0\x4c\x42\x42\x30\x5f\x36\x31\0\x4c\x42\x42\x30\x5f\x35\ +\x31\0\x4c\x42\x42\x30\x5f\x34\x31\0\x4c\x42\x42\x30\x5f\x32\x31\0\x4c\x42\x42\ +\x30\x5f\x31\x31\0\x4c\x42\x42\x30\x5f\x31\x31\x31\0\x4c\x42\x42\x30\x5f\x31\ +\x30\x31\0\x4c\x42\x42\x30\x5f\x38\x30\0\x4c\x42\x42\x30\x5f\x36\x30\0\x4c\x42\ +\x42\x30\x5f\x35\x30\0\x4c\x42\x42\x30\x5f\x31\x30\0\x4c\x42\x42\x30\x5f\x31\ +\x31\x30\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\ +\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa0\x1a\0\0\0\0\0\0\x71\x02\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\0\0\0\x01\0\0\ +\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5a\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\ 
+\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x56\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x60\x1a\0\0\0\0\0\0\x30\0\0\0\0\0\0\0\x09\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\ +\x10\0\0\0\0\0\0\0\x20\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\ +\x14\0\0\0\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\x6c\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x54\x14\0\0\0\0\0\ +\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x78\0\0\ +\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x60\x14\0\0\0\0\0\0\x30\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\0\0\0\x09\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x1a\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x09\0\0\0\ +\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xb2\0\0\0\x02\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\x90\x14\0\0\0\0\0\0\xd0\x05\0\0\0\0\0\0\x01\0\0\0\x39\0\0\ +\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\0\0"; + + return 0; +err: + bpf_object__destroy_skeleton(s); + return -1; +} + +#endif /* __RSS_BPF_SKEL_H__ */ diff --git a/ebpf/trace-events b/ebpf/trace-events new file mode 100644 index 00000000000..411b1e2be39 --- /dev/null +++ b/ebpf/trace-events @@ -0,0 +1,4 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# ebpf-rss.c +ebpf_error(const char *s1, const char *s2) "error in %s: %s" diff --git a/ebpf/trace.h b/ebpf/trace.h new file mode 100644 index 00000000000..abefc46ab10 --- /dev/null +++ b/ebpf/trace.h @@ -0,0 +1 @@ +#include "trace/trace-ebpf.h" diff --git a/fpu/meson.build b/fpu/meson.build new file mode 100644 index 00000000000..1a9992ded56 --- /dev/null +++ b/fpu/meson.build @@ -0,0 +1 @@ +specific_ss.add(when: 'CONFIG_TCG', if_true: files('softfloat.c')) diff --git a/fpu/softfloat-parts-addsub.c.inc b/fpu/softfloat-parts-addsub.c.inc new file mode 100644 index 00000000000..ae5c1017c5b --- /dev/null +++ b/fpu/softfloat-parts-addsub.c.inc @@ -0,0 +1,62 @@ +/* + * Floating point arithmetic implementation + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +static void partsN(add_normal)(FloatPartsN *a, FloatPartsN *b) +{ + int exp_diff = a->exp - b->exp; + + if (exp_diff > 0) { + frac_shrjam(b, exp_diff); + } else if (exp_diff < 0) { + frac_shrjam(a, -exp_diff); + a->exp = b->exp; + } + + if (frac_add(a, a, b)) { + frac_shrjam(a, 1); + a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + a->exp += 1; + } +} + +static bool partsN(sub_normal)(FloatPartsN *a, FloatPartsN *b) +{ + int exp_diff = a->exp - b->exp; + int shift; + + if (exp_diff > 0) { + frac_shrjam(b, exp_diff); + frac_sub(a, a, b); + } else if (exp_diff < 0) { + a->exp = b->exp; + a->sign ^= 1; + frac_shrjam(a, -exp_diff); + frac_sub(a, b, a); + } else if (frac_sub(a, a, b)) { + /* Overflow means that A was less than B. 
*/ + frac_neg(a); + a->sign ^= 1; + } + + shift = frac_normalize(a); + if (likely(shift < N)) { + a->exp -= shift; + return true; + } + a->cls = float_class_zero; + return false; +} diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc new file mode 100644 index 00000000000..41d4b17e419 --- /dev/null +++ b/fpu/softfloat-parts.c.inc @@ -0,0 +1,1513 @@ +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +static void partsN(return_nan)(FloatPartsN *a, float_status *s) +{ + switch (a->cls) { + case float_class_snan: + float_raise(float_flag_invalid, s); + if (s->default_nan_mode) { + parts_default_nan(a, s); + } else { + parts_silence_nan(a, s); + } + break; + case float_class_qnan: + if (s->default_nan_mode) { + parts_default_nan(a, s); + } + break; + default: + g_assert_not_reached(); + } +} + +static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, + float_status *s) +{ + if (is_snan(a->cls) || is_snan(b->cls)) { + float_raise(float_flag_invalid, s); + } + + if (s->default_nan_mode) { + parts_default_nan(a, s); + } else { + int cmp = frac_cmp(a, b); + if (cmp == 0) { + cmp = a->sign < b->sign; + } + + if (pickNaN(a->cls, b->cls, cmp > 0, s)) { + a = b; + } + if (is_snan(a->cls)) { + parts_silence_nan(a, s); + } + } + return a; +} + +static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, + FloatPartsN *c, float_status *s, + int ab_mask, int abc_mask) +{ + int which; + + if (unlikely(abc_mask & float_cmask_snan)) { + float_raise(float_flag_invalid, s); + } + + which = pickNaNMulAdd(a->cls, b->cls, c->cls, + ab_mask == float_cmask_infzero, s); + + if (s->default_nan_mode || which == 3) { + /* + * Note that this check is after pickNaNMulAdd so that function + * has an opportunity to set the Invalid flag for infzero. + */ + parts_default_nan(a, s); + return a; + } + + switch (which) { + case 0: + break; + case 1: + a = b; + break; + case 2: + a = c; + break; + default: + g_assert_not_reached(); + } + if (is_snan(a->cls)) { + parts_silence_nan(a, s); + } + return a; +} + +/* + * Canonicalize the FloatParts structure. Determine the class, + * unbias the exponent, and normalize the fraction. 
+ */ +static void partsN(canonicalize)(FloatPartsN *p, float_status *status, + const FloatFmt *fmt) +{ + if (unlikely(p->exp == 0)) { + if (likely(frac_eqz(p))) { + p->cls = float_class_zero; + } else if (status->flush_inputs_to_zero) { + float_raise(float_flag_input_denormal, status); + p->cls = float_class_zero; + frac_clear(p); + } else { + int shift = frac_normalize(p); + p->cls = float_class_normal; + p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1; + } + } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { + p->cls = float_class_normal; + p->exp -= fmt->exp_bias; + frac_shl(p, fmt->frac_shift); + p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + } else if (likely(frac_eqz(p))) { + p->cls = float_class_inf; + } else { + frac_shl(p, fmt->frac_shift); + p->cls = (parts_is_snan_frac(p->frac_hi, status) + ? float_class_snan : float_class_qnan); + } +} + +/* + * Round and uncanonicalize a floating-point number by parts. There + * are FRAC_SHIFT bits that may require rounding at the bottom of the + * fraction; these bits will be removed. The exponent will be biased + * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. + */ +static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, + const FloatFmt *fmt) +{ + const int exp_max = fmt->exp_max; + const int frac_shift = fmt->frac_shift; + const uint64_t round_mask = fmt->round_mask; + const uint64_t frac_lsb = round_mask + 1; + const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); + const uint64_t roundeven_mask = round_mask | frac_lsb; + uint64_t inc; + bool overflow_norm = false; + int exp, flags = 0; + + switch (s->float_rounding_mode) { + case float_round_nearest_even: + if (N > 64 && frac_lsb == 0) { + inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } else { + inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } + break; + case float_round_ties_away: + inc = frac_lsbm1; + break; + case float_round_to_zero: + overflow_norm = true; + inc = 0; + break; + case float_round_up: + inc = p->sign ? 0 : round_mask; + overflow_norm = p->sign; + break; + case float_round_down: + inc = p->sign ? round_mask : 0; + overflow_norm = !p->sign; + break; + case float_round_to_odd: + overflow_norm = true; + /* fall through */ + case float_round_to_odd_inf: + if (N > 64 && frac_lsb == 0) { + inc = p->frac_hi & 1 ? 0 : round_mask; + } else { + inc = p->frac_lo & frac_lsb ? 0 : round_mask; + } + break; + default: + g_assert_not_reached(); + } + + exp = p->exp + fmt->exp_bias; + if (likely(exp > 0)) { + if (p->frac_lo & round_mask) { + flags |= float_flag_inexact; + if (frac_addi(p, p, inc)) { + frac_shr(p, 1); + p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + exp++; + } + p->frac_lo &= ~round_mask; + } + + if (fmt->arm_althp) { + /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ + if (unlikely(exp > exp_max)) { + /* Overflow. Return the maximum normal. 
*/ + flags = float_flag_invalid; + exp = exp_max; + frac_allones(p); + p->frac_lo &= ~round_mask; + } + } else if (unlikely(exp >= exp_max)) { + flags |= float_flag_overflow | float_flag_inexact; + if (overflow_norm) { + exp = exp_max - 1; + frac_allones(p); + p->frac_lo &= ~round_mask; + } else { + p->cls = float_class_inf; + exp = exp_max; + frac_clear(p); + } + } + frac_shr(p, frac_shift); + } else if (s->flush_to_zero) { + flags |= float_flag_output_denormal; + p->cls = float_class_zero; + exp = 0; + frac_clear(p); + } else { + bool is_tiny = s->tininess_before_rounding || exp < 0; + + if (!is_tiny) { + FloatPartsN discard; + is_tiny = !frac_addi(&discard, p, inc); + } + + frac_shrjam(p, 1 - exp); + + if (p->frac_lo & round_mask) { + /* Need to recompute round-to-even/round-to-odd. */ + switch (s->float_rounding_mode) { + case float_round_nearest_even: + if (N > 64 && frac_lsb == 0) { + inc = ((p->frac_hi & 1) || + (p->frac_lo & round_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } else { + inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 + ? frac_lsbm1 : 0); + } + break; + case float_round_to_odd: + case float_round_to_odd_inf: + if (N > 64 && frac_lsb == 0) { + inc = p->frac_hi & 1 ? 0 : round_mask; + } else { + inc = p->frac_lo & frac_lsb ? 0 : round_mask; + } + break; + default: + break; + } + flags |= float_flag_inexact; + frac_addi(p, p, inc); + p->frac_lo &= ~round_mask; + } + + exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0; + frac_shr(p, frac_shift); + + if (is_tiny && (flags & float_flag_inexact)) { + flags |= float_flag_underflow; + } + if (exp == 0 && frac_eqz(p)) { + p->cls = float_class_zero; + } + } + p->exp = exp; + float_raise(flags, s); +} + +static void partsN(uncanon)(FloatPartsN *p, float_status *s, + const FloatFmt *fmt) +{ + if (likely(p->cls == float_class_normal)) { + parts_uncanon_normal(p, s, fmt); + } else { + switch (p->cls) { + case float_class_zero: + p->exp = 0; + frac_clear(p); + return; + case float_class_inf: + g_assert(!fmt->arm_althp); + p->exp = fmt->exp_max; + frac_clear(p); + return; + case float_class_qnan: + case float_class_snan: + g_assert(!fmt->arm_althp); + p->exp = fmt->exp_max; + frac_shr(p, fmt->frac_shift); + return; + default: + break; + } + g_assert_not_reached(); + } +} + +/* + * Returns the result of adding or subtracting the values of the + * floating-point values `a' and `b'. The operation is performed + * according to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic. + */ +static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, + float_status *s, bool subtract) +{ + bool b_sign = b->sign ^ subtract; + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + + if (a->sign != b_sign) { + /* Subtraction */ + if (likely(ab_mask == float_cmask_normal)) { + if (parts_sub_normal(a, b)) { + return a; + } + /* Subtract was exact, fall through to set sign. 
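+             * Note that an exact zero difference takes the sign +0 in
+             * all rounding directions except float_round_down, where
+             * it is -0 (IEEE 754-2008, section 6.3); that is what the
+             * assignment below implements.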
*/ + ab_mask = float_cmask_zero; + } + + if (ab_mask == float_cmask_zero) { + a->sign = s->float_rounding_mode == float_round_down; + return a; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + goto p_nan; + } + + if (ab_mask & float_cmask_inf) { + if (a->cls != float_class_inf) { + /* N - Inf */ + goto return_b; + } + if (b->cls != float_class_inf) { + /* Inf - N */ + return a; + } + /* Inf - Inf */ + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + } else { + /* Addition */ + if (likely(ab_mask == float_cmask_normal)) { + parts_add_normal(a, b); + return a; + } + + if (ab_mask == float_cmask_zero) { + return a; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + goto p_nan; + } + + if (ab_mask & float_cmask_inf) { + a->cls = float_class_inf; + return a; + } + } + + if (b->cls == float_class_zero) { + g_assert(a->cls == float_class_normal); + return a; + } + + g_assert(a->cls == float_class_zero); + g_assert(b->cls == float_class_normal); + return_b: + b->sign = b_sign; + return b; + + p_nan: + return parts_pick_nan(a, b, s); +} + +/* + * Returns the result of multiplying the floating-point values `a' and + * `b'. The operation is performed according to the IEC/IEEE Standard + * for Binary Floating-Point Arithmetic. + */ +static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, + float_status *s) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + bool sign = a->sign ^ b->sign; + + if (likely(ab_mask == float_cmask_normal)) { + FloatPartsW tmp; + + frac_mulw(&tmp, a, b); + frac_truncjam(a, &tmp); + + a->exp += b->exp + 1; + if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { + frac_add(a, a, a); + a->exp -= 1; + } + + a->sign = sign; + return a; + } + + /* Inf * Zero == NaN */ + if (unlikely(ab_mask == float_cmask_infzero)) { + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + return parts_pick_nan(a, b, s); + } + + /* Multiply by 0 or Inf */ + if (ab_mask & float_cmask_inf) { + a->cls = float_class_inf; + a->sign = sign; + return a; + } + + g_assert(ab_mask & float_cmask_zero); + a->cls = float_class_zero; + a->sign = sign; + return a; +} + +/* + * Returns the result of multiplying the floating-point values `a' and + * `b' then adding 'c', with no intermediate rounding step after the + * multiplication. The operation is performed according to the + * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. + * The flags argument allows the caller to select negation of the + * addend, the intermediate product, or the final result. (The + * difference between this and having the caller do a separate + * negation is that negating externally will flip the sign bit on NaNs.) + * + * Requires A and C extracted into a double-sized structure to provide the + * extra space for the widening multiply. + */ +static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, + FloatPartsN *c, int flags, float_status *s) +{ + int ab_mask, abc_mask; + FloatPartsW p_widen, c_widen; + + ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + abc_mask = float_cmask(c->cls) | ab_mask; + + /* + * It is implementation-defined whether the cases of (0,inf,qnan) + * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN + * they return if they do), so we have to hand this information + * off to the target-specific pick-a-NaN routine. 
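+     *
+     * pickNaNMulAdd returns 0, 1 or 2 to select a, b or c
+     * respectively, or 3 to request the default NaN; RISC-V, for
+     * instance, always returns 3.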
+ */ + if (unlikely(abc_mask & float_cmask_anynan)) { + return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); + } + + if (flags & float_muladd_negate_c) { + c->sign ^= 1; + } + + /* Compute the sign of the product into A. */ + a->sign ^= b->sign; + if (flags & float_muladd_negate_product) { + a->sign ^= 1; + } + + if (unlikely(ab_mask != float_cmask_normal)) { + if (unlikely(ab_mask == float_cmask_infzero)) { + goto d_nan; + } + + if (ab_mask & float_cmask_inf) { + if (c->cls == float_class_inf && a->sign != c->sign) { + goto d_nan; + } + goto return_inf; + } + + g_assert(ab_mask & float_cmask_zero); + if (c->cls == float_class_normal) { + *a = *c; + goto return_normal; + } + if (c->cls == float_class_zero) { + if (a->sign != c->sign) { + goto return_sub_zero; + } + goto return_zero; + } + g_assert(c->cls == float_class_inf); + } + + if (unlikely(c->cls == float_class_inf)) { + a->sign = c->sign; + goto return_inf; + } + + /* Perform the multiplication step. */ + p_widen.sign = a->sign; + p_widen.exp = a->exp + b->exp + 1; + frac_mulw(&p_widen, a, b); + if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { + frac_add(&p_widen, &p_widen, &p_widen); + p_widen.exp -= 1; + } + + /* Perform the addition step. */ + if (c->cls != float_class_zero) { + /* Zero-extend C to less significant bits. */ + frac_widen(&c_widen, c); + c_widen.exp = c->exp; + + if (a->sign == c->sign) { + parts_add_normal(&p_widen, &c_widen); + } else if (!parts_sub_normal(&p_widen, &c_widen)) { + goto return_sub_zero; + } + } + + /* Narrow with sticky bit, for proper rounding later. */ + frac_truncjam(a, &p_widen); + a->sign = p_widen.sign; + a->exp = p_widen.exp; + + return_normal: + if (flags & float_muladd_halve_result) { + a->exp -= 1; + } + finish_sign: + if (flags & float_muladd_negate_result) { + a->sign ^= 1; + } + return a; + + return_sub_zero: + a->sign = s->float_rounding_mode == float_round_down; + return_zero: + a->cls = float_class_zero; + goto finish_sign; + + return_inf: + a->cls = float_class_inf; + goto finish_sign; + + d_nan: + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; +} + +/* + * Returns the result of dividing the floating-point value `a' by the + * corresponding value `b'. The operation is performed according to + * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ +static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, + float_status *s) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + bool sign = a->sign ^ b->sign; + + if (likely(ab_mask == float_cmask_normal)) { + a->sign = sign; + a->exp -= b->exp + frac_div(a, b); + return a; + } + + /* 0/0 or Inf/Inf => NaN */ + if (unlikely(ab_mask == float_cmask_zero) || + unlikely(ab_mask == float_cmask_inf)) { + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + + /* All the NaN cases */ + if (unlikely(ab_mask & float_cmask_anynan)) { + return parts_pick_nan(a, b, s); + } + + a->sign = sign; + + /* Inf / X */ + if (a->cls == float_class_inf) { + return a; + } + + /* 0 / X */ + if (a->cls == float_class_zero) { + return a; + } + + /* X / Inf */ + if (b->cls == float_class_inf) { + a->cls = float_class_zero; + return a; + } + + /* X / 0 => Inf */ + g_assert(b->cls == float_class_zero); + float_raise(float_flag_divbyzero, s); + a->cls = float_class_inf; + return a; +} + +/* + * Floating point remainder, per IEC/IEEE, or modulus. 
+ */ +static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, + uint64_t *mod_quot, float_status *s) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + + if (likely(ab_mask == float_cmask_normal)) { + frac_modrem(a, b, mod_quot); + return a; + } + + if (mod_quot) { + *mod_quot = 0; + } + + /* All the NaN cases */ + if (unlikely(ab_mask & float_cmask_anynan)) { + return parts_pick_nan(a, b, s); + } + + /* Inf % N; N % 0 */ + if (a->cls == float_class_inf || b->cls == float_class_zero) { + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); + return a; + } + + /* N % Inf; 0 % N */ + g_assert(b->cls == float_class_inf || a->cls == float_class_zero); + return a; +} + +/* + * Square Root + * + * The base algorithm is lifted from + * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c + * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c + * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c + * and is thus MIT licenced. + */ +static void partsN(sqrt)(FloatPartsN *a, float_status *status, + const FloatFmt *fmt) +{ + const uint32_t three32 = 3u << 30; + const uint64_t three64 = 3ull << 62; + uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ + uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ + uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ + uint64_t d0h, d0l, d1h, d1l, d2h, d2l; + uint64_t discard; + bool exp_odd; + size_t index; + + if (unlikely(a->cls != float_class_normal)) { + switch (a->cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(a, status); + return; + case float_class_zero: + return; + case float_class_inf: + if (unlikely(a->sign)) { + goto d_nan; + } + return; + default: + g_assert_not_reached(); + } + } + + if (unlikely(a->sign)) { + goto d_nan; + } + + /* + * Argument reduction. + * x = 4^e frac; with integer e, and frac in [1, 4) + * m = frac fixed point at bit 62, since we're in base 4. + * If base-2 exponent is odd, exchange that for multiply by 2, + * which results in no shift. + */ + exp_odd = a->exp & 1; + index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); + if (!exp_odd) { + frac_shr(a, 1); + } + + /* + * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). + * + * Initial estimate: + * 7-bit lookup table (1-bit exponent and 6-bit significand). + * + * The relative error (e = r0*sqrt(m)-1) of a linear estimate + * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; + * a table lookup is faster and needs one less iteration. + * The 7-bit table gives |e| < 0x1.fdp-9. 
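+     *
+     * The iteration below is Newton-Raphson applied to
+     * f(r) = 1/r^2 - m, whose positive root is r = 1/sqrt(m):
+     *     r' = r - f(r)/f'(r) = r * (3 - m*r^2) / 2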
+ * + * A Newton-Raphson iteration for r is + * s = m*r + * d = s*r + * u = 3 - d + * r = r*u/2 + * + * Fixed point representations: + * m, s, d, u, three are all 2.30; r is 0.32 + */ + m64 = a->frac_hi; + m32 = m64 >> 32; + + r32 = rsqrt_tab[index] << 16; + /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ + + s32 = ((uint64_t)m32 * r32) >> 32; + d32 = ((uint64_t)s32 * r32) >> 32; + u32 = three32 - d32; + + if (N == 64) { + /* float64 or smaller */ + + r32 = ((uint64_t)r32 * u32) >> 31; + /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ + + s32 = ((uint64_t)m32 * r32) >> 32; + d32 = ((uint64_t)s32 * r32) >> 32; + u32 = three32 - d32; + + if (fmt->frac_size <= 23) { + /* float32 or smaller */ + + s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ + s32 = (s32 - 1) >> 6; /* 9.23 */ + /* s < sqrt(m) < s + 0x1.08p-23 */ + + /* compute nearest rounded result to 2.23 bits */ + uint32_t d0 = (m32 << 16) - s32 * s32; + uint32_t d1 = s32 - d0; + uint32_t d2 = d1 + s32 + 1; + s32 += d1 >> 31; + a->frac_hi = (uint64_t)s32 << (64 - 25); + + /* increment or decrement for inexact */ + if (d2 != 0) { + a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); + } + goto done; + } + + /* float64 */ + + r64 = (uint64_t)r32 * u32 * 2; + /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ + mul64To128(m64, r64, &s64, &discard); + mul64To128(s64, r64, &d64, &discard); + u64 = three64 - d64; + + mul64To128(s64, u64, &s64, &discard); /* 3.61 */ + s64 = (s64 - 2) >> 9; /* 12.52 */ + + /* Compute nearest rounded result */ + uint64_t d0 = (m64 << 42) - s64 * s64; + uint64_t d1 = s64 - d0; + uint64_t d2 = d1 + s64 + 1; + s64 += d1 >> 63; + a->frac_hi = s64 << (64 - 54); + + /* increment or decrement for inexact */ + if (d2 != 0) { + a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); + } + goto done; + } + + r64 = (uint64_t)r32 * u32 * 2; + /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ + + mul64To128(m64, r64, &s64, &discard); + mul64To128(s64, r64, &d64, &discard); + u64 = three64 - d64; + mul64To128(u64, r64, &r64, &discard); + r64 <<= 1; + /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ + + mul64To128(m64, r64, &s64, &discard); + mul64To128(s64, r64, &d64, &discard); + u64 = three64 - d64; + mul64To128(u64, r64, &rh, &rl); + add128(rh, rl, rh, rl, &rh, &rl); + /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ + + mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); + mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); + sub128(three64, 0, dh, dl, &uh, &ul); + mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ + /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ + + sub128(sh, sl, 0, 4, &sh, &sl); + shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ + /* s < sqrt(m) < s + 1ulp */ + + /* Compute nearest rounded result */ + mul64To128(sl, sl, &d0h, &d0l); + d0h += 2 * sh * sl; + sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); + sub128(sh, sl, d0h, d0l, &d1h, &d1l); + add128(sh, sl, 0, 1, &d2h, &d2l); + add128(d2h, d2l, d1h, d1l, &d2h, &d2l); + add128(sh, sl, 0, d1h >> 63, &sh, &sl); + shift128Left(sh, sl, 128 - 114, &sh, &sl); + + /* increment or decrement for inexact */ + if (d2h | d2l) { + if ((int64_t)(d1h ^ d2h) < 0) { + sub128(sh, sl, 0, 1, &sh, &sl); + } else { + add128(sh, sl, 0, 1, &sh, &sl); + } + } + a->frac_lo = sl; + a->frac_hi = sh; + + done: + /* Convert back from base 4 to base 2. 
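+     * Since sqrt(4^e * m) = 2^e * sqrt(m), halving the exponent
+     * undoes the base-4 split; the odd bit of the original exponent
+     * was folded into the fraction during argument reduction, so all
+     * that remains is to renormalize the fraction.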
*/ + a->exp >>= 1; + if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { + frac_add(a, a, a); + } else { + a->exp += 1; + } + return; + + d_nan: + float_raise(float_flag_invalid, status); + parts_default_nan(a, status); +} + +/* + * Rounds the floating-point value `a' to an integer, and returns the + * result as a floating-point value. The operation is performed + * according to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic. + * + * parts_round_to_int_normal is an internal helper function for + * normal numbers only, returning true for inexact but not directly + * raising float_flag_inexact. + */ +static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, + int scale, int frac_size) +{ + uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; + int shift_adj; + + scale = MIN(MAX(scale, -0x10000), 0x10000); + a->exp += scale; + + if (a->exp < 0) { + bool one; + + /* All fractional */ + switch (rmode) { + case float_round_nearest_even: + one = false; + if (a->exp == -1) { + FloatPartsN tmp; + /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ + frac_add(&tmp, a, a); + /* Anything remaining means frac > 0.5. */ + one = !frac_eqz(&tmp); + } + break; + case float_round_ties_away: + one = a->exp == -1; + break; + case float_round_to_zero: + one = false; + break; + case float_round_up: + one = !a->sign; + break; + case float_round_down: + one = a->sign; + break; + case float_round_to_odd: + one = true; + break; + default: + g_assert_not_reached(); + } + + frac_clear(a); + a->exp = 0; + if (one) { + a->frac_hi = DECOMPOSED_IMPLICIT_BIT; + } else { + a->cls = float_class_zero; + } + return true; + } + + if (a->exp >= frac_size) { + /* All integral */ + return false; + } + + if (N > 64 && a->exp < N - 64) { + /* + * Rounding is not in the low word -- shift lsb to bit 2, + * which leaves room for sticky and rounding bit. + */ + shift_adj = (N - 1) - (a->exp + 2); + frac_shrjam(a, shift_adj); + frac_lsb = 1 << 2; + } else { + shift_adj = 0; + frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); + } + + frac_lsbm1 = frac_lsb >> 1; + rnd_mask = frac_lsb - 1; + rnd_even_mask = rnd_mask | frac_lsb; + + if (!(a->frac_lo & rnd_mask)) { + /* Fractional bits already clear, undo the shift above. */ + frac_shl(a, shift_adj); + return false; + } + + switch (rmode) { + case float_round_nearest_even: + inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); + break; + case float_round_ties_away: + inc = frac_lsbm1; + break; + case float_round_to_zero: + inc = 0; + break; + case float_round_up: + inc = a->sign ? 0 : rnd_mask; + break; + case float_round_down: + inc = a->sign ? rnd_mask : 0; + break; + case float_round_to_odd: + inc = a->frac_lo & frac_lsb ? 
0 : rnd_mask; + break; + default: + g_assert_not_reached(); + } + + if (shift_adj == 0) { + if (frac_addi(a, a, inc)) { + frac_shr(a, 1); + a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; + a->exp++; + } + a->frac_lo &= ~rnd_mask; + } else { + frac_addi(a, a, inc); + a->frac_lo &= ~rnd_mask; + /* Be careful shifting back, not to overflow */ + frac_shl(a, shift_adj - 1); + if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { + a->exp++; + } else { + frac_add(a, a, a); + } + } + return true; +} + +static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, + int scale, float_status *s, + const FloatFmt *fmt) +{ + switch (a->cls) { + case float_class_qnan: + case float_class_snan: + parts_return_nan(a, s); + break; + case float_class_zero: + case float_class_inf: + break; + case float_class_normal: + if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { + float_raise(float_flag_inexact, s); + } + break; + default: + g_assert_not_reached(); + } +} + +/* + * Returns the result of converting the floating-point value `a' to + * the two's complement integer format. The conversion is performed + * according to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic---which means in particular that the conversion is + * rounded according to the current rounding mode. If `a' is a NaN, + * the largest positive integer is returned. Otherwise, if the + * conversion overflows, the largest integer with the same sign as `a' + * is returned. + */ +static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, + int scale, int64_t min, int64_t max, + float_status *s) +{ + int flags = 0; + uint64_t r; + + switch (p->cls) { + case float_class_snan: + case float_class_qnan: + flags = float_flag_invalid; + r = max; + break; + + case float_class_inf: + flags = float_flag_invalid; + r = p->sign ? min : max; + break; + + case float_class_zero: + return 0; + + case float_class_normal: + /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ + if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { + flags = float_flag_inexact; + } + + if (p->exp <= DECOMPOSED_BINARY_POINT) { + r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); + } else { + r = UINT64_MAX; + } + if (p->sign) { + if (r <= -(uint64_t)min) { + r = -r; + } else { + flags = float_flag_invalid; + r = min; + } + } else if (r > max) { + flags = float_flag_invalid; + r = max; + } + break; + + default: + g_assert_not_reached(); + } + + float_raise(flags, s); + return r; +} + +/* + * Returns the result of converting the floating-point value `a' to + * the unsigned integer format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic---which means in particular that the conversion is + * rounded according to the current rounding mode. If `a' is a NaN, + * the largest unsigned integer is returned. Otherwise, if the + * conversion overflows, the largest unsigned integer is returned. If + * the 'a' is negative, the result is rounded and zero is returned; + * values that do not round to zero will raise the inexact exception + * flag. + */ +static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, + int scale, uint64_t max, float_status *s) +{ + int flags = 0; + uint64_t r; + + switch (p->cls) { + case float_class_snan: + case float_class_qnan: + flags = float_flag_invalid; + r = max; + break; + + case float_class_inf: + flags = float_flag_invalid; + r = p->sign ? 
0 : max; + break; + + case float_class_zero: + return 0; + + case float_class_normal: + /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ + if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { + flags = float_flag_inexact; + if (p->cls == float_class_zero) { + r = 0; + break; + } + } + + if (p->sign) { + flags = float_flag_invalid; + r = 0; + } else if (p->exp > DECOMPOSED_BINARY_POINT) { + flags = float_flag_invalid; + r = max; + } else { + r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); + if (r > max) { + flags = float_flag_invalid; + r = max; + } + } + break; + + default: + g_assert_not_reached(); + } + + float_raise(flags, s); + return r; +} + +/* + * Integer to float conversions + * + * Returns the result of converting the two's complement integer `a' + * to the floating-point format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ +static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, + int scale, float_status *s) +{ + uint64_t f = a; + int shift; + + memset(p, 0, sizeof(*p)); + + if (a == 0) { + p->cls = float_class_zero; + return; + } + + p->cls = float_class_normal; + if (a < 0) { + f = -f; + p->sign = true; + } + shift = clz64(f); + scale = MIN(MAX(scale, -0x10000), 0x10000); + + p->exp = DECOMPOSED_BINARY_POINT - shift + scale; + p->frac_hi = f << shift; +} + +/* + * Unsigned Integer to float conversions + * + * Returns the result of converting the unsigned integer `a' to the + * floating-point format. The conversion is performed according to the + * IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ +static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, + int scale, float_status *status) +{ + memset(p, 0, sizeof(*p)); + + if (a == 0) { + p->cls = float_class_zero; + } else { + int shift = clz64(a); + scale = MIN(MAX(scale, -0x10000), 0x10000); + p->cls = float_class_normal; + p->exp = DECOMPOSED_BINARY_POINT - shift + scale; + p->frac_hi = a << shift; + } +} + +/* + * Float min/max. + */ +static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, + float_status *s, int flags) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + int a_exp, b_exp, cmp; + + if (unlikely(ab_mask & float_cmask_anynan)) { + /* + * For minNum/maxNum (IEEE 754-2008) + * or minimumNumber/maximumNumber (IEEE 754-2019), + * if one operand is a QNaN, and the other + * operand is numerical, then return numerical argument. + */ + if ((flags & (minmax_isnum | minmax_isnumber)) + && !(ab_mask & float_cmask_snan) + && (ab_mask & ~float_cmask_qnan)) { + return is_nan(a->cls) ? b : a; + } + + /* + * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag + * are removed and replaced with minimum, minimumNumber, maximum + * and maximumNumber. + * minimumNumber/maximumNumber behavior for SNaN is changed to: + * If both operands are NaNs, a QNaN is returned. + * If either operand is a SNaN, + * an invalid operation exception is signaled, + * but unless both operands are NaNs, + * the SNaN is otherwise ignored and not converted to a QNaN. + */ + if ((flags & minmax_isnumber) + && (ab_mask & float_cmask_snan) + && (ab_mask & ~float_cmask_anynan)) { + float_raise(float_flag_invalid, s); + return is_nan(a->cls) ? 
b : a; + } + + return parts_pick_nan(a, b, s); + } + + a_exp = a->exp; + b_exp = b->exp; + + if (unlikely(ab_mask != float_cmask_normal)) { + switch (a->cls) { + case float_class_normal: + break; + case float_class_inf: + a_exp = INT16_MAX; + break; + case float_class_zero: + a_exp = INT16_MIN; + break; + default: + g_assert_not_reached(); + break; + } + switch (b->cls) { + case float_class_normal: + break; + case float_class_inf: + b_exp = INT16_MAX; + break; + case float_class_zero: + b_exp = INT16_MIN; + break; + default: + g_assert_not_reached(); + break; + } + } + + /* Compare magnitudes. */ + cmp = a_exp - b_exp; + if (cmp == 0) { + cmp = frac_cmp(a, b); + } + + /* + * Take the sign into account. + * For ismag, only do this if the magnitudes are equal. + */ + if (!(flags & minmax_ismag) || cmp == 0) { + if (a->sign != b->sign) { + /* For differing signs, the negative operand is less. */ + cmp = a->sign ? -1 : 1; + } else if (a->sign) { + /* For two negative operands, invert the magnitude comparison. */ + cmp = -cmp; + } + } + + if (flags & minmax_ismin) { + cmp = -cmp; + } + return cmp < 0 ? b : a; +} + +/* + * Floating point compare + */ +static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, + float_status *s, bool is_quiet) +{ + int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); + int cmp; + + if (likely(ab_mask == float_cmask_normal)) { + if (a->sign != b->sign) { + goto a_sign; + } + if (a->exp != b->exp) { + cmp = a->exp < b->exp ? -1 : 1; + } else { + cmp = frac_cmp(a, b); + } + if (a->sign) { + cmp = -cmp; + } + return cmp; + } + + if (unlikely(ab_mask & float_cmask_anynan)) { + if (!is_quiet || (ab_mask & float_cmask_snan)) { + float_raise(float_flag_invalid, s); + } + return float_relation_unordered; + } + + if (ab_mask & float_cmask_zero) { + if (ab_mask == float_cmask_zero) { + return float_relation_equal; + } else if (a->cls == float_class_zero) { + goto b_sign; + } else { + goto a_sign; + } + } + + if (ab_mask == float_cmask_inf) { + if (a->sign == b->sign) { + return float_relation_equal; + } + } else if (b->cls == float_class_inf) { + goto b_sign; + } else { + g_assert(a->cls == float_class_inf); + } + + a_sign: + return a->sign ? float_relation_less : float_relation_greater; + b_sign: + return b->sign ? float_relation_greater : float_relation_less; +} + +/* + * Multiply A by 2 raised to the power N. + */ +static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) +{ + switch (a->cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(a, s); + break; + case float_class_zero: + case float_class_inf: + break; + case float_class_normal: + a->exp += MIN(MAX(n, -0x10000), 0x10000); + break; + default: + g_assert_not_reached(); + } +} + +/* + * Return log2(A) + */ +static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) +{ + uint64_t a0, a1, r, t, ign; + FloatPartsN f; + int i, n, a_exp, f_exp; + + if (unlikely(a->cls != float_class_normal)) { + switch (a->cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(a, s); + return; + case float_class_zero: + /* log2(0) = -inf */ + a->cls = float_class_inf; + a->sign = 1; + return; + case float_class_inf: + if (unlikely(a->sign)) { + goto d_nan; + } + return; + default: + break; + } + g_assert_not_reached(); + } + if (unlikely(a->sign)) { + goto d_nan; + } + + /* TODO: This algorithm looses bits too quickly for float128. 
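+     *
+     * The fraction of the result is generated one bit per iteration
+     * of the loop below: the mantissa f (in [1,2)) is squared; if f^2
+     * lands in [2,4) the next bit of log2(f) is 1 and f^2/2 becomes
+     * the new mantissa, otherwise the next bit is 0 and f^2 is kept.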
*/ + g_assert(N == 64); + + a_exp = a->exp; + f_exp = -1; + + r = 0; + t = DECOMPOSED_IMPLICIT_BIT; + a0 = a->frac_hi; + a1 = 0; + + n = fmt->frac_size + 2; + if (unlikely(a_exp == -1)) { + /* + * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). + * When the value is very close to 1.0, there are lots of 1's in + * the msb parts of the fraction. At the end, when we subtract + * this value from -1.0, we can see a catastrophic loss of precision, + * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the + * bits of y in the final result. To minimize this, compute as many + * digits as we can. + * ??? This case needs another algorithm to avoid this. + */ + n = fmt->frac_size * 2 + 2; + /* Don't compute a value overlapping the sticky bit */ + n = MIN(n, 62); + } + + for (i = 0; i < n; i++) { + if (a1) { + mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); + } else if (a0 & 0xffffffffull) { + mul64To128(a0, a0, &a0, &a1); + } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { + a0 >>= 32; + a0 *= a0; + } else { + goto exact; + } + + if (a0 & DECOMPOSED_IMPLICIT_BIT) { + if (unlikely(a_exp == 0 && r == 0)) { + /* + * When a_exp == 0, we're computing the log2 of a value + * [1.0,2.0). When the value is very close to 1.0, there + * are lots of 0's in the msb parts of the fraction. + * We need to compute more digits to produce a correct + * result -- restart at the top of the fraction. + * ??? This is likely to lose precision quickly, as for + * float128; we may need another method. + */ + f_exp -= i; + t = r = DECOMPOSED_IMPLICIT_BIT; + i = 0; + } else { + r |= t; + } + } else { + add128(a0, a1, a0, a1, &a0, &a1); + } + t >>= 1; + } + + /* Set sticky for inexact. */ + r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); + + exact: + parts_sint_to_float(a, a_exp, 0, s); + if (r == 0) { + return; + } + + memset(&f, 0, sizeof(f)); + f.cls = float_class_normal; + f.frac_hi = r; + f.exp = f_exp - frac_normalize(&f); + + if (a_exp < 0) { + parts_sub_normal(a, &f); + } else if (a_exp > 0) { + parts_add_normal(a, &f); + } else { + *a = f; + } + return; + + d_nan: + float_raise(float_flag_invalid, s); + parts_default_nan(a, s); +} diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index c2f87addb25..f2ad0f335e6 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -103,7 +103,7 @@ static inline bool snan_bit_is_one(float_status *status) { #if defined(TARGET_MIPS) return status->snan_bit_is_one; -#elif defined(TARGET_HPPA) || defined(TARGET_UNICORE32) || defined(TARGET_SH4) +#elif defined(TARGET_HPPA) || defined(TARGET_SH4) return 1; #else return 0; @@ -129,7 +129,7 @@ static bool parts_is_snan_frac(uint64_t frac, float_status *status) | The pattern for a default generated deconstructed floating-point NaN. *----------------------------------------------------------------------------*/ -static FloatParts parts_default_nan(float_status *status) +static void parts64_default_nan(FloatParts64 *p, float_status *status) { bool sign = 0; uint64_t frac; @@ -145,12 +145,14 @@ static FloatParts parts_default_nan(float_status *status) #elif defined(TARGET_HPPA) /* snan_bit_is_one, set msb-1. */ frac = 1ULL << (DECOMPOSED_BINARY_POINT - 2); +#elif defined(TARGET_HEXAGON) + sign = 1; + frac = ~0ULL; #else - /* This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V, - * S390, SH4, TriCore, and Xtensa. I cannot find documentation - * for Unicore32; the choice from the original commit is unchanged. 
- * Our other supported targets, CRIS, LM32, Moxie, Nios2, and Tile, - * do not have floating-point. + /* + * This case is true for Alpha, ARM, MIPS, OpenRISC, PPC, RISC-V, + * S390, SH4, TriCore, and Xtensa. Our other supported targets, + * CRIS, Nios2, and Tile, do not have floating-point. */ if (snan_bit_is_one(status)) { /* set all bits other than msb */ @@ -161,7 +163,7 @@ static FloatParts parts_default_nan(float_status *status) } #endif - return (FloatParts) { + *p = (FloatParts64) { .cls = float_class_qnan, .sign = sign, .exp = INT_MAX, @@ -169,26 +171,54 @@ static FloatParts parts_default_nan(float_status *status) }; } +static void parts128_default_nan(FloatParts128 *p, float_status *status) +{ + /* + * Extrapolate from the choices made by parts64_default_nan to fill + * in the quad-floating format. If the low bit is set, assume we + * want to set all non-snan bits. + */ + FloatParts64 p64; + parts64_default_nan(&p64, status); + + *p = (FloatParts128) { + .cls = float_class_qnan, + .sign = p64.sign, + .exp = INT_MAX, + .frac_hi = p64.frac, + .frac_lo = -(p64.frac & 1) + }; +} + /*---------------------------------------------------------------------------- | Returns a quiet NaN from a signalling NaN for the deconstructed | floating-point parts. *----------------------------------------------------------------------------*/ -static FloatParts parts_silence_nan(FloatParts a, float_status *status) +static uint64_t parts_silence_nan_frac(uint64_t frac, float_status *status) { g_assert(!no_signaling_nans(status)); -#if defined(TARGET_HPPA) - a.frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1)); - a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2); -#else + + /* The only snan_bit_is_one target without default_nan_mode is HPPA. */ if (snan_bit_is_one(status)) { - return parts_default_nan(status); + frac &= ~(1ULL << (DECOMPOSED_BINARY_POINT - 1)); + frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 2); } else { - a.frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1); + frac |= 1ULL << (DECOMPOSED_BINARY_POINT - 1); } -#endif - a.cls = float_class_qnan; - return a; + return frac; +} + +static void parts64_silence_nan(FloatParts64 *p, float_status *status) +{ + p->frac = parts_silence_nan_frac(p->frac, status); + p->cls = float_class_qnan; +} + +static void parts128_silence_nan(FloatParts128 *p, float_status *status) +{ + p->frac_hi = parts_silence_nan_frac(p->frac_hi, status); + p->cls = float_class_qnan; } /*---------------------------------------------------------------------------- @@ -225,26 +255,6 @@ floatx80 floatx80_default_nan(float_status *status) const floatx80 floatx80_infinity = make_floatx80_init(floatx80_infinity_high, floatx80_infinity_low); -/*---------------------------------------------------------------------------- -| Raises the exceptions specified by `flags'. Floating-point traps can be -| defined here if desired. It is currently not possible for such a trap -| to substitute a result value. If traps are not implemented, this routine -| should be simply `float_exception_flags |= flags;'. -*----------------------------------------------------------------------------*/ - -void float_raise(uint8_t flags, float_status *status) -{ - status->float_exception_flags |= flags; -} - -/*---------------------------------------------------------------------------- -| Internal canonical NaN format. 
-*----------------------------------------------------------------------------*/ -typedef struct { - bool sign; - uint64_t high, low; -} commonNaNT; - /*---------------------------------------------------------------------------- | Returns 1 if the half-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -360,46 +370,6 @@ bool float32_is_signaling_nan(float32 a_, float_status *status) } } -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float32ToCommonNaN(float32 a, float_status *status) -{ - commonNaNT z; - - if (float32_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = float32_val(a) >> 31; - z.low = 0; - z.high = ((uint64_t)float32_val(a)) << 41; - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the single- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float32 commonNaNToFloat32(commonNaNT a, float_status *status) -{ - uint32_t mantissa = a.high >> 41; - - if (status->default_nan_mode) { - return float32_default_nan(status); - } - - if (mantissa) { - return make_float32( - (((uint32_t)a.sign) << 31) | 0x7F800000 | (a.high >> 41)); - } else { - return float32_default_nan(status); - } -} - /*---------------------------------------------------------------------------- | Select which NaN to propagate for a two-input operation. | IEEE754 doesn't specify all the details of this, so the @@ -624,6 +594,12 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, } else { return 1; } +#elif defined(TARGET_RISCV) + /* For RISC-V, InvalidOp is set when multiplicands are Inf and zero */ + if (infzero) { + float_raise(float_flag_invalid, status); + } + return 3; /* default NaN */ #elif defined(TARGET_XTENSA) /* * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns @@ -664,62 +640,6 @@ static int pickNaNMulAdd(FloatClass a_cls, FloatClass b_cls, FloatClass c_cls, #endif } -/*---------------------------------------------------------------------------- -| Takes two single-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status) -{ - bool aIsLargerSignificand; - uint32_t av, bv; - FloatClass a_cls, b_cls; - - /* This is not complete, but is good enough for pickNaN. */ - a_cls = (!float32_is_any_nan(a) - ? float_class_normal - : float32_is_signaling_nan(a, status) - ? float_class_snan - : float_class_qnan); - b_cls = (!float32_is_any_nan(b) - ? float_class_normal - : float32_is_signaling_nan(b, status) - ? 
float_class_snan - : float_class_qnan); - - av = float32_val(a); - bv = float32_val(b); - - if (is_snan(a_cls) || is_snan(b_cls)) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float32_default_nan(status); - } - - if ((uint32_t)(av << 1) < (uint32_t)(bv << 1)) { - aIsLargerSignificand = 0; - } else if ((uint32_t)(bv << 1) < (uint32_t)(av << 1)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (av < bv) ? 1 : 0; - } - - if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) { - if (is_snan(b_cls)) { - return float32_silence_nan(b, status); - } - return b; - } else { - if (is_snan(a_cls)) { - return float32_silence_nan(a, status); - } - return a; - } -} - /*---------------------------------------------------------------------------- | Returns 1 if the double-precision floating-point value `a' is a quiet | NaN; otherwise returns 0. @@ -760,104 +680,6 @@ bool float64_is_signaling_nan(float64 a_, float_status *status) } } -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float64ToCommonNaN(float64 a, float_status *status) -{ - commonNaNT z; - - if (float64_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = float64_val(a) >> 63; - z.low = 0; - z.high = float64_val(a) << 12; - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the double- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float64 commonNaNToFloat64(commonNaNT a, float_status *status) -{ - uint64_t mantissa = a.high >> 12; - - if (status->default_nan_mode) { - return float64_default_nan(status); - } - - if (mantissa) { - return make_float64( - (((uint64_t) a.sign) << 63) - | UINT64_C(0x7FF0000000000000) - | (a.high >> 12)); - } else { - return float64_default_nan(status); - } -} - -/*---------------------------------------------------------------------------- -| Takes two double-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status) -{ - bool aIsLargerSignificand; - uint64_t av, bv; - FloatClass a_cls, b_cls; - - /* This is not complete, but is good enough for pickNaN. */ - a_cls = (!float64_is_any_nan(a) - ? float_class_normal - : float64_is_signaling_nan(a, status) - ? float_class_snan - : float_class_qnan); - b_cls = (!float64_is_any_nan(b) - ? float_class_normal - : float64_is_signaling_nan(b, status) - ? 
float_class_snan - : float_class_qnan); - - av = float64_val(a); - bv = float64_val(b); - - if (is_snan(a_cls) || is_snan(b_cls)) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float64_default_nan(status); - } - - if ((uint64_t)(av << 1) < (uint64_t)(bv << 1)) { - aIsLargerSignificand = 0; - } else if ((uint64_t)(bv << 1) < (uint64_t)(av << 1)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (av < bv) ? 1 : 0; - } - - if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) { - if (is_snan(b_cls)) { - return float64_silence_nan(b, status); - } - return b; - } else { - if (is_snan(a_cls)) { - return float64_silence_nan(a, status); - } - return a; - } -} - /*---------------------------------------------------------------------------- | Returns 1 if the extended double-precision floating-point value `a' is a | quiet NaN; otherwise returns 0. This slightly differs from the same @@ -921,55 +743,6 @@ floatx80 floatx80_silence_nan(floatx80 a, float_status *status) return a; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the -| invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT floatx80ToCommonNaN(floatx80 a, float_status *status) -{ - floatx80 dflt; - commonNaNT z; - - if (floatx80_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - if (a.low >> 63) { - z.sign = a.high >> 15; - z.low = 0; - z.high = a.low << 1; - } else { - dflt = floatx80_default_nan(status); - z.sign = dflt.high >> 15; - z.low = 0; - z.high = dflt.low << 1; - } - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the extended -| double-precision floating-point format. -*----------------------------------------------------------------------------*/ - -static floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) -{ - floatx80 z; - - if (status->default_nan_mode) { - return floatx80_default_nan(status); - } - - if (a.high >> 1) { - z.low = UINT64_C(0x8000000000000000) | a.high >> 1; - z.high = (((uint16_t)a.sign) << 15) | 0x7FFF; - } else { - z = floatx80_default_nan(status); - } - return z; -} - /*---------------------------------------------------------------------------- | Takes two extended double-precision floating-point values `a' and `b', one | of which is a NaN, and returns the appropriate NaN result. If either `a' or @@ -1061,111 +834,3 @@ bool float128_is_signaling_nan(float128 a, float_status *status) } } } - -/*---------------------------------------------------------------------------- -| Returns a quiet NaN from a signalling NaN for the quadruple-precision -| floating point value `a'. -*----------------------------------------------------------------------------*/ - -float128 float128_silence_nan(float128 a, float_status *status) -{ - if (no_signaling_nans(status)) { - g_assert_not_reached(); - } else { - if (snan_bit_is_one(status)) { - return float128_default_nan(status); - } else { - a.high |= UINT64_C(0x0000800000000000); - return a; - } - } -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point NaN -| `a' to the canonical NaN format. 
If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float128ToCommonNaN(float128 a, float_status *status) -{ - commonNaNT z; - - if (float128_is_signaling_nan(a, status)) { - float_raise(float_flag_invalid, status); - } - z.sign = a.high >> 63; - shortShift128Left(a.high, a.low, 16, &z.high, &z.low); - return z; -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the quadruple- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float128 commonNaNToFloat128(commonNaNT a, float_status *status) -{ - float128 z; - - if (status->default_nan_mode) { - return float128_default_nan(status); - } - - shift128Right(a.high, a.low, 16, &z.high, &z.low); - z.high |= (((uint64_t)a.sign) << 63) | UINT64_C(0x7FFF000000000000); - return z; -} - -/*---------------------------------------------------------------------------- -| Takes two quadruple-precision floating-point values `a' and `b', one of -| which is a NaN, and returns the appropriate NaN result. If either `a' or -| `b' is a signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float128 propagateFloat128NaN(float128 a, float128 b, - float_status *status) -{ - bool aIsLargerSignificand; - FloatClass a_cls, b_cls; - - /* This is not complete, but is good enough for pickNaN. */ - a_cls = (!float128_is_any_nan(a) - ? float_class_normal - : float128_is_signaling_nan(a, status) - ? float_class_snan - : float_class_qnan); - b_cls = (!float128_is_any_nan(b) - ? float_class_normal - : float128_is_signaling_nan(b, status) - ? float_class_snan - : float_class_qnan); - - if (is_snan(a_cls) || is_snan(b_cls)) { - float_raise(float_flag_invalid, status); - } - - if (status->default_nan_mode) { - return float128_default_nan(status); - } - - if (lt128(a.high << 1, a.low, b.high << 1, b.low)) { - aIsLargerSignificand = 0; - } else if (lt128(b.high << 1, b.low, a.high << 1, a.low)) { - aIsLargerSignificand = 1; - } else { - aIsLargerSignificand = (a.high < b.high) ? 1 : 0; - } - - if (pickNaN(a_cls, b_cls, aIsLargerSignificand, status)) { - if (is_snan(b_cls)) { - return float128_silence_nan(b, status); - } - return b; - } else { - if (is_snan(a_cls)) { - return float128_silence_nan(a, status); - } - return a; - } -} diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 67cfa0fd82c..9a28720d82a 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -132,7 +132,7 @@ this code that are retained. 
if (unlikely(soft_t ## _is_denormal(*a))) { \ *a = soft_t ## _set_sign(soft_t ## _zero, \ soft_t ## _is_neg(*a)); \ - s->float_exception_flags |= float_flag_input_denormal; \ + float_raise(float_flag_input_denormal, s); \ } \ } @@ -360,7 +360,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s, ur.h = hard(ua.h, ub.h); if (unlikely(f32_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; + float_raise(float_flag_overflow, s); } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) { goto soft; } @@ -391,7 +391,7 @@ float64_gen2(float64 xa, float64 xb, float_status *s, ur.h = hard(ua.h, ub.h); if (unlikely(f64_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; + float_raise(float_flag_overflow, s); } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) { goto soft; } @@ -401,60 +401,6 @@ float64_gen2(float64 xa, float64 xb, float_status *s, return soft(ua.s, ub.s, s); } -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint32_t extractFloat32Frac(float32 a) -{ - return float32_val(a) & 0x007FFFFF; -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline int extractFloat32Exp(float32 a) -{ - return (float32_val(a) >> 23) & 0xFF; -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline bool extractFloat32Sign(float32 a) -{ - return float32_val(a) >> 31; -} - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat64Frac(float64 a) -{ - return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF); -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline int extractFloat64Exp(float64 a) -{ - return (float64_val(a) >> 52) & 0x7FF; -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline bool extractFloat64Sign(float64 a) -{ - return float64_val(a) >> 63; -} - /* * Classify a floating point number. Everything above float_class_qnan * is a NaN so cls >= float_class_qnan is any NaN. 
@@ -469,6 +415,34 @@ typedef enum __attribute__ ((__packed__)) { float_class_snan, } FloatClass; +#define float_cmask(bit) (1u << (bit)) + +enum { + float_cmask_zero = float_cmask(float_class_zero), + float_cmask_normal = float_cmask(float_class_normal), + float_cmask_inf = float_cmask(float_class_inf), + float_cmask_qnan = float_cmask(float_class_qnan), + float_cmask_snan = float_cmask(float_class_snan), + + float_cmask_infzero = float_cmask_zero | float_cmask_inf, + float_cmask_anynan = float_cmask_qnan | float_cmask_snan, +}; + +/* Flags for parts_minmax. */ +enum { + /* Set for minimum; clear for maximum. */ + minmax_ismin = 1, + /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */ + minmax_isnum = 2, + /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */ + minmax_ismag = 4, + /* + * Set for the IEEE 754-2019 minimumNumber() and maximumNumber() + * operations. + */ + minmax_isnumber = 8, +}; + /* Simple helpers for checking if, or what kind of, NaN we have */ static inline __attribute__((unused)) bool is_nan(FloatClass c) { @@ -486,26 +460,52 @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c) } /* - * Structure holding all of the decomposed parts of a float. The - * exponent is unbiased and the fraction is normalized. All - * calculations are done with a 64 bit fraction and then rounded as - * appropriate for the final format. + * Structure holding all of the decomposed parts of a float. + * The exponent is unbiased and the fraction is normalized. * - * Thanks to the packed FloatClass a decent compiler should be able to - * fit the whole structure into registers and avoid using the stack - * for parameter passing. + * The fraction words are stored in big-endian word ordering, + * so that truncation from a larger format to a smaller format + * can be done simply by ignoring subsequent elements. */ typedef struct { - uint64_t frac; - int32_t exp; FloatClass cls; bool sign; -} FloatParts; + int32_t exp; + union { + /* Routines that know the structure may reference the singular name. */ + uint64_t frac; + /* + * Routines expanded with multiple structures reference "hi" and "lo" + * depending on the operation. In FloatParts64, "hi" and "lo" are + * both the same word and aliased here. + */ + uint64_t frac_hi; + uint64_t frac_lo; + }; +} FloatParts64; + +typedef struct { + FloatClass cls; + bool sign; + int32_t exp; + uint64_t frac_hi; + uint64_t frac_lo; +} FloatParts128; -#define DECOMPOSED_BINARY_POINT (64 - 2) +typedef struct { + FloatClass cls; + bool sign; + int32_t exp; + uint64_t frac_hi; + uint64_t frac_hm; /* high-middle */ + uint64_t frac_lm; /* low-middle */ + uint64_t frac_lo; +} FloatParts256; + +/* These apply to the most significant word of each FloatPartsN. */ +#define DECOMPOSED_BINARY_POINT 63 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) -#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) /* Structure holding all of the relevant parameters for a format. 
* exp_size: the size of the exponent field @@ -514,9 +514,7 @@ typedef struct { * frac_size: the size of the fraction field * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT * The following are computed based the size of fraction - * frac_lsb: least significant bit of fraction - * frac_lsbm1: the bit below the least significant bit (for rounding) - * round_mask/roundeven_mask: masks used for rounding + * round_mask: bits below lsb which must be rounded * The following optional modifiers are available: * arm_althp: handle ARM Alternative Half Precision */ @@ -526,24 +524,21 @@ typedef struct { int exp_max; int frac_size; int frac_shift; - uint64_t frac_lsb; - uint64_t frac_lsbm1; - uint64_t round_mask; - uint64_t roundeven_mask; bool arm_althp; + uint64_t round_mask; } FloatFmt; /* Expand fields based on the size of exponent and fraction */ -#define FLOAT_PARAMS(E, F) \ - .exp_size = E, \ - .exp_bias = ((1 << E) - 1) >> 1, \ - .exp_max = (1 << E) - 1, \ - .frac_size = F, \ - .frac_shift = DECOMPOSED_BINARY_POINT - F, \ - .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \ - .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \ - .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \ - .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1 +#define FLOAT_PARAMS_(E) \ + .exp_size = E, \ + .exp_bias = ((1 << E) - 1) >> 1, \ + .exp_max = (1 << E) - 1 + +#define FLOAT_PARAMS(E, F) \ + FLOAT_PARAMS_(E), \ + .frac_size = F, \ + .frac_shift = (-F - 1) & 63, \ + .round_mask = (1ull << ((-F - 1) & 63)) - 1 static const FloatFmt float16_params = { FLOAT_PARAMS(5, 10) @@ -566,65 +561,123 @@ static const FloatFmt float64_params = { FLOAT_PARAMS(11, 52) }; +static const FloatFmt float128_params = { + FLOAT_PARAMS(15, 112) +}; + +#define FLOATX80_PARAMS(R) \ + FLOAT_PARAMS_(15), \ + .frac_size = R == 64 ? 63 : R, \ + .frac_shift = 0, \ + .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1 + +static const FloatFmt floatx80_params[3] = { + [floatx80_precision_s] = { FLOATX80_PARAMS(23) }, + [floatx80_precision_d] = { FLOATX80_PARAMS(52) }, + [floatx80_precision_x] = { FLOATX80_PARAMS(64) }, +}; + /* Unpack a float to parts, but do not canonicalize. 
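  * For example, the float32 value 0x40490fdb (pi) unpacks to
  * sign = 0, exp = 128 and frac = 0x490fdb: the exponent is still
  * biased and the implicit bit has not yet been added; that is the
  * job of canonicalization.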
*/ -static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw) +static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw) { - const int sign_pos = fmt.frac_size + fmt.exp_size; + const int f_size = fmt->frac_size; + const int e_size = fmt->exp_size; - return (FloatParts) { + *r = (FloatParts64) { .cls = float_class_unclassified, - .sign = extract64(raw, sign_pos, 1), - .exp = extract64(raw, fmt.frac_size, fmt.exp_size), - .frac = extract64(raw, 0, fmt.frac_size), + .sign = extract64(raw, f_size + e_size, 1), + .exp = extract64(raw, f_size, e_size), + .frac = extract64(raw, 0, f_size) }; } -static inline FloatParts float16_unpack_raw(float16 f) +static inline void float16_unpack_raw(FloatParts64 *p, float16 f) +{ + unpack_raw64(p, &float16_params, f); +} + +static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f) +{ + unpack_raw64(p, &bfloat16_params, f); +} + +static inline void float32_unpack_raw(FloatParts64 *p, float32 f) { - return unpack_raw(float16_params, f); + unpack_raw64(p, &float32_params, f); } -static inline FloatParts bfloat16_unpack_raw(bfloat16 f) +static inline void float64_unpack_raw(FloatParts64 *p, float64 f) { - return unpack_raw(bfloat16_params, f); + unpack_raw64(p, &float64_params, f); } -static inline FloatParts float32_unpack_raw(float32 f) +static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f) { - return unpack_raw(float32_params, f); + *p = (FloatParts128) { + .cls = float_class_unclassified, + .sign = extract32(f.high, 15, 1), + .exp = extract32(f.high, 0, 15), + .frac_hi = f.low + }; } -static inline FloatParts float64_unpack_raw(float64 f) +static void float128_unpack_raw(FloatParts128 *p, float128 f) { - return unpack_raw(float64_params, f); + const int f_size = float128_params.frac_size - 64; + const int e_size = float128_params.exp_size; + + *p = (FloatParts128) { + .cls = float_class_unclassified, + .sign = extract64(f.high, f_size + e_size, 1), + .exp = extract64(f.high, f_size, e_size), + .frac_hi = extract64(f.high, 0, f_size), + .frac_lo = f.low, + }; } /* Pack a float from parts, but do not canonicalize. 
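  * For example, float64 parts with sign = 1, exp = 0x400 and frac = 0
  * pack to 0xc000000000000000, i.e. -2.0.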
*/ -static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p) +static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt) +{ + const int f_size = fmt->frac_size; + const int e_size = fmt->exp_size; + uint64_t ret; + + ret = (uint64_t)p->sign << (f_size + e_size); + ret = deposit64(ret, f_size, e_size, p->exp); + ret = deposit64(ret, 0, f_size, p->frac); + return ret; +} + +static inline float16 float16_pack_raw(const FloatParts64 *p) { - const int sign_pos = fmt.frac_size + fmt.exp_size; - uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp); - return deposit64(ret, sign_pos, 1, p.sign); + return make_float16(pack_raw64(p, &float16_params)); } -static inline float16 float16_pack_raw(FloatParts p) +static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p) { - return make_float16(pack_raw(float16_params, p)); + return pack_raw64(p, &bfloat16_params); } -static inline bfloat16 bfloat16_pack_raw(FloatParts p) +static inline float32 float32_pack_raw(const FloatParts64 *p) { - return pack_raw(bfloat16_params, p); + return make_float32(pack_raw64(p, &float32_params)); } -static inline float32 float32_pack_raw(FloatParts p) +static inline float64 float64_pack_raw(const FloatParts64 *p) { - return make_float32(pack_raw(float32_params, p)); + return make_float64(pack_raw64(p, &float64_params)); } -static inline float64 float64_pack_raw(FloatParts p) +static float128 float128_pack_raw(const FloatParts128 *p) { - return make_float64(pack_raw(float64_params, p)); + const int f_size = float128_params.frac_size - 64; + const int e_size = float128_params.exp_size; + uint64_t hi; + + hi = (uint64_t)p->sign << (f_size + e_size); + hi = deposit64(hi, f_size, e_size, p->exp); + hi = deposit64(hi, 0, f_size, p->frac_hi); + return make_float128(hi, p->frac_lo); } /*---------------------------------------------------------------------------- @@ -637,7003 +690,4175 @@ static inline float64 float64_pack_raw(FloatParts p) *----------------------------------------------------------------------------*/ #include "softfloat-specialize.c.inc" -/* Canonicalize EXP and FRAC, setting CLS. */ -static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm, - float_status *status) -{ - if (part.exp == parm->exp_max && !parm->arm_althp) { - if (part.frac == 0) { - part.cls = float_class_inf; - } else { - part.frac <<= parm->frac_shift; - part.cls = (parts_is_snan_frac(part.frac, status) - ? float_class_snan : float_class_qnan); - } - } else if (part.exp == 0) { - if (likely(part.frac == 0)) { - part.cls = float_class_zero; - } else if (status->flush_inputs_to_zero) { - float_raise(float_flag_input_denormal, status); - part.cls = float_class_zero; - part.frac = 0; - } else { - int shift = clz64(part.frac) - 1; - part.cls = float_class_normal; - part.exp = parm->frac_shift - parm->exp_bias - shift + 1; - part.frac <<= shift; - } - } else { - part.cls = float_class_normal; - part.exp -= parm->exp_bias; - part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift); - } - return part; -} +#define PARTS_GENERIC_64_128(NAME, P) \ + _Generic((P), FloatParts64 *: parts64_##NAME, \ + FloatParts128 *: parts128_##NAME) -/* Round and uncanonicalize a floating-point number by parts. There - * are FRAC_SHIFT bits that may require rounding at the bottom of the - * fraction; these bits will be removed. The exponent will be biased - * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. 
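The PARTS_GENERIC_64_128 / _256 macros above are plain C11 _Generic dispatch: the worker function is chosen from the static type of the pointer argument, so callers can invoke parts_canonicalize() and friends on either width without spelling out the size. A self-contained sketch of the same pattern (the types and functions here are invented purely for illustration):

#include <stdio.h>

typedef struct { unsigned long long frac; }    Parts64;
typedef struct { unsigned long long hi, lo; }  Parts128;

static void parts64_dump(Parts64 *p)   { printf("64:  %llx\n", p->frac); }
static void parts128_dump(Parts128 *p) { printf("128: %llx:%llx\n", p->hi, p->lo); }

/* Same shape as PARTS_GENERIC_64_128: select the callee by pointer type. */
#define parts_dump(P) \
    _Generic((P), Parts64 *: parts64_dump, Parts128 *: parts128_dump)(P)

int main(void)
{
    Parts64  a = { 1 };
    Parts128 b = { 2, 3 };

    parts_dump(&a);     /* resolves to parts64_dump at compile time */
    parts_dump(&b);     /* resolves to parts128_dump */
    return 0;
}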
- */ +#define PARTS_GENERIC_64_128_256(NAME, P) \ + _Generic((P), FloatParts64 *: parts64_##NAME, \ + FloatParts128 *: parts128_##NAME, \ + FloatParts256 *: parts256_##NAME) -static FloatParts round_canonical(FloatParts p, float_status *s, - const FloatFmt *parm) -{ - const uint64_t frac_lsb = parm->frac_lsb; - const uint64_t frac_lsbm1 = parm->frac_lsbm1; - const uint64_t round_mask = parm->round_mask; - const uint64_t roundeven_mask = parm->roundeven_mask; - const int exp_max = parm->exp_max; - const int frac_shift = parm->frac_shift; - uint64_t frac, inc; - int exp, flags = 0; - bool overflow_norm; +#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S) +#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S) - frac = p.frac; - exp = p.exp; +static void parts64_return_nan(FloatParts64 *a, float_status *s); +static void parts128_return_nan(FloatParts128 *a, float_status *s); - switch (p.cls) { - case float_class_normal: - switch (s->float_rounding_mode) { - case float_round_nearest_even: - overflow_norm = false; - inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0); - break; - case float_round_ties_away: - overflow_norm = false; - inc = frac_lsbm1; - break; - case float_round_to_zero: - overflow_norm = true; - inc = 0; - break; - case float_round_up: - inc = p.sign ? 0 : round_mask; - overflow_norm = p.sign; - break; - case float_round_down: - inc = p.sign ? round_mask : 0; - overflow_norm = !p.sign; - break; - case float_round_to_odd: - overflow_norm = true; - inc = frac & frac_lsb ? 0 : round_mask; - break; - default: - g_assert_not_reached(); - } +#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S) - exp += parm->exp_bias; - if (likely(exp > 0)) { - if (frac & round_mask) { - flags |= float_flag_inexact; - frac += inc; - if (frac & DECOMPOSED_OVERFLOW_BIT) { - frac >>= 1; - exp++; - } - } - frac >>= frac_shift; - - if (parm->arm_althp) { - /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ - if (unlikely(exp > exp_max)) { - /* Overflow. Return the maximum normal. */ - flags = float_flag_invalid; - exp = exp_max; - frac = -1; - } - } else if (unlikely(exp >= exp_max)) { - flags |= float_flag_overflow | float_flag_inexact; - if (overflow_norm) { - exp = exp_max - 1; - frac = -1; - } else { - p.cls = float_class_inf; - goto do_inf; - } - } - } else if (s->flush_to_zero) { - flags |= float_flag_output_denormal; - p.cls = float_class_zero; - goto do_zero; - } else { - bool is_tiny = s->tininess_before_rounding - || (exp < 0) - || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT); - - shift64RightJamming(frac, 1 - exp, &frac); - if (frac & round_mask) { - /* Need to recompute round-to-even. */ - switch (s->float_rounding_mode) { - case float_round_nearest_even: - inc = ((frac & roundeven_mask) != frac_lsbm1 - ? frac_lsbm1 : 0); - break; - case float_round_to_odd: - inc = frac & frac_lsb ? 0 : round_mask; - break; - default: - break; - } - flags |= float_flag_inexact; - frac += inc; - } +static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b, + float_status *s); +static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b, + float_status *s); - exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 
1 : 0); - frac >>= frac_shift; +#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S) - if (is_tiny && (flags & float_flag_inexact)) { - flags |= float_flag_underflow; - } - if (exp == 0 && frac == 0) { - p.cls = float_class_zero; - } - } - break; +static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b, + FloatParts64 *c, float_status *s, + int ab_mask, int abc_mask); +static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a, + FloatParts128 *b, + FloatParts128 *c, + float_status *s, + int ab_mask, int abc_mask); - case float_class_zero: - do_zero: - exp = 0; - frac = 0; - break; +#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \ + PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM) - case float_class_inf: - do_inf: - assert(!parm->arm_althp); - exp = exp_max; - frac = 0; - break; +static void parts64_canonicalize(FloatParts64 *p, float_status *status, + const FloatFmt *fmt); +static void parts128_canonicalize(FloatParts128 *p, float_status *status, + const FloatFmt *fmt); - case float_class_qnan: - case float_class_snan: - assert(!parm->arm_althp); - exp = exp_max; - frac >>= parm->frac_shift; - break; +#define parts_canonicalize(A, S, F) \ + PARTS_GENERIC_64_128(canonicalize, A)(A, S, F) - default: - g_assert_not_reached(); - } +static void parts64_uncanon_normal(FloatParts64 *p, float_status *status, + const FloatFmt *fmt); +static void parts128_uncanon_normal(FloatParts128 *p, float_status *status, + const FloatFmt *fmt); - float_raise(flags, s); - p.exp = exp; - p.frac = frac; - return p; -} +#define parts_uncanon_normal(A, S, F) \ + PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F) + +static void parts64_uncanon(FloatParts64 *p, float_status *status, + const FloatFmt *fmt); +static void parts128_uncanon(FloatParts128 *p, float_status *status, + const FloatFmt *fmt); + +#define parts_uncanon(A, S, F) \ + PARTS_GENERIC_64_128(uncanon, A)(A, S, F) + +static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b); +static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b); +static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b); + +#define parts_add_normal(A, B) \ + PARTS_GENERIC_64_128_256(add_normal, A)(A, B) -/* Explicit FloatFmt version */ -static FloatParts float16a_unpack_canonical(float16 f, float_status *s, - const FloatFmt *params) +static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b); +static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b); +static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b); + +#define parts_sub_normal(A, B) \ + PARTS_GENERIC_64_128_256(sub_normal, A)(A, B) + +static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b, + float_status *s, bool subtract); +static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b, + float_status *s, bool subtract); + +#define parts_addsub(A, B, S, Z) \ + PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z) + +static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b, + float_status *s); +static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b, + float_status *s); + +#define parts_mul(A, B, S) \ + PARTS_GENERIC_64_128(mul, A)(A, B, S) + +static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b, + FloatParts64 *c, int flags, + float_status *s); +static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b, + FloatParts128 *c, int flags, + float_status *s); + +#define parts_muladd(A, B, C, Z, S) \ + PARTS_GENERIC_64_128(muladd, A)(A, B, C, 
Z, S) + +static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b, + float_status *s); +static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b, + float_status *s); + +#define parts_div(A, B, S) \ + PARTS_GENERIC_64_128(div, A)(A, B, S) + +static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b, + uint64_t *mod_quot, float_status *s); +static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b, + uint64_t *mod_quot, float_status *s); + +#define parts_modrem(A, B, Q, S) \ + PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S) + +static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f); +static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f); + +#define parts_sqrt(A, S, F) \ + PARTS_GENERIC_64_128(sqrt, A)(A, S, F) + +static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm, + int scale, int frac_size); +static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r, + int scale, int frac_size); + +#define parts_round_to_int_normal(A, R, C, F) \ + PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F) + +static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm, + int scale, float_status *s, + const FloatFmt *fmt); +static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r, + int scale, float_status *s, + const FloatFmt *fmt); + +#define parts_round_to_int(A, R, C, S, F) \ + PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F) + +static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode, + int scale, int64_t min, int64_t max, + float_status *s); +static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode, + int scale, int64_t min, int64_t max, + float_status *s); + +#define parts_float_to_sint(P, R, Z, MN, MX, S) \ + PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S) + +static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode, + int scale, uint64_t max, + float_status *s); +static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode, + int scale, uint64_t max, + float_status *s); + +#define parts_float_to_uint(P, R, Z, M, S) \ + PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S) + +static void parts64_sint_to_float(FloatParts64 *p, int64_t a, + int scale, float_status *s); +static void parts128_sint_to_float(FloatParts128 *p, int64_t a, + int scale, float_status *s); + +#define parts_sint_to_float(P, I, Z, S) \ + PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S) + +static void parts64_uint_to_float(FloatParts64 *p, uint64_t a, + int scale, float_status *s); +static void parts128_uint_to_float(FloatParts128 *p, uint64_t a, + int scale, float_status *s); + +#define parts_uint_to_float(P, I, Z, S) \ + PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S) + +static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b, + float_status *s, int flags); +static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b, + float_status *s, int flags); + +#define parts_minmax(A, B, S, F) \ + PARTS_GENERIC_64_128(minmax, A)(A, B, S, F) + +static int parts64_compare(FloatParts64 *a, FloatParts64 *b, + float_status *s, bool q); +static int parts128_compare(FloatParts128 *a, FloatParts128 *b, + float_status *s, bool q); + +#define parts_compare(A, B, S, Q) \ + PARTS_GENERIC_64_128(compare, A)(A, B, S, Q) + +static void parts64_scalbn(FloatParts64 *a, int n, float_status *s); +static void parts128_scalbn(FloatParts128 *a, int n, float_status *s); + +#define parts_scalbn(A, 
N, S) \ + PARTS_GENERIC_64_128(scalbn, A)(A, N, S) + +static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f); +static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f); + +#define parts_log2(A, S, F) \ + PARTS_GENERIC_64_128(log2, A)(A, S, F) + +/* + * Helper functions for softfloat-parts.c.inc, per-size operations. + */ + +#define FRAC_GENERIC_64_128(NAME, P) \ + _Generic((P), FloatParts64 *: frac64_##NAME, \ + FloatParts128 *: frac128_##NAME) + +#define FRAC_GENERIC_64_128_256(NAME, P) \ + _Generic((P), FloatParts64 *: frac64_##NAME, \ + FloatParts128 *: frac128_##NAME, \ + FloatParts256 *: frac256_##NAME) + +static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b) { - return sf_canonicalize(float16_unpack_raw(f), params, s); + return uadd64_overflow(a->frac, b->frac, &r->frac); } -static FloatParts float16_unpack_canonical(float16 f, float_status *s) +static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b) { - return float16a_unpack_canonical(f, s, &float16_params); + bool c = 0; + r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); + r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); + return c; } -static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s) +static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b) { - return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s); + bool c = 0; + r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c); + r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c); + r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c); + r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c); + return c; } -static float16 float16a_round_pack_canonical(FloatParts p, float_status *s, - const FloatFmt *params) +#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B) + +static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c) { - return float16_pack_raw(round_canonical(p, s, params)); + return uadd64_overflow(a->frac, c, &r->frac); } -static float16 float16_round_pack_canonical(FloatParts p, float_status *s) +static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c) { - return float16a_round_pack_canonical(p, s, &float16_params); + c = uadd64_overflow(a->frac_lo, c, &r->frac_lo); + return uadd64_overflow(a->frac_hi, c, &r->frac_hi); } -static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s) +#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C) + +static void frac64_allones(FloatParts64 *a) { - return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params)); + a->frac = -1; } -static FloatParts float32_unpack_canonical(float32 f, float_status *s) +static void frac128_allones(FloatParts128 *a) { - return sf_canonicalize(float32_unpack_raw(f), &float32_params, s); + a->frac_hi = a->frac_lo = -1; } -static float32 float32_round_pack_canonical(FloatParts p, float_status *s) +#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A) + +static int frac64_cmp(FloatParts64 *a, FloatParts64 *b) { - return float32_pack_raw(round_canonical(p, s, &float32_params)); + return a->frac == b->frac ? 0 : a->frac < b->frac ? 
-1 : 1; } -static FloatParts float64_unpack_canonical(float64 f, float_status *s) +static int frac128_cmp(FloatParts128 *a, FloatParts128 *b) { - return sf_canonicalize(float64_unpack_raw(f), &float64_params, s); + uint64_t ta = a->frac_hi, tb = b->frac_hi; + if (ta == tb) { + ta = a->frac_lo, tb = b->frac_lo; + if (ta == tb) { + return 0; + } + } + return ta < tb ? -1 : 1; } -static float64 float64_round_pack_canonical(FloatParts p, float_status *s) +#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B) + +static void frac64_clear(FloatParts64 *a) { - return float64_pack_raw(round_canonical(p, s, &float64_params)); + a->frac = 0; } -static FloatParts return_nan(FloatParts a, float_status *s) +static void frac128_clear(FloatParts128 *a) { - switch (a.cls) { - case float_class_snan: - s->float_exception_flags |= float_flag_invalid; - a = parts_silence_nan(a, s); - /* fall through */ - case float_class_qnan: - if (s->default_nan_mode) { - return parts_default_nan(s); - } - break; - - default: - g_assert_not_reached(); - } - return a; + a->frac_hi = a->frac_lo = 0; } -static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s) +#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A) + +static bool frac64_div(FloatParts64 *a, FloatParts64 *b) { - if (is_snan(a.cls) || is_snan(b.cls)) { - s->float_exception_flags |= float_flag_invalid; - } + uint64_t n1, n0, r, q; + bool ret; - if (s->default_nan_mode) { - return parts_default_nan(s); + /* + * We want a 2*N / N-bit division to produce exactly an N-bit + * result, so that we do not lose any precision and so that we + * do not have to renormalize afterward. If A.frac < B.frac, + * then division would produce an (N-1)-bit result; shift A left + * by one to produce the an N-bit result, and return true to + * decrement the exponent to match. + * + * The udiv_qrnnd algorithm that we're using requires normalization, + * i.e. the msb of the denominator must be set, which is already true. + */ + ret = a->frac < b->frac; + if (ret) { + n0 = a->frac; + n1 = 0; } else { - if (pickNaN(a.cls, b.cls, - a.frac > b.frac || - (a.frac == b.frac && a.sign < b.sign), s)) { - a = b; - } - if (is_snan(a.cls)) { - return parts_silence_nan(a, s); - } + n0 = a->frac >> 1; + n1 = a->frac << 63; } - return a; + q = udiv_qrnnd(&r, n0, n1, b->frac); + + /* Set lsb if there is a remainder, to set inexact. */ + a->frac = q | (r != 0); + + return ret; } -static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c, - bool inf_zero, float_status *s) +static bool frac128_div(FloatParts128 *a, FloatParts128 *b) { - int which; - - if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) { - s->float_exception_flags |= float_flag_invalid; - } + uint64_t q0, q1, a0, a1, b0, b1; + uint64_t r0, r1, r2, r3, t0, t1, t2, t3; + bool ret = false; - which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s); + a0 = a->frac_hi, a1 = a->frac_lo; + b0 = b->frac_hi, b1 = b->frac_lo; - if (s->default_nan_mode) { - /* Note that this check is after pickNaNMulAdd so that function - * has an opportunity to set the Invalid flag. - */ - which = 3; + ret = lt128(a0, a1, b0, b1); + if (!ret) { + a1 = shr_double(a0, a1, 1); + a0 = a0 >> 1; } - switch (which) { - case 0: - break; - case 1: - a = b; - break; - case 2: - a = c; - break; - case 3: - return parts_default_nan(s); - default: - g_assert_not_reached(); - } + /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. 
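The pre-shift in frac64_div above is what guarantees a full 64-bit quotient: both fractions are normalized (msb set), so a/b lies in [1/2, 2); scaling the dividend by 2^64 when a < b and by 2^63 otherwise puts the quotient in [2^63, 2^64). A rough equivalent using the GCC/Clang unsigned __int128 extension in place of udiv_qrnnd, purely as an editor's sketch:

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool frac_div_sketch(uint64_t *a, uint64_t b)
{
    bool exp_dec = *a < b;                   /* need one extra doubling */
    unsigned __int128 n = (unsigned __int128)*a << (exp_dec ? 64 : 63);
    uint64_t q = (uint64_t)(n / b);
    uint64_t r = (uint64_t)(n % b);

    *a = q | (r != 0);                       /* sticky lsb records inexactness */
    return exp_dec;                          /* caller decrements the exponent */
}

int main(void)
{
    uint64_t f = 0x8000000000000000ull;                      /* 1.0 */
    bool dec = frac_div_sketch(&f, 0xc000000000000000ull);   /* / 1.5 */

    printf("%d 0x%016" PRIx64 "\n", dec, f);  /* 1 0xaaaaaaaaaaaaaaab */
    return 0;
}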
*/ + q0 = estimateDiv128To64(a0, a1, b0); - if (is_snan(a.cls)) { - return parts_silence_nan(a, s); + /* + * Estimate is high because B1 was not included (unless B1 == 0). + * Reduce quotient and increase remainder until remainder is non-negative. + * This loop will execute 0 to 2 times. + */ + mul128By64To192(b0, b1, q0, &t0, &t1, &t2); + sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2); + while (r0 != 0) { + q0--; + add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2); } - return a; -} -/* - * Returns the result of adding or subtracting the values of the - * floating-point values `a' and `b'. The operation is performed - * according to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic. - */ + /* Repeat using the remainder, producing a second word of quotient. */ + q1 = estimateDiv128To64(r1, r2, b0); + mul128By64To192(b0, b1, q1, &t1, &t2, &t3); + sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3); + while (r1 != 0) { + q1--; + add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3); + } -static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract, - float_status *s) -{ - bool a_sign = a.sign; - bool b_sign = b.sign ^ subtract; - - if (a_sign != b_sign) { - /* Subtraction */ - - if (a.cls == float_class_normal && b.cls == float_class_normal) { - if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) { - shift64RightJamming(b.frac, a.exp - b.exp, &b.frac); - a.frac = a.frac - b.frac; - } else { - shift64RightJamming(a.frac, b.exp - a.exp, &a.frac); - a.frac = b.frac - a.frac; - a.exp = b.exp; - a_sign ^= 1; - } + /* Any remainder indicates inexact; set sticky bit. */ + q1 |= (r2 | r3) != 0; - if (a.frac == 0) { - a.cls = float_class_zero; - a.sign = s->float_rounding_mode == float_round_down; - } else { - int shift = clz64(a.frac) - 1; - a.frac = a.frac << shift; - a.exp = a.exp - shift; - a.sign = a_sign; - } - return a; - } - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - if (a.cls == float_class_inf) { - if (b.cls == float_class_inf) { - float_raise(float_flag_invalid, s); - return parts_default_nan(s); - } - return a; - } - if (a.cls == float_class_zero && b.cls == float_class_zero) { - a.sign = s->float_rounding_mode == float_round_down; - return a; - } - if (a.cls == float_class_zero || b.cls == float_class_inf) { - b.sign = a_sign ^ 1; - return b; - } - if (b.cls == float_class_zero) { - return a; - } - } else { - /* Addition */ - if (a.cls == float_class_normal && b.cls == float_class_normal) { - if (a.exp > b.exp) { - shift64RightJamming(b.frac, a.exp - b.exp, &b.frac); - } else if (a.exp < b.exp) { - shift64RightJamming(a.frac, b.exp - a.exp, &a.frac); - a.exp = b.exp; - } - a.frac += b.frac; - if (a.frac & DECOMPOSED_OVERFLOW_BIT) { - shift64RightJamming(a.frac, 1, &a.frac); - a.exp += 1; - } - return a; - } - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - if (a.cls == float_class_inf || b.cls == float_class_zero) { - return a; - } - if (b.cls == float_class_inf || a.cls == float_class_zero) { - b.sign = b_sign; - return b; - } - } - g_assert_not_reached(); + a->frac_hi = q0; + a->frac_lo = q1; + return ret; } -/* - * Returns the result of adding or subtracting the floating-point - * values `a' and `b'. The operation is performed according to the - * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
- */ +#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B) -float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status) +static bool frac64_eqz(FloatParts64 *a) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, false, status); - - return float16_round_pack_canonical(pr, status); + return a->frac == 0; } -float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status) +static bool frac128_eqz(FloatParts128 *a) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, true, status); - - return float16_round_pack_canonical(pr, status); + return (a->frac_hi | a->frac_lo) == 0; } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status) -{ - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, subtract, status); - - return float32_round_pack_canonical(pr, status); -} +#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A) -static inline float32 soft_f32_add(float32 a, float32 b, float_status *status) +static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b) { - return soft_f32_addsub(a, b, false, status); + mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac); } -static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status) +static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b) { - return soft_f32_addsub(a, b, true, status); + mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo, + &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status) -{ - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, subtract, status); +#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B) - return float64_round_pack_canonical(pr, status); +static void frac64_neg(FloatParts64 *a) +{ + a->frac = -a->frac; } -static inline float64 soft_f64_add(float64 a, float64 b, float_status *status) +static void frac128_neg(FloatParts128 *a) { - return soft_f64_addsub(a, b, false, status); + bool c = 0; + a->frac_lo = usub64_borrow(0, a->frac_lo, &c); + a->frac_hi = usub64_borrow(0, a->frac_hi, &c); } -static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status) +static void frac256_neg(FloatParts256 *a) { - return soft_f64_addsub(a, b, true, status); + bool c = 0; + a->frac_lo = usub64_borrow(0, a->frac_lo, &c); + a->frac_lm = usub64_borrow(0, a->frac_lm, &c); + a->frac_hm = usub64_borrow(0, a->frac_hm, &c); + a->frac_hi = usub64_borrow(0, a->frac_hi, &c); } -static float hard_f32_add(float a, float b) +#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A) + +static int frac64_normalize(FloatParts64 *a) { - return a + b; + if (a->frac) { + int shift = clz64(a->frac); + a->frac <<= shift; + return shift; + } + return 64; } -static float hard_f32_sub(float a, float b) +static int frac128_normalize(FloatParts128 *a) { - return a - b; + if (a->frac_hi) { + int shl = clz64(a->frac_hi); + a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl); + a->frac_lo <<= shl; + return shl; + } else if (a->frac_lo) { + int shl = clz64(a->frac_lo); + a->frac_hi = a->frac_lo << shl; + 
a->frac_lo = 0; + return shl + 64; + } + return 128; } -static double hard_f64_add(double a, double b) +static int frac256_normalize(FloatParts256 *a) { - return a + b; -} + uint64_t a0 = a->frac_hi, a1 = a->frac_hm; + uint64_t a2 = a->frac_lm, a3 = a->frac_lo; + int ret, shl; -static double hard_f64_sub(double a, double b) -{ - return a - b; + if (likely(a0)) { + shl = clz64(a0); + if (shl == 0) { + return 0; + } + ret = shl; + } else { + if (a1) { + ret = 64; + a0 = a1, a1 = a2, a2 = a3, a3 = 0; + } else if (a2) { + ret = 128; + a0 = a2, a1 = a3, a2 = 0, a3 = 0; + } else if (a3) { + ret = 192; + a0 = a3, a1 = 0, a2 = 0, a3 = 0; + } else { + ret = 256; + a0 = 0, a1 = 0, a2 = 0, a3 = 0; + goto done; + } + shl = clz64(a0); + if (shl == 0) { + goto done; + } + ret += shl; + } + + a0 = shl_double(a0, a1, shl); + a1 = shl_double(a1, a2, shl); + a2 = shl_double(a2, a3, shl); + a3 <<= shl; + + done: + a->frac_hi = a0; + a->frac_hm = a1; + a->frac_lm = a2; + a->frac_lo = a3; + return ret; } -static bool f32_addsubmul_post(union_float32 a, union_float32 b) +#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A) + +static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot) { - if (QEMU_HARDFLOAT_2F32_USE_FP) { - return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); + uint64_t a0, a1, b0, t0, t1, q, quot; + int exp_diff = a->exp - b->exp; + int shift; + + a0 = a->frac; + a1 = 0; + + if (exp_diff < -1) { + if (mod_quot) { + *mod_quot = 0; + } + return; + } + if (exp_diff == -1) { + a0 >>= 1; + exp_diff = 0; } - return !(float32_is_zero(a.s) && float32_is_zero(b.s)); -} -static bool f64_addsubmul_post(union_float64 a, union_float64 b) -{ - if (QEMU_HARDFLOAT_2F64_USE_FP) { - return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); + b0 = b->frac; + quot = q = b0 <= a0; + if (q) { + a0 -= b0; + } + + exp_diff -= 64; + while (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 2 ? q - 2 : 0; + mul64To128(b0, q, &t0, &t1); + sub128(a0, a1, t0, t1, &a0, &a1); + shortShift128Left(a0, a1, 62, &a0, &a1); + exp_diff -= 62; + quot = (quot << 62) + q; + } + + exp_diff += 64; + if (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0; + mul64To128(b0, q << (64 - exp_diff), &t0, &t1); + sub128(a0, a1, t0, t1, &a0, &a1); + shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1); + while (le128(t0, t1, a0, a1)) { + ++q; + sub128(a0, a1, t0, t1, &a0, &a1); + } + quot = (exp_diff < 64 ? 
quot << exp_diff : 0) + q; } else { - return !(float64_is_zero(a.s) && float64_is_zero(b.s)); + t0 = b0; + t1 = 0; } -} -static float32 float32_addsub(float32 a, float32 b, float_status *s, - hard_f32_op2_fn hard, soft_f32_op2_fn soft) -{ - return float32_gen2(a, b, s, hard, soft, - f32_is_zon2, f32_addsubmul_post); -} + if (mod_quot) { + *mod_quot = quot; + } else { + sub128(t0, t1, a0, a1, &t0, &t1); + if (lt128(t0, t1, a0, a1) || + (eq128(t0, t1, a0, a1) && (q & 1))) { + a0 = t0; + a1 = t1; + a->sign = !a->sign; + } + } + + if (likely(a0)) { + shift = clz64(a0); + shortShift128Left(a0, a1, shift, &a0, &a1); + } else if (likely(a1)) { + shift = clz64(a1); + a0 = a1 << shift; + a1 = 0; + shift += 64; + } else { + a->cls = float_class_zero; + return; + } -static float64 float64_addsub(float64 a, float64 b, float_status *s, - hard_f64_op2_fn hard, soft_f64_op2_fn soft) -{ - return float64_gen2(a, b, s, hard, soft, - f64_is_zon2, f64_addsubmul_post); + a->exp = b->exp + exp_diff - shift; + a->frac = a0 | (a1 != 0); } -float32 QEMU_FLATTEN -float32_add(float32 a, float32 b, float_status *s) +static void frac128_modrem(FloatParts128 *a, FloatParts128 *b, + uint64_t *mod_quot) { - return float32_addsub(a, b, s, hard_f32_add, soft_f32_add); + uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot; + int exp_diff = a->exp - b->exp; + int shift; + + a0 = a->frac_hi; + a1 = a->frac_lo; + a2 = 0; + + if (exp_diff < -1) { + if (mod_quot) { + *mod_quot = 0; + } + return; + } + if (exp_diff == -1) { + shift128Right(a0, a1, 1, &a0, &a1); + exp_diff = 0; + } + + b0 = b->frac_hi; + b1 = b->frac_lo; + + quot = q = le128(b0, b1, a0, a1); + if (q) { + sub128(a0, a1, b0, b1, &a0, &a1); + } + + exp_diff -= 64; + while (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 4 ? q - 4 : 0; + mul128By64To192(b0, b1, q, &t0, &t1, &t2); + sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); + shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2); + exp_diff -= 61; + quot = (quot << 61) + q; + } + + exp_diff += 64; + if (exp_diff > 0) { + q = estimateDiv128To64(a0, a1, b0); + q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0; + mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2); + sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); + shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2); + while (le192(t0, t1, t2, a0, a1, a2)) { + ++q; + sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2); + } + quot = (exp_diff < 64 ? 
quot << exp_diff : 0) + q; + } else { + t0 = b0; + t1 = b1; + t2 = 0; + } + + if (mod_quot) { + *mod_quot = quot; + } else { + sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2); + if (lt192(t0, t1, t2, a0, a1, a2) || + (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) { + a0 = t0; + a1 = t1; + a2 = t2; + a->sign = !a->sign; + } + } + + if (likely(a0)) { + shift = clz64(a0); + shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2); + } else if (likely(a1)) { + shift = clz64(a1); + shortShift128Left(a1, a2, shift, &a0, &a1); + a2 = 0; + shift += 64; + } else if (likely(a2)) { + shift = clz64(a2); + a0 = a2 << shift; + a1 = a2 = 0; + shift += 128; + } else { + a->cls = float_class_zero; + return; + } + + a->exp = b->exp + exp_diff - shift; + a->frac_hi = a0; + a->frac_lo = a1 | (a2 != 0); } -float32 QEMU_FLATTEN -float32_sub(float32 a, float32 b, float_status *s) +#define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q) + +static void frac64_shl(FloatParts64 *a, int c) { - return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub); + a->frac <<= c; } -float64 QEMU_FLATTEN -float64_add(float64 a, float64 b, float_status *s) +static void frac128_shl(FloatParts128 *a, int c) { - return float64_addsub(a, b, s, hard_f64_add, soft_f64_add); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + + if (c & 64) { + a0 = a1, a1 = 0; + } + + c &= 63; + if (c) { + a0 = shl_double(a0, a1, c); + a1 = a1 << c; + } + + a->frac_hi = a0; + a->frac_lo = a1; } -float64 QEMU_FLATTEN -float64_sub(float64 a, float64 b, float_status *s) +#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C) + +static void frac64_shr(FloatParts64 *a, int c) { - return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub); + a->frac >>= c; } -/* - * Returns the result of adding or subtracting the bfloat16 - * values `a' and `b'. - */ -bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status) +static void frac128_shr(FloatParts128 *a, int c) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, false, status); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; - return bfloat16_round_pack_canonical(pr, status); -} + if (c & 64) { + a1 = a0, a0 = 0; + } -bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status) -{ - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = addsub_floats(pa, pb, true, status); + c &= 63; + if (c) { + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + } - return bfloat16_round_pack_canonical(pr, status); + a->frac_hi = a0; + a->frac_lo = a1; } -/* - * Returns the result of multiplying the floating-point values `a' and - * `b'. The operation is performed according to the IEC/IEEE Standard - * for Binary Floating-Point Arithmetic. 
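The frac*_shr helpers above simply discard the bits shifted out; the frac*_shrjam variants that follow instead fold them into the least significant bit (the traditional softfloat "jamming" or sticky shift), so a later rounding step can still tell that low-order bits were lost. A one-word illustration of the difference (editor's sketch, not the QEMU helpers):

#include <assert.h>
#include <stdint.h>

/* Shift right by c (0 < c < 64), ORing any shifted-out bits into bit 0. */
static uint64_t shr_sticky(uint64_t x, int c)
{
    uint64_t dropped = x << (64 - c);        /* the bits that fall off the end */
    return (x >> c) | (dropped != 0);
}

int main(void)
{
    /* The lost low bit survives as a sticky lsb ... */
    assert(shr_sticky(0x8000000000000001ull, 4) == 0x0800000000000001ull);
    /* ... while an exact shift stays exact. */
    assert(shr_sticky(0x8000000000000000ull, 4) == 0x0800000000000000ull);
    return 0;
}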
- */ +#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C) -static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s) +static void frac64_shrjam(FloatParts64 *a, int c) { - bool sign = a.sign ^ b.sign; + uint64_t a0 = a->frac; - if (a.cls == float_class_normal && b.cls == float_class_normal) { - uint64_t hi, lo; - int exp = a.exp + b.exp; - - mul64To128(a.frac, b.frac, &hi, &lo); - shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo); - if (lo & DECOMPOSED_OVERFLOW_BIT) { - shift64RightJamming(lo, 1, &lo); - exp += 1; + if (likely(c != 0)) { + if (likely(c < 64)) { + a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0); + } else { + a0 = a0 != 0; } - - /* Re-use a */ - a.exp = exp; - a.sign = sign; - a.frac = lo; - return a; - } - /* handle all the NaN cases */ - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - /* Inf * Zero == NaN */ - if ((a.cls == float_class_inf && b.cls == float_class_zero) || - (a.cls == float_class_zero && b.cls == float_class_inf)) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - /* Multiply by 0 or Inf */ - if (a.cls == float_class_inf || a.cls == float_class_zero) { - a.sign = sign; - return a; + a->frac = a0; } - if (b.cls == float_class_inf || b.cls == float_class_zero) { - b.sign = sign; - return b; - } - g_assert_not_reached(); } -float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status) +static void frac128_shrjam(FloatParts128 *a, int c) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, status); + uint64_t a0 = a->frac_hi, a1 = a->frac_lo; + uint64_t sticky = 0; - return float16_round_pack_canonical(pr, status); + if (unlikely(c == 0)) { + return; + } else if (likely(c < 64)) { + /* nothing */ + } else if (likely(c < 128)) { + sticky = a1; + a1 = a0; + a0 = 0; + c &= 63; + if (c == 0) { + goto done; + } + } else { + sticky = a0 | a1; + a0 = a1 = 0; + goto done; + } + + sticky |= shr_double(a1, 0, c); + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + + done: + a->frac_lo = a1 | (sticky != 0); + a->frac_hi = a0; } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_mul(float32 a, float32 b, float_status *status) +static void frac256_shrjam(FloatParts256 *a, int c) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, status); + uint64_t a0 = a->frac_hi, a1 = a->frac_hm; + uint64_t a2 = a->frac_lm, a3 = a->frac_lo; + uint64_t sticky = 0; - return float32_round_pack_canonical(pr, status); + if (unlikely(c == 0)) { + return; + } else if (likely(c < 64)) { + /* nothing */ + } else if (likely(c < 256)) { + if (unlikely(c & 128)) { + sticky |= a2 | a3; + a3 = a1, a2 = a0, a1 = 0, a0 = 0; + } + if (unlikely(c & 64)) { + sticky |= a3; + a3 = a2, a2 = a1, a1 = a0, a0 = 0; + } + c &= 63; + if (c == 0) { + goto done; + } + } else { + sticky = a0 | a1 | a2 | a3; + a0 = a1 = a2 = a3 = 0; + goto done; + } + + sticky |= shr_double(a3, 0, c); + a3 = shr_double(a2, a3, c); + a2 = shr_double(a1, a2, c); + a1 = shr_double(a0, a1, c); + a0 = a0 >> c; + + done: + a->frac_lo = a3 | (sticky != 0); + a->frac_lm = a2; + a->frac_hm = a1; + a->frac_hi = a0; } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_mul(float64 a, float64 b, float_status *status) -{ - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, 
status); +#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C) - return float64_round_pack_canonical(pr, status); +static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b) +{ + return usub64_overflow(a->frac, b->frac, &r->frac); } -static float hard_f32_mul(float a, float b) +static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b) { - return a * b; + bool c = 0; + r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); + r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); + return c; } -static double hard_f64_mul(double a, double b) +static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b) { - return a * b; + bool c = 0; + r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c); + r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c); + r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c); + r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c); + return c; } -float32 QEMU_FLATTEN -float32_mul(float32 a, float32 b, float_status *s) +#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B) + +static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a) { - return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, - f32_is_zon2, f32_addsubmul_post); + r->frac = a->frac_hi | (a->frac_lo != 0); } -float64 QEMU_FLATTEN -float64_mul(float64 a, float64 b, float_status *s) +static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a) { - return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, - f64_is_zon2, f64_addsubmul_post); + r->frac_hi = a->frac_hi; + r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0); } -/* - * Returns the result of multiplying the bfloat16 - * values `a' and `b'. - */ +#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A) -bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status) +static void frac64_widen(FloatParts128 *r, FloatParts64 *a) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = mul_floats(pa, pb, status); + r->frac_hi = a->frac; + r->frac_lo = 0; +} - return bfloat16_round_pack_canonical(pr, status); +static void frac128_widen(FloatParts256 *r, FloatParts128 *a) +{ + r->frac_hi = a->frac_hi; + r->frac_hm = a->frac_lo; + r->frac_lm = 0; + r->frac_lo = 0; } +#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B) + /* - * Returns the result of multiplying the floating-point values `a' and - * `b' then adding 'c', with no intermediate rounding step after the - * multiplication. The operation is performed according to the - * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. - * The flags argument allows the caller to select negation of the - * addend, the intermediate product, or the final result. (The - * difference between this and having the caller do a separate - * negation is that negating externally will flip the sign bit on - * NaNs.) + * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa. + * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c + * and thus MIT licenced. 
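The table above only supplies initial reciprocal-square-root estimates (its musl origin is stated in the comment); how it is indexed and refined is not visible in this hunk. The usual pattern with such a table is to sharpen the estimate with Newton-Raphson steps, r' = r * (3 - d*r*r) / 2, each of which roughly doubles the number of correct bits. A floating-point illustration of that refinement step only, unrelated to the fixed-point arithmetic the softfloat code itself uses:

#include <stdio.h>

int main(void)
{
    double d = 2.0;
    double r = 0.7;                          /* crude estimate of 1/sqrt(2) */

    for (int i = 0; i < 4; i++) {
        r = r * (1.5 - 0.5 * d * r * r);     /* Newton-Raphson step for 1/sqrt(d) */
    }
    printf("1/sqrt(2) ~ %.17g, sqrt(2) ~ %.17g\n", r, r * d);
    return 0;
}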
*/ +static const uint16_t rsqrt_tab[128] = { + 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, + 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, + 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, + 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, + 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, + 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, + 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, + 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, + 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, + 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, + 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, + 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, + 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, + 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, + 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, + 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, +}; -static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c, - int flags, float_status *s) -{ - bool inf_zero = ((1 << a.cls) | (1 << b.cls)) == - ((1 << float_class_inf) | (1 << float_class_zero)); - bool p_sign; - bool sign_flip = flags & float_muladd_negate_result; - FloatClass p_class; - uint64_t hi, lo; - int p_exp; - - /* It is implementation-defined whether the cases of (0,inf,qnan) - * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN - * they return if they do), so we have to hand this information - * off to the target-specific pick-a-NaN routine. - */ - if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) { - return pick_nan_muladd(a, b, c, inf_zero, s); - } - - if (inf_zero) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - - if (flags & float_muladd_negate_c) { - c.sign ^= 1; - } - - p_sign = a.sign ^ b.sign; - - if (flags & float_muladd_negate_product) { - p_sign ^= 1; - } - - if (a.cls == float_class_inf || b.cls == float_class_inf) { - p_class = float_class_inf; - } else if (a.cls == float_class_zero || b.cls == float_class_zero) { - p_class = float_class_zero; - } else { - p_class = float_class_normal; - } - - if (c.cls == float_class_inf) { - if (p_class == float_class_inf && p_sign != c.sign) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } else { - a.cls = float_class_inf; - a.sign = c.sign ^ sign_flip; - return a; - } - } - - if (p_class == float_class_inf) { - a.cls = float_class_inf; - a.sign = p_sign ^ sign_flip; - return a; - } - - if (p_class == float_class_zero) { - if (c.cls == float_class_zero) { - if (p_sign != c.sign) { - p_sign = s->float_rounding_mode == float_round_down; - } - c.sign = p_sign; - } else if (flags & float_muladd_halve_result) { - c.exp -= 1; - } - c.sign ^= sign_flip; - return c; - } +#define partsN(NAME) glue(glue(glue(parts,N),_),NAME) +#define FloatPartsN glue(FloatParts,N) +#define FloatPartsW glue(FloatParts,W) - /* a & b should be normals now... */ - assert(a.cls == float_class_normal && - b.cls == float_class_normal); +#define N 64 +#define W 128 - p_exp = a.exp + b.exp; +#include "softfloat-parts-addsub.c.inc" +#include "softfloat-parts.c.inc" - /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit - * result. 
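The partsN / FloatPartsN block above is the template mechanism that replaces the hand-written per-width code being deleted in this hunk: N and W are set, softfloat-parts-addsub.c.inc and softfloat-parts.c.inc are included, and the glue() token pasting turns every partsN(NAME) into parts64_NAME, parts128_NAME or parts256_NAME. A stripped-down, single-file sketch of the same multiple-expansion idea (glue/xglue below are local stand-ins, and the repeated function definition stands in for re-including the .c.inc file):

#include <stdio.h>

#define xglue(a, b) a##b
#define glue(a, b)  xglue(a, b)
#define widthN      glue(width, N)      /* same trick as partsN/FloatPartsN */

#define N 64
static int widthN(void) { return N; }   /* expands to width64(void) */
#undef N

#define N 128
static int widthN(void) { return N; }   /* expands to width128(void) */
#undef N

int main(void)
{
    printf("%d %d\n", width64(), width128());   /* prints "64 128" */
    return 0;
}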
- */ - mul64To128(a.frac, b.frac, &hi, &lo); - /* binary point now at bit 124 */ +#undef N +#undef W +#define N 128 +#define W 256 - /* check for overflow */ - if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) { - shift128RightJamming(hi, lo, 1, &hi, &lo); - p_exp += 1; - } +#include "softfloat-parts-addsub.c.inc" +#include "softfloat-parts.c.inc" - /* + add/sub */ - if (c.cls == float_class_zero) { - /* move binary point back to 62 */ - shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo); - } else { - int exp_diff = p_exp - c.exp; - if (p_sign == c.sign) { - /* Addition */ - if (exp_diff <= 0) { - shift128RightJamming(hi, lo, - DECOMPOSED_BINARY_POINT - exp_diff, - &hi, &lo); - lo += c.frac; - p_exp = c.exp; - } else { - uint64_t c_hi, c_lo; - /* shift c to the same binary point as the product (124) */ - c_hi = c.frac >> 2; - c_lo = 0; - shift128RightJamming(c_hi, c_lo, - exp_diff, - &c_hi, &c_lo); - add128(hi, lo, c_hi, c_lo, &hi, &lo); - /* move binary point back to 62 */ - shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo); - } +#undef N +#undef W +#define N 256 - if (lo & DECOMPOSED_OVERFLOW_BIT) { - shift64RightJamming(lo, 1, &lo); - p_exp += 1; - } +#include "softfloat-parts-addsub.c.inc" - } else { - /* Subtraction */ - uint64_t c_hi, c_lo; - /* make C binary point match product at bit 124 */ - c_hi = c.frac >> 2; - c_lo = 0; - - if (exp_diff <= 0) { - shift128RightJamming(hi, lo, -exp_diff, &hi, &lo); - if (exp_diff == 0 - && - (hi > c_hi || (hi == c_hi && lo >= c_lo))) { - sub128(hi, lo, c_hi, c_lo, &hi, &lo); - } else { - sub128(c_hi, c_lo, hi, lo, &hi, &lo); - p_sign ^= 1; - p_exp = c.exp; - } - } else { - shift128RightJamming(c_hi, c_lo, - exp_diff, - &c_hi, &c_lo); - sub128(hi, lo, c_hi, c_lo, &hi, &lo); - } +#undef N +#undef W +#undef partsN +#undef FloatPartsN +#undef FloatPartsW - if (hi == 0 && lo == 0) { - a.cls = float_class_zero; - a.sign = s->float_rounding_mode == float_round_down; - a.sign ^= sign_flip; - return a; - } else { - int shift; - if (hi != 0) { - shift = clz64(hi); - } else { - shift = clz64(lo) + 64; - } - /* Normalizing to a binary point of 124 is the - correct adjust for the exponent. However since we're - shifting, we might as well put the binary point back - at 62 where we really want it. Therefore shift as - if we're leaving 1 bit at the top of the word, but - adjust the exponent as if we're leaving 3 bits. */ - shift -= 1; - if (shift >= 64) { - lo = lo << (shift - 64); - } else { - hi = (hi << shift) | (lo >> (64 - shift)); - lo = hi | ((lo << shift) != 0); - } - p_exp -= shift - 2; - } - } - } +/* + * Pack/unpack routines with a specific FloatFmt. 
+ */ - if (flags & float_muladd_halve_result) { - p_exp -= 1; - } +static void float16a_unpack_canonical(FloatParts64 *p, float16 f, + float_status *s, const FloatFmt *params) +{ + float16_unpack_raw(p, f); + parts_canonicalize(p, s, params); +} - /* finally prepare our result */ - a.cls = float_class_normal; - a.sign = p_sign ^ sign_flip; - a.exp = p_exp; - a.frac = lo; +static void float16_unpack_canonical(FloatParts64 *p, float16 f, + float_status *s) +{ + float16a_unpack_canonical(p, f, s, &float16_params); +} - return a; +static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f, + float_status *s) +{ + bfloat16_unpack_raw(p, f); + parts_canonicalize(p, s, &bfloat16_params); } -float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c, - int flags, float_status *status) +static float16 float16a_round_pack_canonical(FloatParts64 *p, + float_status *s, + const FloatFmt *params) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pc = float16_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + parts_uncanon(p, s, params); + return float16_pack_raw(p); +} - return float16_round_pack_canonical(pr, status); +static float16 float16_round_pack_canonical(FloatParts64 *p, + float_status *s) +{ + return float16a_round_pack_canonical(p, s, &float16_params); } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_muladd(float32 a, float32 b, float32 c, int flags, - float_status *status) +static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p, + float_status *s) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pc = float32_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + parts_uncanon(p, s, &bfloat16_params); + return bfloat16_pack_raw(p); +} - return float32_round_pack_canonical(pr, status); +static void float32_unpack_canonical(FloatParts64 *p, float32 f, + float_status *s) +{ + float32_unpack_raw(p, f); + parts_canonicalize(p, s, &float32_params); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_muladd(float64 a, float64 b, float64 c, int flags, - float_status *status) +static float32 float32_round_pack_canonical(FloatParts64 *p, + float_status *s) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pc = float64_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + parts_uncanon(p, s, &float32_params); + return float32_pack_raw(p); +} - return float64_round_pack_canonical(pr, status); +static void float64_unpack_canonical(FloatParts64 *p, float64 f, + float_status *s) +{ + float64_unpack_raw(p, f); + parts_canonicalize(p, s, &float64_params); } -static bool force_soft_fma; +static float64 float64_round_pack_canonical(FloatParts64 *p, + float_status *s) +{ + parts_uncanon(p, s, &float64_params); + return float64_pack_raw(p); +} -float32 QEMU_FLATTEN -float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) +static void float128_unpack_canonical(FloatParts128 *p, float128 f, + float_status *s) { - union_float32 ua, ub, uc, ur; + float128_unpack_raw(p, f); + parts_canonicalize(p, s, &float128_params); +} - ua.s = xa; - ub.s = xb; - uc.s = xc; +static float128 float128_round_pack_canonical(FloatParts128 *p, + float_status *s) +{ + parts_uncanon(p, s, &float128_params); + return float128_pack_raw(p); +} - if 
(unlikely(!can_use_fpu(s))) { - goto soft; - } - if (unlikely(flags & float_muladd_halve_result)) { - goto soft; +/* Returns false if the encoding is invalid. */ +static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f, + float_status *s) +{ + /* Ensure rounding precision is set before beginning. */ + switch (s->floatx80_rounding_precision) { + case floatx80_precision_x: + case floatx80_precision_d: + case floatx80_precision_s: + break; + default: + g_assert_not_reached(); } - float32_input_flush3(&ua.s, &ub.s, &uc.s, s); - if (unlikely(!f32_is_zon3(ua, ub, uc))) { - goto soft; + if (unlikely(floatx80_invalid_encoding(f))) { + float_raise(float_flag_invalid, s); + return false; } - if (unlikely(force_soft_fma)) { - goto soft; + floatx80_unpack_raw(p, f); + + if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) { + parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]); + } else { + /* The explicit integer bit is ignored, after invalid checks. */ + p->frac_hi &= MAKE_64BIT_MASK(0, 63); + p->cls = (p->frac_hi == 0 ? float_class_inf + : parts_is_snan_frac(p->frac_hi, s) + ? float_class_snan : float_class_qnan); } + return true; +} - /* - * When (a || b) == 0, there's no need to check for under/over flow, - * since we know the addend is (normal || 0) and the product is 0. - */ - if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) { - union_float32 up; - bool prod_sign; - - prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s); - prod_sign ^= !!(flags & float_muladd_negate_product); - up.s = float32_set_sign(float32_zero, prod_sign); +static floatx80 floatx80_round_pack_canonical(FloatParts128 *p, + float_status *s) +{ + const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision]; + uint64_t frac; + int exp; - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } - ur.h = up.h + uc.h; - } else { - union_float32 ua_orig = ua; - union_float32 uc_orig = uc; + switch (p->cls) { + case float_class_normal: + if (s->floatx80_rounding_precision == floatx80_precision_x) { + parts_uncanon_normal(p, s, fmt); + frac = p->frac_hi; + exp = p->exp; + } else { + FloatParts64 p64; - if (flags & float_muladd_negate_product) { - ua.h = -ua.h; + p64.sign = p->sign; + p64.exp = p->exp; + frac_truncjam(&p64, p); + parts_uncanon_normal(&p64, s, fmt); + frac = p64.frac; + exp = p64.exp; } - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } - - ur.h = fmaf(ua.h, ub.h, uc.h); - - if (unlikely(f32_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { - ua = ua_orig; - uc = uc_orig; - goto soft; + if (exp != fmt->exp_max) { + break; } - } - if (flags & float_muladd_negate_result) { - return float32_chs(ur.s); - } - return ur.s; - - soft: - return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s); -} - -float64 QEMU_FLATTEN -float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) -{ - union_float64 ua, ub, uc, ur; + /* rounded to inf -- fall through to set frac correctly */ - ua.s = xa; - ub.s = xb; - uc.s = xc; + case float_class_inf: + /* x86 and m68k differ in the setting of the integer bit. 
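Unlike the other formats handled here, floatx80 keeps its integer bit explicit as bit 63 of the 64-bit significand, which is why floatx80_round_pack_canonical fills it in by hand: floatx80_infinity_low for infinities (the x86/m68k difference noted above) and a forced-on bit 63 for NaNs below. A small sketch of the encoding using plain integers rather than QEMU's floatx80 type (the 1.0 pattern is standard x87, not taken from the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* 1.0 in 80-bit extended precision: sign/exponent word and significand. */
    uint16_t se   = 0x3fff;                  /* sign 0, biased exponent 16383 */
    uint64_t frac = 0x8000000000000000ull;   /* explicit integer bit is bit 63 */

    assert(frac >> 63);      /* normal numbers must have the integer bit set */
    (void)se;
    return 0;
}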
*/ + frac = floatx80_infinity_low; + exp = fmt->exp_max; + break; - if (unlikely(!can_use_fpu(s))) { - goto soft; - } - if (unlikely(flags & float_muladd_halve_result)) { - goto soft; - } + case float_class_zero: + frac = 0; + exp = 0; + break; - float64_input_flush3(&ua.s, &ub.s, &uc.s, s); - if (unlikely(!f64_is_zon3(ua, ub, uc))) { - goto soft; - } + case float_class_snan: + case float_class_qnan: + /* NaNs have the integer bit set. */ + frac = p->frac_hi | (1ull << 63); + exp = fmt->exp_max; + break; - if (unlikely(force_soft_fma)) { - goto soft; + default: + g_assert_not_reached(); } - /* - * When (a || b) == 0, there's no need to check for under/over flow, - * since we know the addend is (normal || 0) and the product is 0. - */ - if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) { - union_float64 up; - bool prod_sign; - - prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s); - prod_sign ^= !!(flags & float_muladd_negate_product); - up.s = float64_set_sign(float64_zero, prod_sign); + return packFloatx80(p->sign, exp, frac); +} - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } - ur.h = up.h + uc.h; - } else { - union_float64 ua_orig = ua; - union_float64 uc_orig = uc; +/* + * Addition and subtraction + */ - if (flags & float_muladd_negate_product) { - ua.h = -ua.h; - } - if (flags & float_muladd_negate_c) { - uc.h = -uc.h; - } +static float16 QEMU_FLATTEN +float16_addsub(float16 a, float16 b, float_status *status, bool subtract) +{ + FloatParts64 pa, pb, *pr; - ur.h = fma(ua.h, ub.h, uc.h); + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); - if (unlikely(f64_is_inf(ur))) { - s->float_exception_flags |= float_flag_overflow; - } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { - ua = ua_orig; - uc = uc_orig; - goto soft; - } - } - if (flags & float_muladd_negate_result) { - return float64_chs(ur.s); - } - return ur.s; + return float16_round_pack_canonical(pr, status); +} - soft: - return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s); +float16 float16_add(float16 a, float16 b, float_status *status) +{ + return float16_addsub(a, b, status, false); } -/* - * Returns the result of multiplying the bfloat16 values `a' - * and `b' then adding 'c', with no intermediate rounding step after the - * multiplication. - */ +float16 float16_sub(float16 a, float16 b, float_status *status) +{ + return float16_addsub(a, b, status, true); +} -bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, - int flags, float_status *status) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pc = bfloat16_unpack_canonical(c, status); - FloatParts pr = muladd_floats(pa, pb, pc, flags, status); + FloatParts64 pa, pb, *pr; - return bfloat16_round_pack_canonical(pr, status); + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); + + return float32_round_pack_canonical(pr, status); } -/* - * Returns the result of dividing the floating-point value `a' by the - * corresponding value `b'. The operation is performed according to - * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
- */ +static float32 soft_f32_add(float32 a, float32 b, float_status *status) +{ + return soft_f32_addsub(a, b, status, false); +} -static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s) +static float32 soft_f32_sub(float32 a, float32 b, float_status *status) { - bool sign = a.sign ^ b.sign; + return soft_f32_addsub(a, b, status, true); +} - if (a.cls == float_class_normal && b.cls == float_class_normal) { - uint64_t n0, n1, q, r; - int exp = a.exp - b.exp; +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract) +{ + FloatParts64 pa, pb, *pr; - /* - * We want a 2*N / N-bit division to produce exactly an N-bit - * result, so that we do not lose any precision and so that we - * do not have to renormalize afterward. If A.frac < B.frac, - * then division would produce an (N-1)-bit result; shift A left - * by one to produce the an N-bit result, and decrement the - * exponent to match. - * - * The udiv_qrnnd algorithm that we're using requires normalization, - * i.e. the msb of the denominator must be set. Since we know that - * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left - * by one (more), and the remainder must be shifted right by one. - */ - if (a.frac < b.frac) { - exp -= 1; - shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0); - } else { - shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0); - } - q = udiv_qrnnd(&r, n1, n0, b.frac << 1); + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); - /* - * Set lsb if there is a remainder, to set inexact. - * As mentioned above, to find the actual value of the remainder we - * would need to shift right, but (1) we are only concerned about - * non-zero-ness, and (2) the remainder will always be even because - * both inputs to the division primitive are even. 
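 * (The shifts above leave the low bit of the dividend clear, and
 * b.frac << 1 is even as well; since n == q * (b.frac << 1) + r,
 * the remainder r must be even too.)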
- */ - a.frac = q | (r != 0); - a.sign = sign; - a.exp = exp; - return a; - } - /* handle all the NaN cases */ - if (is_nan(a.cls) || is_nan(b.cls)) { - return pick_nan(a, b, s); - } - /* 0/0 or Inf/Inf */ - if (a.cls == b.cls - && - (a.cls == float_class_inf || a.cls == float_class_zero)) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - /* Inf / x or 0 / x */ - if (a.cls == float_class_inf || a.cls == float_class_zero) { - a.sign = sign; - return a; - } - /* Div 0 => Inf */ - if (b.cls == float_class_zero) { - s->float_exception_flags |= float_flag_divbyzero; - a.cls = float_class_inf; - a.sign = sign; - return a; - } - /* Div by Inf */ - if (b.cls == float_class_inf) { - a.cls = float_class_zero; - a.sign = sign; - return a; - } - g_assert_not_reached(); + return float64_round_pack_canonical(pr, status); } -float16 float16_div(float16 a, float16 b, float_status *status) +static float64 soft_f64_add(float64 a, float64 b, float_status *status) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pb = float16_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); - - return float16_round_pack_canonical(pr, status); + return soft_f64_addsub(a, b, status, false); } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_div(float32 a, float32 b, float_status *status) +static float64 soft_f64_sub(float64 a, float64 b, float_status *status) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pb = float32_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); - - return float32_round_pack_canonical(pr, status); + return soft_f64_addsub(a, b, status, true); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_div(float64 a, float64 b, float_status *status) +static float hard_f32_add(float a, float b) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pb = float64_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); + return a + b; +} - return float64_round_pack_canonical(pr, status); +static float hard_f32_sub(float a, float b) +{ + return a - b; } -static float hard_f32_div(float a, float b) +static double hard_f64_add(double a, double b) { - return a / b; + return a + b; } -static double hard_f64_div(double a, double b) +static double hard_f64_sub(double a, double b) { - return a / b; + return a - b; } -static bool f32_div_pre(union_float32 a, union_float32 b) +static bool f32_addsubmul_post(union_float32 a, union_float32 b) { if (QEMU_HARDFLOAT_2F32_USE_FP) { - return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && - fpclassify(b.h) == FP_NORMAL; + return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); } - return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s); + return !(float32_is_zero(a.s) && float32_is_zero(b.s)); } -static bool f64_div_pre(union_float64 a, union_float64 b) +static bool f64_addsubmul_post(union_float64 a, union_float64 b) { if (QEMU_HARDFLOAT_2F64_USE_FP) { - return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && - fpclassify(b.h) == FP_NORMAL; + return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO); + } else { + return !(float64_is_zero(a.s) && float64_is_zero(b.s)); } - return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s); } -static bool f32_div_post(union_float32 a, union_float32 b) +static float32 float32_addsub(float32 a, float32 b, float_status *s, + hard_f32_op2_fn hard, soft_f32_op2_fn soft) { - if (QEMU_HARDFLOAT_2F32_USE_FP) { - return 
fpclassify(a.h) != FP_ZERO; - } - return !float32_is_zero(a.s); + return float32_gen2(a, b, s, hard, soft, + f32_is_zon2, f32_addsubmul_post); } -static bool f64_div_post(union_float64 a, union_float64 b) +static float64 float64_addsub(float64 a, float64 b, float_status *s, + hard_f64_op2_fn hard, soft_f64_op2_fn soft) { - if (QEMU_HARDFLOAT_2F64_USE_FP) { - return fpclassify(a.h) != FP_ZERO; - } - return !float64_is_zero(a.s); + return float64_gen2(a, b, s, hard, soft, + f64_is_zon2, f64_addsubmul_post); } float32 QEMU_FLATTEN -float32_div(float32 a, float32 b, float_status *s) +float32_add(float32 a, float32 b, float_status *s) { - return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, - f32_div_pre, f32_div_post); + return float32_addsub(a, b, s, hard_f32_add, soft_f32_add); +} + +float32 QEMU_FLATTEN +float32_sub(float32 a, float32 b, float_status *s) +{ + return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub); } float64 QEMU_FLATTEN -float64_div(float64 a, float64 b, float_status *s) +float64_add(float64 a, float64 b, float_status *s) { - return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, - f64_div_pre, f64_div_post); + return float64_addsub(a, b, s, hard_f64_add, soft_f64_add); } -/* - * Returns the result of dividing the bfloat16 - * value `a' by the corresponding value `b'. - */ +float64 QEMU_FLATTEN +float64_sub(float64 a, float64 b, float_status *s) +{ + return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub); +} -bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status) +static bfloat16 QEMU_FLATTEN +bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pb = bfloat16_unpack_canonical(b, status); - FloatParts pr = div_floats(pa, pb, status); + FloatParts64 pa, pb, *pr; + + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); return bfloat16_round_pack_canonical(pr, status); } -/* - * Float to Float conversions - * - * Returns the result of converting one float format to another. The - * conversion is performed according to the IEC/IEEE Standard for - * Binary Floating-Point Arithmetic. - * - * The float_to_float helper only needs to take care of raising - * invalid exceptions and handling the conversion on NaNs. - */ - -static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf, - float_status *s) +bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status) { - if (dstf->arm_althp) { - switch (a.cls) { - case float_class_qnan: - case float_class_snan: - /* There is no NaN in the destination format. Raise Invalid - * and return a zero with the sign of the input NaN. - */ - s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_zero; - a.frac = 0; - a.exp = 0; - break; - - case float_class_inf: - /* There is no Inf in the destination format. Raise Invalid - * and return the maximum normal with the correct sign. 
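 * (For the Arm alternative half-precision destination this is 0x7fff,
 * i.e. a magnitude of 131008.)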
- */ - s->float_exception_flags |= float_flag_invalid; - a.cls = float_class_normal; - a.exp = dstf->exp_max; - a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift; - break; - - default: - break; - } - } else if (is_nan(a.cls)) { - if (is_snan(a.cls)) { - s->float_exception_flags |= float_flag_invalid; - a = parts_silence_nan(a, s); - } - if (s->default_nan_mode) { - return parts_default_nan(s); - } - } - return a; + return bfloat16_addsub(a, b, status, false); } -float32 float16_to_float32(float16 a, bool ieee, float_status *s) +bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status) { - const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; - FloatParts p = float16a_unpack_canonical(a, s, fmt16); - FloatParts pr = float_to_float(p, &float32_params, s); - return float32_round_pack_canonical(pr, s); + return bfloat16_addsub(a, b, status, true); } -float64 float16_to_float64(float16 a, bool ieee, float_status *s) +static float128 QEMU_FLATTEN +float128_addsub(float128 a, float128 b, float_status *status, bool subtract) { - const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; - FloatParts p = float16a_unpack_canonical(a, s, fmt16); - FloatParts pr = float_to_float(p, &float64_params, s); - return float64_round_pack_canonical(pr, s); -} + FloatParts128 pa, pb, *pr; -float16 float32_to_float16(float32 a, bool ieee, float_status *s) -{ - const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; - FloatParts p = float32_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, fmt16, s); - return float16a_round_pack_canonical(pr, s, fmt16); -} + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_addsub(&pa, &pb, status, subtract); -static float64 QEMU_SOFTFLOAT_ATTR -soft_float32_to_float64(float32 a, float_status *s) -{ - FloatParts p = float32_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float64_params, s); - return float64_round_pack_canonical(pr, s); + return float128_round_pack_canonical(pr, status); } -float64 float32_to_float64(float32 a, float_status *s) +float128 float128_add(float128 a, float128 b, float_status *status) { - if (likely(float32_is_normal(a))) { - /* Widening conversion can never produce inexact results. */ - union_float32 uf; - union_float64 ud; - uf.s = a; - ud.h = uf.h; - return ud.s; - } else if (float32_is_zero(a)) { - return float64_set_sign(float64_zero, float32_is_neg(a)); - } else { - return soft_float32_to_float64(a, s); - } + return float128_addsub(a, b, status, false); } -float16 float64_to_float16(float64 a, bool ieee, float_status *s) +float128 float128_sub(float128 a, float128 b, float_status *status) { - const FloatFmt *fmt16 = ieee ? 
&float16_params : &float16_params_ahp; - FloatParts p = float64_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, fmt16, s); - return float16a_round_pack_canonical(pr, s, fmt16); + return float128_addsub(a, b, status, true); } -float32 float64_to_float32(float64 a, float_status *s) +static floatx80 QEMU_FLATTEN +floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract) { - FloatParts p = float64_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float32_params, s); - return float32_round_pack_canonical(pr, s); -} + FloatParts128 pa, pb, *pr; -float32 bfloat16_to_float32(bfloat16 a, float_status *s) -{ - FloatParts p = bfloat16_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float32_params, s); - return float32_round_pack_canonical(pr, s); -} + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } -float64 bfloat16_to_float64(bfloat16 a, float_status *s) -{ - FloatParts p = bfloat16_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &float64_params, s); - return float64_round_pack_canonical(pr, s); + pr = parts_addsub(&pa, &pb, status, subtract); + return floatx80_round_pack_canonical(pr, status); } -bfloat16 float32_to_bfloat16(float32 a, float_status *s) +floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - FloatParts p = float32_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &bfloat16_params, s); - return bfloat16_round_pack_canonical(pr, s); + return floatx80_addsub(a, b, status, false); } -bfloat16 float64_to_bfloat16(float64 a, float_status *s) +floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - FloatParts p = float64_unpack_canonical(a, s); - FloatParts pr = float_to_float(p, &bfloat16_params, s); - return bfloat16_round_pack_canonical(pr, s); + return floatx80_addsub(a, b, status, true); } /* - * Rounds the floating-point value `a' to an integer, and returns the - * result as a floating-point value. The operation is performed - * according to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic. 
+ * Multiplication */ -static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode, - int scale, float_status *s) +float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status) { - switch (a.cls) { - case float_class_qnan: - case float_class_snan: - return return_nan(a, s); - - case float_class_zero: - case float_class_inf: - /* already "integral" */ - break; - - case float_class_normal: - scale = MIN(MAX(scale, -0x10000), 0x10000); - a.exp += scale; - - if (a.exp >= DECOMPOSED_BINARY_POINT) { - /* already integral */ - break; - } - if (a.exp < 0) { - bool one; - /* all fractional */ - s->float_exception_flags |= float_flag_inexact; - switch (rmode) { - case float_round_nearest_even: - one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT; - break; - case float_round_ties_away: - one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT; - break; - case float_round_to_zero: - one = false; - break; - case float_round_up: - one = !a.sign; - break; - case float_round_down: - one = a.sign; - break; - case float_round_to_odd: - one = true; - break; - default: - g_assert_not_reached(); - } - - if (one) { - a.frac = DECOMPOSED_IMPLICIT_BIT; - a.exp = 0; - } else { - a.cls = float_class_zero; - } - } else { - uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp; - uint64_t frac_lsbm1 = frac_lsb >> 1; - uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb; - uint64_t rnd_mask = rnd_even_mask >> 1; - uint64_t inc; + FloatParts64 pa, pb, *pr; - switch (rmode) { - case float_round_nearest_even: - inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); - break; - case float_round_ties_away: - inc = frac_lsbm1; - break; - case float_round_to_zero: - inc = 0; - break; - case float_round_up: - inc = a.sign ? 0 : rnd_mask; - break; - case float_round_down: - inc = a.sign ? rnd_mask : 0; - break; - case float_round_to_odd: - inc = a.frac & frac_lsb ? 0 : rnd_mask; - break; - default: - g_assert_not_reached(); - } + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); - if (a.frac & rnd_mask) { - s->float_exception_flags |= float_flag_inexact; - a.frac += inc; - a.frac &= ~rnd_mask; - if (a.frac & DECOMPOSED_OVERFLOW_BIT) { - a.frac >>= 1; - a.exp++; - } - } - } - break; - default: - g_assert_not_reached(); - } - return a; + return float16_round_pack_canonical(pr, status); } -float16 float16_round_to_int(float16 a, float_status *s) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_mul(float32 a, float32 b, float_status *status) { - FloatParts pa = float16_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return float16_round_pack_canonical(pr, s); -} + FloatParts64 pa, pb, *pr; -float32 float32_round_to_int(float32 a, float_status *s) -{ - FloatParts pa = float32_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return float32_round_pack_canonical(pr, s); -} + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); -float64 float64_round_to_int(float64 a, float_status *s) -{ - FloatParts pa = float64_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return float64_round_pack_canonical(pr, s); + return float32_round_pack_canonical(pr, status); } -/* - * Rounds the bfloat16 value `a' to an integer, and returns the - * result as a bfloat16 value. 
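 * (Under the default nearest-even rounding, 2.5 rounds to 2.0 while
 * 3.5 rounds to 4.0, and the inexact flag is raised in both cases.)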
- */ - -bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_mul(float64 a, float64 b, float_status *status) { - FloatParts pa = bfloat16_unpack_canonical(a, s); - FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s); - return bfloat16_round_pack_canonical(pr, s); -} - -/* - * Returns the result of converting the floating-point value `a' to - * the two's complement integer format. The conversion is performed - * according to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic---which means in particular that the conversion is - * rounded according to the current rounding mode. If `a' is a NaN, - * the largest positive integer is returned. Otherwise, if the - * conversion overflows, the largest integer with the same sign as `a' - * is returned. -*/ + FloatParts64 pa, pb, *pr; -static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode, - int scale, int64_t min, int64_t max, - float_status *s) -{ - uint64_t r; - int orig_flags = get_float_exception_flags(s); - FloatParts p = round_to_int(in, rmode, scale, s); + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); - switch (p.cls) { - case float_class_snan: - case float_class_qnan: - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; - case float_class_inf: - s->float_exception_flags = orig_flags | float_flag_invalid; - return p.sign ? min : max; - case float_class_zero: - return 0; - case float_class_normal: - if (p.exp < DECOMPOSED_BINARY_POINT) { - r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp); - } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) { - r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT); - } else { - r = UINT64_MAX; - } - if (p.sign) { - if (r <= -(uint64_t) min) { - return -r; - } else { - s->float_exception_flags = orig_flags | float_flag_invalid; - return min; - } - } else { - if (r <= max) { - return r; - } else { - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; - } - } - default: - g_assert_not_reached(); - } + return float64_round_pack_canonical(pr, status); } -int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +static float hard_f32_mul(float a, float b) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT8_MIN, INT8_MAX, s); + return a * b; } -int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +static double hard_f64_mul(double a, double b) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); + return a * b; } -int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +float32 QEMU_FLATTEN +float32_mul(float32 a, float32 b, float_status *s) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); + return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul, + f32_is_zon2, f32_addsubmul_post); } -int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +float64 QEMU_FLATTEN +float64_mul(float64 a, float64 b, float_status *s) { - return round_to_int_and_pack(float16_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); + return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul, + f64_is_zon2, f64_addsubmul_post); } -int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +bfloat16 
QEMU_FLATTEN +bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status) { - return round_to_int_and_pack(float32_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); -} + FloatParts64 pa, pb, *pr; -int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float32_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); -} + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); -int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float32_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); + return bfloat16_round_pack_canonical(pr, status); } -int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +float128 QEMU_FLATTEN +float128_mul(float128 a, float128 b, float_status *status) { - return round_to_int_and_pack(float64_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); -} + FloatParts128 pa, pb, *pr; -int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float64_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); -} + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_mul(&pa, &pb, status); -int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(float64_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); + return float128_round_pack_canonical(pr, status); } -int8_t float16_to_int8(float16 a, float_status *s) +floatx80 QEMU_FLATTEN +floatx80_mul(floatx80 a, floatx80 b, float_status *status) { - return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts128 pa, pb, *pr; -int16_t float16_to_int16(float16 a, float_status *s) -{ - return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); -} + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } -int32_t float16_to_int32(float16 a, float_status *s) -{ - return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); + pr = parts_mul(&pa, &pb, status); + return floatx80_round_pack_canonical(pr, status); } -int64_t float16_to_int64(float16 a, float_status *s) -{ - return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); -} +/* + * Fused multiply-add + */ -int16_t float32_to_int16(float32 a, float_status *s) +float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c, + int flags, float_status *status) { - return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts64 pa, pb, pc, *pr; -int32_t float32_to_int32(float32 a, float_status *s) -{ - return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); -} + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + float16_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); -int64_t float32_to_int64(float32 a, float_status *s) -{ - return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); + return float16_round_pack_canonical(pr, status); } -int16_t float64_to_int16(float64 a, float_status *s) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_muladd(float32 a, float32 b, float32 c, int flags, + float_status *status) { - return float64_to_int16_scalbn(a, 
s->float_rounding_mode, 0, s); -} + FloatParts64 pa, pb, pc, *pr; -int32_t float64_to_int32(float64 a, float_status *s) -{ - return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); -} + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + float32_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); -int64_t float64_to_int64(float64 a, float_status *s) -{ - return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); + return float32_round_pack_canonical(pr, status); } -int16_t float16_to_int16_round_to_zero(float16 a, float_status *s) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_muladd(float64 a, float64 b, float64 c, int flags, + float_status *status) { - return float16_to_int16_scalbn(a, float_round_to_zero, 0, s); -} + FloatParts64 pa, pb, pc, *pr; -int32_t float16_to_int32_round_to_zero(float16 a, float_status *s) -{ - return float16_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + float64_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); -int64_t float16_to_int64_round_to_zero(float16 a, float_status *s) -{ - return float16_to_int64_scalbn(a, float_round_to_zero, 0, s); + return float64_round_pack_canonical(pr, status); } -int16_t float32_to_int16_round_to_zero(float32 a, float_status *s) -{ - return float32_to_int16_scalbn(a, float_round_to_zero, 0, s); -} +static bool force_soft_fma; -int32_t float32_to_int32_round_to_zero(float32 a, float_status *s) +float32 QEMU_FLATTEN +float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s) { - return float32_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + union_float32 ua, ub, uc, ur; -int64_t float32_to_int64_round_to_zero(float32 a, float_status *s) -{ - return float32_to_int64_scalbn(a, float_round_to_zero, 0, s); -} + ua.s = xa; + ub.s = xb; + uc.s = xc; -int16_t float64_to_int16_round_to_zero(float64 a, float_status *s) -{ - return float64_to_int16_scalbn(a, float_round_to_zero, 0, s); -} + if (unlikely(!can_use_fpu(s))) { + goto soft; + } + if (unlikely(flags & float_muladd_halve_result)) { + goto soft; + } -int32_t float64_to_int32_round_to_zero(float64 a, float_status *s) -{ - return float64_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + float32_input_flush3(&ua.s, &ub.s, &uc.s, s); + if (unlikely(!f32_is_zon3(ua, ub, uc))) { + goto soft; + } -int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) -{ - return float64_to_int64_scalbn(a, float_round_to_zero, 0, s); -} + if (unlikely(force_soft_fma)) { + goto soft; + } -/* - * Returns the result of converting the floating-point value `a' to - * the two's complement integer format. - */ + /* + * When (a || b) == 0, there's no need to check for under/over flow, + * since we know the addend is (normal || 0) and the product is 0. 
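 * Adding a finite addend to a signed zero is always exact, so neither
 * overflow nor underflow can arise on this path; the only subtlety is
 * the sign of an exact zero result, which is why prod_sign is computed
 * explicitly just below.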
+ */ + if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) { + union_float32 up; + bool prod_sign; -int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, INT16_MIN, INT16_MAX, s); -} + prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s); + prod_sign ^= !!(flags & float_muladd_negate_product); + up.s = float32_set_sign(float32_zero, prod_sign); -int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, INT32_MIN, INT32_MAX, s); -} + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; + } + ur.h = up.h + uc.h; + } else { + union_float32 ua_orig = ua; + union_float32 uc_orig = uc; -int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, - float_status *s) -{ - return round_to_int_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, INT64_MIN, INT64_MAX, s); -} + if (flags & float_muladd_negate_product) { + ua.h = -ua.h; + } + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; + } -int16_t bfloat16_to_int16(bfloat16 a, float_status *s) -{ - return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); -} + ur.h = fmaf(ua.h, ub.h, uc.h); -int32_t bfloat16_to_int32(bfloat16 a, float_status *s) -{ - return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); -} + if (unlikely(f32_is_inf(ur))) { + float_raise(float_flag_overflow, s); + } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) { + ua = ua_orig; + uc = uc_orig; + goto soft; + } + } + if (flags & float_muladd_negate_result) { + return float32_chs(ur.s); + } + return ur.s; -int64_t bfloat16_to_int64(bfloat16 a, float_status *s) -{ - return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); + soft: + return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s); } -int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) +float64 QEMU_FLATTEN +float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s) { - return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); -} + union_float64 ua, ub, uc, ur; -int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s) -{ - return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s); -} + ua.s = xa; + ub.s = xb; + uc.s = xc; -int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s) -{ - return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s); -} + if (unlikely(!can_use_fpu(s))) { + goto soft; + } + if (unlikely(flags & float_muladd_halve_result)) { + goto soft; + } -/* - * Returns the result of converting the floating-point value `a' to - * the unsigned integer format. The conversion is performed according - * to the IEC/IEEE Standard for Binary Floating-Point - * Arithmetic---which means in particular that the conversion is - * rounded according to the current rounding mode. If `a' is a NaN, - * the largest unsigned integer is returned. Otherwise, if the - * conversion overflows, the largest unsigned integer is returned. If - * the 'a' is negative, the result is rounded and zero is returned; - * values that do not round to zero will raise the inexact exception - * flag. 
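 * (For instance, under nearest-even rounding -0.25 converts to 0 and
 * raises only the inexact flag.)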
- */ + float64_input_flush3(&ua.s, &ub.s, &uc.s, s); + if (unlikely(!f64_is_zon3(ua, ub, uc))) { + goto soft; + } -static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode, - int scale, uint64_t max, - float_status *s) -{ - int orig_flags = get_float_exception_flags(s); - FloatParts p = round_to_int(in, rmode, scale, s); - uint64_t r; + if (unlikely(force_soft_fma)) { + goto soft; + } - switch (p.cls) { - case float_class_snan: - case float_class_qnan: - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; - case float_class_inf: - s->float_exception_flags = orig_flags | float_flag_invalid; - return p.sign ? 0 : max; - case float_class_zero: - return 0; - case float_class_normal: - if (p.sign) { - s->float_exception_flags = orig_flags | float_flag_invalid; - return 0; + /* + * When (a || b) == 0, there's no need to check for under/over flow, + * since we know the addend is (normal || 0) and the product is 0. + */ + if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) { + union_float64 up; + bool prod_sign; + + prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s); + prod_sign ^= !!(flags & float_muladd_negate_product); + up.s = float64_set_sign(float64_zero, prod_sign); + + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; } + ur.h = up.h + uc.h; + } else { + union_float64 ua_orig = ua; + union_float64 uc_orig = uc; - if (p.exp < DECOMPOSED_BINARY_POINT) { - r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp); - } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) { - r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT); - } else { - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; + if (flags & float_muladd_negate_product) { + ua.h = -ua.h; + } + if (flags & float_muladd_negate_c) { + uc.h = -uc.h; } - /* For uint64 this will never trip, but if p.exp is too large - * to shift a decomposed fraction we shall have exited via the - * 3rd leg above. 
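 * (The r > max test below therefore only fires for the narrower
 * destinations, e.g. converting 70000.0 to a uint16_t result.)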
- */ - if (r > max) { - s->float_exception_flags = orig_flags | float_flag_invalid; - return max; + ur.h = fma(ua.h, ub.h, uc.h); + + if (unlikely(f64_is_inf(ur))) { + float_raise(float_flag_overflow, s); + } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { + ua = ua_orig; + uc = uc_orig; + goto soft; } - return r; - default: - g_assert_not_reached(); } + if (flags & float_muladd_negate_result) { + return float64_chs(ur.s); + } + return ur.s; + + soft: + return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s); } -uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c, + int flags, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT8_MAX, s); + FloatParts64 pa, pb, pc, *pr; + + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + bfloat16_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); + + return bfloat16_round_pack_canonical(pr, status); } -uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c, + int flags, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); + FloatParts128 pa, pb, pc, *pr; + + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + float128_unpack_canonical(&pc, c, status); + pr = parts_muladd(&pa, &pb, &pc, flags, status); + + return float128_round_pack_canonical(pr, status); } -uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +/* + * Division + */ + +float16 float16_div(float16 a, float16 b, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); + FloatParts64 pa, pb, *pr; + + float16_unpack_canonical(&pa, a, status); + float16_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); + + return float16_round_pack_canonical(pr, status); } -uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, - float_status *s) +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_div(float32 a, float32 b, float_status *status) { - return round_to_uint_and_pack(float16_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + FloatParts64 pa, pb, *pr; + + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); + + return float32_round_pack_canonical(pr, status); } -uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_div(float64 a, float64 b, float_status *status) { - return round_to_uint_and_pack(float32_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); + FloatParts64 pa, pb, *pr; + + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); + + return float64_round_pack_canonical(pr, status); } -uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +static float hard_f32_div(float a, float b) { - return round_to_uint_and_pack(float32_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); + return a / b; } -uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, - float_status *s) +static double hard_f64_div(double 
a, double b) { - return round_to_uint_and_pack(float32_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + return a / b; } -uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +static bool f32_div_pre(union_float32 a, union_float32 b) { - return round_to_uint_and_pack(float64_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && + fpclassify(b.h) == FP_NORMAL; + } + return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s); } -uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +static bool f64_div_pre(union_float64 a, union_float64 b) { - return round_to_uint_and_pack(float64_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) && + fpclassify(b.h) == FP_NORMAL; + } + return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s); } -uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, - float_status *s) +static bool f32_div_post(union_float32 a, union_float32 b) { - return round_to_uint_and_pack(float64_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return fpclassify(a.h) != FP_ZERO; + } + return !float32_is_zero(a.s); } -uint8_t float16_to_uint8(float16 a, float_status *s) +static bool f64_div_post(union_float64 a, union_float64 b) { - return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return fpclassify(a.h) != FP_ZERO; + } + return !float64_is_zero(a.s); } -uint16_t float16_to_uint16(float16 a, float_status *s) +float32 QEMU_FLATTEN +float32_div(float32 a, float32 b, float_status *s) { - return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); + return float32_gen2(a, b, s, hard_f32_div, soft_f32_div, + f32_div_pre, f32_div_post); } -uint32_t float16_to_uint32(float16 a, float_status *s) +float64 QEMU_FLATTEN +float64_div(float64 a, float64 b, float_status *s) { - return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + return float64_gen2(a, b, s, hard_f64_div, soft_f64_div, + f64_div_pre, f64_div_post); } -uint64_t float16_to_uint64(float16 a, float_status *s) +bfloat16 QEMU_FLATTEN +bfloat16_div(bfloat16 a, bfloat16 b, float_status *status) { - return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts64 pa, pb, *pr; -uint16_t float32_to_uint16(float32 a, float_status *s) -{ - return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); -} + bfloat16_unpack_canonical(&pa, a, status); + bfloat16_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); -uint32_t float32_to_uint32(float32 a, float_status *s) -{ - return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + return bfloat16_round_pack_canonical(pr, status); } -uint64_t float32_to_uint64(float32 a, float_status *s) +float128 QEMU_FLATTEN +float128_div(float128 a, float128 b, float_status *status) { - return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts128 pa, pb, *pr; -uint16_t float64_to_uint16(float64 a, float_status *s) -{ - return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); -} + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_div(&pa, &pb, status); -uint32_t float64_to_uint32(float64 a, float_status *s) -{ - return 
float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + return float128_round_pack_canonical(pr, status); } -uint64_t float64_to_uint64(float64 a, float_status *s) +floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) { - return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); -} + FloatParts128 pa, pb, *pr; -uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s) -{ - return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s); -} + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } -uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s) -{ - return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s); + pr = parts_div(&pa, &pb, status); + return floatx80_round_pack_canonical(pr, status); } -uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s) -{ - return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s); -} +/* + * Remainder + */ -uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s) +float32 float32_rem(float32 a, float32 b, float_status *status) { - return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s); + FloatParts64 pa, pb, *pr; + + float32_unpack_canonical(&pa, a, status); + float32_unpack_canonical(&pb, b, status); + pr = parts_modrem(&pa, &pb, NULL, status); + + return float32_round_pack_canonical(pr, status); } -uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s) +float64 float64_rem(float64 a, float64 b, float_status *status) { - return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s); + FloatParts64 pa, pb, *pr; + + float64_unpack_canonical(&pa, a, status); + float64_unpack_canonical(&pb, b, status); + pr = parts_modrem(&pa, &pb, NULL, status); + + return float64_round_pack_canonical(pr, status); } -uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s) +float128 float128_rem(float128 a, float128 b, float_status *status) { - return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s); + FloatParts128 pa, pb, *pr; + + float128_unpack_canonical(&pa, a, status); + float128_unpack_canonical(&pb, b, status); + pr = parts_modrem(&pa, &pb, NULL, status); + + return float128_round_pack_canonical(pr, status); } -uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s) +/* + * Returns the remainder of the extended double-precision floating-point value + * `a' with respect to the corresponding value `b'. + * If 'mod' is false, the operation is performed according to the IEC/IEEE + * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return + * the remainder based on truncating the quotient toward zero instead and + * *quotient is set to the low 64 bits of the absolute value of the integer + * quotient. + */ +floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, + uint64_t *quotient, float_status *status) { - return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s); + FloatParts128 pa, pb, *pr; + + *quotient = 0; + if (!floatx80_unpack_canonical(&pa, a, status) || + !floatx80_unpack_canonical(&pb, b, status)) { + return floatx80_default_nan(status); + } + pr = parts_modrem(&pa, &pb, mod ? 
quotient : NULL, status); + + return floatx80_round_pack_canonical(pr, status); } -uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s) +floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) { - return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s); + uint64_t quotient; + return floatx80_modrem(a, b, false, "ient, status); } -uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) +floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) { - return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); + uint64_t quotient; + return floatx80_modrem(a, b, true, "ient, status); } /* - * Returns the result of converting the bfloat16 value `a' to - * the unsigned integer format. + * Float to Float conversions + * + * Returns the result of converting one float format to another. The + * conversion is performed according to the IEC/IEEE Standard for + * Binary Floating-Point Arithmetic. + * + * Usually this only needs to take care of raising invalid exceptions + * and handling the conversion on NaNs. */ -uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, - int scale, float_status *s) +static void parts_float_to_ahp(FloatParts64 *a, float_status *s) { - return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, UINT16_MAX, s); -} + switch (a->cls) { + case float_class_qnan: + case float_class_snan: + /* + * There is no NaN in the destination format. Raise Invalid + * and return a zero with the sign of the input NaN. + */ + float_raise(float_flag_invalid, s); + a->cls = float_class_zero; + break; -uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode, - int scale, float_status *s) -{ - return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, UINT32_MAX, s); -} + case float_class_inf: + /* + * There is no Inf in the destination format. Raise Invalid + * and return the maximum normal with the correct sign. + */ + float_raise(float_flag_invalid, s); + a->cls = float_class_normal; + a->exp = float16_params_ahp.exp_max; + a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift, + float16_params_ahp.frac_size + 1); + break; -uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode, - int scale, float_status *s) -{ - return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s), - rmode, scale, UINT64_MAX, s); + case float_class_normal: + case float_class_zero: + break; + + default: + g_assert_not_reached(); + } } -uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) +static void parts64_float_to_float(FloatParts64 *a, float_status *s) { - return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } } -uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) +static void parts128_float_to_float(FloatParts128 *a, float_status *s) { - return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } } -uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) +#define parts_float_to_float(P, S) \ + PARTS_GENERIC_64_128(float_to_float, P)(P, S) + +static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b, + float_status *s) { - return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); + a->cls = b->cls; + a->sign = b->sign; + a->exp = b->exp; + + if (a->cls == float_class_normal) { + frac_truncjam(a, b); + } else if (is_nan(a->cls)) { + /* Discard the low bits of the NaN. 
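 * The quiet vs. signalling distinction is encoded in the most
 * significant fraction bit, so keeping frac_hi preserves everything
 * parts_return_nan needs.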
*/ + a->frac = b->frac_hi; + parts_return_nan(a, s); + } } -uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) +static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b, + float_status *s) { - return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); + a->cls = b->cls; + a->sign = b->sign; + a->exp = b->exp; + frac_widen(a, b); + + if (is_nan(a->cls)) { + parts_return_nan(a, s); + } } -uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) +float32 float16_to_float32(float16 a, bool ieee, float_status *s) { - return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts64 p; + + float16a_unpack_canonical(&p, a, s, fmt16); + parts_float_to_float(&p, s); + return float32_round_pack_canonical(&p, s); } -uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s) +float64 float16_to_float64(float16 a, bool ieee, float_status *s) { - return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); -} + const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp; + FloatParts64 p; -/* - * Integer to float conversions - * - * Returns the result of converting the two's complement integer `a' - * to the floating-point format. The conversion is performed according - * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. - */ + float16a_unpack_canonical(&p, a, s, fmt16); + parts_float_to_float(&p, s); + return float64_round_pack_canonical(&p, s); +} -static FloatParts int_to_float(int64_t a, int scale, float_status *status) +float16 float32_to_float16(float32 a, bool ieee, float_status *s) { - FloatParts r = { .sign = false }; + FloatParts64 p; + const FloatFmt *fmt; - if (a == 0) { - r.cls = float_class_zero; + float32_unpack_canonical(&p, a, s); + if (ieee) { + parts_float_to_float(&p, s); + fmt = &float16_params; } else { - uint64_t f = a; - int shift; - - r.cls = float_class_normal; - if (a < 0) { - f = -f; - r.sign = true; - } - shift = clz64(f) - 1; - scale = MIN(MAX(scale, -0x10000), 0x10000); - - r.exp = DECOMPOSED_BINARY_POINT - shift + scale; - r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift); + parts_float_to_ahp(&p, s); + fmt = &float16_params_ahp; } - - return r; + return float16a_round_pack_canonical(&p, s, fmt); } -float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) +static float64 QEMU_SOFTFLOAT_ATTR +soft_float32_to_float64(float32 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return float16_round_pack_canonical(pa, status); -} + FloatParts64 p; -float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) -{ - return int64_to_float16_scalbn(a, scale, status); + float32_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float64_round_pack_canonical(&p, s); } -float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) +float64 float32_to_float64(float32 a, float_status *s) { - return int64_to_float16_scalbn(a, scale, status); + if (likely(float32_is_normal(a))) { + /* Widening conversion can never produce inexact results. 
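 * A float32 significand (24 bits) fits exactly within float64's 53 bits
 * and every float32 exponent is in float64's range, so the host cast
 * below is exact and raises no exception flags.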
*/ + union_float32 uf; + union_float64 ud; + uf.s = a; + ud.h = uf.h; + return ud.s; + } else if (float32_is_zero(a)) { + return float64_set_sign(float64_zero, float32_is_neg(a)); + } else { + return soft_float32_to_float64(a, s); + } } -float16 int64_to_float16(int64_t a, float_status *status) +float16 float64_to_float16(float64 a, bool ieee, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + const FloatFmt *fmt; + + float64_unpack_canonical(&p, a, s); + if (ieee) { + parts_float_to_float(&p, s); + fmt = &float16_params; + } else { + parts_float_to_ahp(&p, s); + fmt = &float16_params_ahp; + } + return float16a_round_pack_canonical(&p, s, fmt); } -float16 int32_to_float16(int32_t a, float_status *status) +float32 float64_to_float32(float64 a, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float32_round_pack_canonical(&p, s); } -float16 int16_to_float16(int16_t a, float_status *status) +float32 bfloat16_to_float32(bfloat16 a, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float32_round_pack_canonical(&p, s); } -float16 int8_to_float16(int8_t a, float_status *status) +float64 bfloat16_to_float64(bfloat16 a, float_status *s) { - return int64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return float64_round_pack_canonical(&p, s); } -float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) +bfloat16 float32_to_bfloat16(float32 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return float32_round_pack_canonical(pa, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return bfloat16_round_pack_canonical(&p, s); } -float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) +bfloat16 float64_to_bfloat16(float64 a, float_status *s) { - return int64_to_float32_scalbn(a, scale, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return bfloat16_round_pack_canonical(&p, s); } -float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) +float32 float128_to_float32(float128 a, float_status *s) { - return int64_to_float32_scalbn(a, scale, status); + FloatParts64 p64; + FloatParts128 p128; + + float128_unpack_canonical(&p128, a, s); + parts_float_to_float_narrow(&p64, &p128, s); + return float32_round_pack_canonical(&p64, s); } -float32 int64_to_float32(int64_t a, float_status *status) +float64 float128_to_float64(float128 a, float_status *s) { - return int64_to_float32_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float128_unpack_canonical(&p128, a, s); + parts_float_to_float_narrow(&p64, &p128, s); + return float64_round_pack_canonical(&p64, s); } -float32 int32_to_float32(int32_t a, float_status *status) +float128 float32_to_float128(float32 a, float_status *s) { - return int64_to_float32_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float32_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return float128_round_pack_canonical(&p128, s); } -float32 int16_to_float32(int16_t a, float_status *status) +float128 float64_to_float128(float64 a, float_status *s) { - return int64_to_float32_scalbn(a, 0, status); + 
FloatParts64 p64; + FloatParts128 p128; + + float64_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return float128_round_pack_canonical(&p128, s); } -float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status) +float32 floatx80_to_float32(floatx80 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return float64_round_pack_canonical(pa, status); + FloatParts64 p64; + FloatParts128 p128; + + if (floatx80_unpack_canonical(&p128, a, s)) { + parts_float_to_float_narrow(&p64, &p128, s); + } else { + parts_default_nan(&p64, s); + } + return float32_round_pack_canonical(&p64, s); } -float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status) +float64 floatx80_to_float64(floatx80 a, float_status *s) { - return int64_to_float64_scalbn(a, scale, status); + FloatParts64 p64; + FloatParts128 p128; + + if (floatx80_unpack_canonical(&p128, a, s)) { + parts_float_to_float_narrow(&p64, &p128, s); + } else { + parts_default_nan(&p64, s); + } + return float64_round_pack_canonical(&p64, s); } -float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status) +float128 floatx80_to_float128(floatx80 a, float_status *s) { - return int64_to_float64_scalbn(a, scale, status); + FloatParts128 p; + + if (floatx80_unpack_canonical(&p, a, s)) { + parts_float_to_float(&p, s); + } else { + parts_default_nan(&p, s); + } + return float128_round_pack_canonical(&p, s); } -float64 int64_to_float64(int64_t a, float_status *status) +floatx80 float32_to_floatx80(float32 a, float_status *s) { - return int64_to_float64_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float32_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return floatx80_round_pack_canonical(&p128, s); } -float64 int32_to_float64(int32_t a, float_status *status) +floatx80 float64_to_floatx80(float64 a, float_status *s) { - return int64_to_float64_scalbn(a, 0, status); + FloatParts64 p64; + FloatParts128 p128; + + float64_unpack_canonical(&p64, a, s); + parts_float_to_float_widen(&p128, &p64, s); + return floatx80_round_pack_canonical(&p128, s); } -float64 int16_to_float64(int16_t a, float_status *status) +floatx80 float128_to_floatx80(float128 a, float_status *s) { - return int64_to_float64_scalbn(a, 0, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + parts_float_to_float(&p, s); + return floatx80_round_pack_canonical(&p, s); } /* - * Returns the result of converting the two's complement integer `a' - * to the bfloat16 format. 
+ * Round to integral value */ -bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status) +float16 float16_round_to_int(float16 a, float_status *s) { - FloatParts pa = int_to_float(a, scale, status); - return bfloat16_round_pack_canonical(pa, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params); + return float16_round_pack_canonical(&p, s); } -bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status) +float32 float32_round_to_int(float32 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, scale, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params); + return float32_round_pack_canonical(&p, s); } -bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) +float64 float64_round_to_int(float64 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, scale, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params); + return float64_round_pack_canonical(&p, s); } -bfloat16 int64_to_bfloat16(int64_t a, float_status *status) +bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params); + return bfloat16_round_pack_canonical(&p, s); } -bfloat16 int32_to_bfloat16(int32_t a, float_status *status) +float128 float128_round_to_int(float128 a, float_status *s) { - return int64_to_bfloat16_scalbn(a, 0, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params); + return float128_round_pack_canonical(&p, s); } -bfloat16 int16_to_bfloat16(int16_t a, float_status *status) +floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - return int64_to_bfloat16_scalbn(a, 0, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, status)) { + return floatx80_default_nan(status); + } + + parts_round_to_int(&p, status->float_rounding_mode, 0, status, + &floatx80_params[status->floatx80_rounding_precision]); + return floatx80_round_pack_canonical(&p, status); } /* - * Unsigned Integer to float conversions - * - * Returns the result of converting the unsigned integer `a' to the - * floating-point format. The conversion is performed according to the - * IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+ * Floating-point to signed integer conversions */ -static FloatParts uint_to_float(uint64_t a, int scale, float_status *status) +int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts r = { .sign = false }; - - if (a == 0) { - r.cls = float_class_zero; - } else { - scale = MIN(MAX(scale, -0x10000), 0x10000); - r.cls = float_class_normal; - if ((int64_t)a < 0) { - r.exp = DECOMPOSED_BINARY_POINT + 1 + scale; - shift64RightJamming(a, 1, &a); - r.frac = a; - } else { - int shift = clz64(a) - 1; - r.exp = DECOMPOSED_BINARY_POINT - shift + scale; - r.frac = a << shift; - } - } + FloatParts64 p; - return r; + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); } -float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status) +int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return float16_round_pack_canonical(pa, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status) +int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, scale, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status) +int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, scale, status); + FloatParts64 p; + + float16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float16 uint64_to_float16(uint64_t a, float_status *status) +int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float16 uint32_to_float16(uint32_t a, float_status *status) +int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float16 uint16_to_float16(uint16_t a, float_status *status) +int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float16 uint8_to_float16(uint8_t a, float_status *status) +int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float16_scalbn(a, 0, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status) +int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return float32_round_pack_canonical(pa, status); 
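The *_to_int*_scalbn helpers round under an explicit FloatRoundMode and saturate to the destination range passed as INT*_MIN/INT*_MAX (raising the invalid flag on overflow); per the scalbn naming, the input is effectively scaled by 2^scale before rounding. A simplified sketch of the round-and-saturate step only, with a hypothetical helper name and NaN/flag handling left out:

#include <stdint.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical helper: round under the current host rounding mode, then
 * clamp to the int16_t range, roughly what parts_float_to_sint does when
 * called with INT16_MIN/INT16_MAX bounds. */
static int16_t to_int16_saturate(double x)
{
    double r = nearbyint(x);
    if (r < INT16_MIN) {
        return INT16_MIN;
    }
    if (r > INT16_MAX) {
        return INT16_MAX;
    }
    return (int16_t)r;
}

int main(void)
{
    printf("%d\n", to_int16_saturate(1.5));     /* 2 under nearest-even */
    printf("%d\n", to_int16_saturate(1e9));     /* saturates to 32767 */
    printf("%d\n", to_int16_saturate(-1e9));    /* saturates to -32768 */
    return 0;
}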
+ FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status) +int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, scale, status); + FloatParts64 p; + + float64_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status) +int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, scale, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s); } -float32 uint64_to_float32(uint64_t a, float_status *status) +int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float32 uint32_to_float32(uint32_t a, float_status *status) +int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) { - return uint64_to_float32_scalbn(a, 0, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float32 uint16_to_float32(uint16_t a, float_status *status) +static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - return uint64_to_float32_scalbn(a, 0, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status) +static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return float64_round_pack_canonical(pa, status); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status) +static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode, + int scale, float_status *s) { - return uint64_to_float64_scalbn(a, scale, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, s)) { + parts_default_nan(&p, s); + } + return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s); } -float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status) +static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode, + int scale, float_status *s) { - return uint64_to_float64_scalbn(a, scale, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, s)) { + parts_default_nan(&p, s); + } + return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } -float64 uint64_to_float64(uint64_t a, float_status *status) +int8_t float16_to_int8(float16 a, float_status *s) { - return uint64_to_float64_scalbn(a, 0, status); + return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); } -float64 uint32_to_float64(uint32_t a, float_status *status) +int16_t float16_to_int16(float16 a, float_status *s) { - return uint64_to_float64_scalbn(a, 0, status); + 
return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -float64 uint16_to_float64(uint16_t a, float_status *status) +int32_t float16_to_int32(float16 a, float_status *s) { - return uint64_to_float64_scalbn(a, 0, status); + return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -/* - * Returns the result of converting the unsigned integer `a' to the - * bfloat16 format. - */ - -bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status) +int64_t float16_to_int64(float16 a, float_status *s) { - FloatParts pa = uint_to_float(a, scale, status); - return bfloat16_round_pack_canonical(pa, status); + return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status) +int16_t float32_to_int16(float32 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, scale, status); + return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) +int32_t float32_to_int32(float32 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, scale, status); + return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) +int64_t float32_to_int64(float32 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, 0, status); + return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status) +int16_t float64_to_int16(float64 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, 0, status); + return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) +int32_t float64_to_int32(float64 a, float_status *s) { - return uint64_to_bfloat16_scalbn(a, 0, status); + return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -/* Float Min/Max */ -/* min() and max() functions. These can't be implemented as - * 'compare and pick one input' because that would mishandle - * NaNs and +0 vs -0. - * - * minnum() and maxnum() functions. These are similar to the min() - * and max() functions but if one of the arguments is a QNaN and - * the other is numerical then the numerical argument is returned. - * SNaNs will get quietened before being returned. - * minnum() and maxnum correspond to the IEEE 754-2008 minNum() - * and maxNum() operations. min() and max() are the typical min/max - * semantics provided by many CPUs which predate that specification. - * - * minnummag() and maxnummag() functions correspond to minNumMag() - * and minNumMag() from the IEEE-754 2008. - */ -static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin, - bool ieee, bool ismag, float_status *s) -{ - if (unlikely(is_nan(a.cls) || is_nan(b.cls))) { - if (ieee) { - /* Takes two floating-point values `a' and `b', one of - * which is a NaN, and returns the appropriate NaN - * result. If either `a' or `b' is a signaling NaN, - * the invalid exception is raised. 
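The removed comment block above is the heart of it: min()/max() cannot be a plain compare-and-select, because NaNs and signed zeros break that, while minnum()/maxnum() (IEEE 754-2008 minNum/maxNum) prefer the numeric operand over a quiet NaN. A small host-side demonstration; C's fmin follows the minNum-like rule that a single NaN argument is treated as missing data:

#include <math.h>
#include <stdio.h>

static double naive_min(double a, double b)
{
    return a < b ? a : b;       /* "compare and pick one input" */
}

int main(void)
{
    /* NaN handling depends on operand order with the naive version. */
    printf("naive_min(NaN, 1.0) = %g\n", naive_min(NAN, 1.0));   /* 1   */
    printf("naive_min(1.0, NaN) = %g\n", naive_min(1.0, NAN));   /* nan */
    printf("fmin(1.0, NaN)      = %g\n", fmin(1.0, NAN));        /* 1   */

    /* The sign of zero is also order dependent. */
    printf("naive_min(-0.0, +0.0) signbit = %d\n",
           !!signbit(naive_min(-0.0, +0.0)));                    /* 0: +0 wins */
    printf("naive_min(+0.0, -0.0) signbit = %d\n",
           !!signbit(naive_min(+0.0, -0.0)));                    /* 1: -0 wins */
    return 0;
}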
- */ - if (is_snan(a.cls) || is_snan(b.cls)) { - return pick_nan(a, b, s); - } else if (is_nan(a.cls) && !is_nan(b.cls)) { - return b; - } else if (is_nan(b.cls) && !is_nan(a.cls)) { - return a; - } - } - return pick_nan(a, b, s); - } else { - int a_exp, b_exp; - - switch (a.cls) { - case float_class_normal: - a_exp = a.exp; - break; - case float_class_inf: - a_exp = INT_MAX; - break; - case float_class_zero: - a_exp = INT_MIN; - break; - default: - g_assert_not_reached(); - break; - } - switch (b.cls) { - case float_class_normal: - b_exp = b.exp; - break; - case float_class_inf: - b_exp = INT_MAX; - break; - case float_class_zero: - b_exp = INT_MIN; - break; - default: - g_assert_not_reached(); - break; - } - - if (ismag && (a_exp != b_exp || a.frac != b.frac)) { - bool a_less = a_exp < b_exp; - if (a_exp == b_exp) { - a_less = a.frac < b.frac; - } - return a_less ^ ismin ? b : a; - } - - if (a.sign == b.sign) { - bool a_less = a_exp < b_exp; - if (a_exp == b_exp) { - a_less = a.frac < b.frac; - } - return a.sign ^ a_less ^ ismin ? b : a; - } else { - return a.sign ^ ismin ? b : a; - } - } +int64_t float64_to_int64(float64 a, float_status *s) +{ + return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -#define MINMAX(sz, name, ismin, isiee, ismag) \ -float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \ - float_status *s) \ -{ \ - FloatParts pa = float ## sz ## _unpack_canonical(a, s); \ - FloatParts pb = float ## sz ## _unpack_canonical(b, s); \ - FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \ - \ - return float ## sz ## _round_pack_canonical(pr, s); \ -} - -MINMAX(16, min, true, false, false) -MINMAX(16, minnum, true, true, false) -MINMAX(16, minnummag, true, true, true) -MINMAX(16, max, false, false, false) -MINMAX(16, maxnum, false, true, false) -MINMAX(16, maxnummag, false, true, true) - -MINMAX(32, min, true, false, false) -MINMAX(32, minnum, true, true, false) -MINMAX(32, minnummag, true, true, true) -MINMAX(32, max, false, false, false) -MINMAX(32, maxnum, false, true, false) -MINMAX(32, maxnummag, false, true, true) - -MINMAX(64, min, true, false, false) -MINMAX(64, minnum, true, true, false) -MINMAX(64, minnummag, true, true, true) -MINMAX(64, max, false, false, false) -MINMAX(64, maxnum, false, true, false) -MINMAX(64, maxnummag, false, true, true) - -#undef MINMAX - -#define BF16_MINMAX(name, ismin, isiee, ismag) \ -bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \ -{ \ - FloatParts pa = bfloat16_unpack_canonical(a, s); \ - FloatParts pb = bfloat16_unpack_canonical(b, s); \ - FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \ - \ - return bfloat16_round_pack_canonical(pr, s); \ -} - -BF16_MINMAX(min, true, false, false) -BF16_MINMAX(minnum, true, true, false) -BF16_MINMAX(minnummag, true, true, true) -BF16_MINMAX(max, false, false, false) -BF16_MINMAX(maxnum, false, true, false) -BF16_MINMAX(maxnummag, false, true, true) - -#undef BF16_MINMAX - -/* Floating point compare */ -static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet, - float_status *s) -{ - if (is_nan(a.cls) || is_nan(b.cls)) { - if (!is_quiet || - a.cls == float_class_snan || - b.cls == float_class_snan) { - s->float_exception_flags |= float_flag_invalid; - } - return float_relation_unordered; - } - - if (a.cls == float_class_zero) { - if (b.cls == float_class_zero) { - return float_relation_equal; - } - return b.sign ? 
float_relation_greater : float_relation_less; - } else if (b.cls == float_class_zero) { - return a.sign ? float_relation_less : float_relation_greater; - } - - /* The only really important thing about infinity is its sign. If - * both are infinities the sign marks the smallest of the two. - */ - if (a.cls == float_class_inf) { - if ((b.cls == float_class_inf) && (a.sign == b.sign)) { - return float_relation_equal; - } - return a.sign ? float_relation_less : float_relation_greater; - } else if (b.cls == float_class_inf) { - return b.sign ? float_relation_greater : float_relation_less; - } - - if (a.sign != b.sign) { - return a.sign ? float_relation_less : float_relation_greater; - } - - if (a.exp == b.exp) { - if (a.frac == b.frac) { - return float_relation_equal; - } - if (a.sign) { - return a.frac > b.frac ? - float_relation_less : float_relation_greater; - } else { - return a.frac > b.frac ? - float_relation_greater : float_relation_less; - } - } else { - if (a.sign) { - return a.exp > b.exp ? float_relation_less : float_relation_greater; - } else { - return a.exp > b.exp ? float_relation_greater : float_relation_less; - } - } +int32_t float128_to_int32(float128 a, float_status *s) +{ + return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -#define COMPARE(name, attr, sz) \ -static int attr \ -name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \ -{ \ - FloatParts pa = float ## sz ## _unpack_canonical(a, s); \ - FloatParts pb = float ## sz ## _unpack_canonical(b, s); \ - return compare_floats(pa, pb, is_quiet, s); \ +int64_t float128_to_int64(float128 a, float_status *s) +{ + return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -COMPARE(soft_f16_compare, QEMU_FLATTEN, 16) -COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32) -COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64) - -#undef COMPARE - -FloatRelation float16_compare(float16 a, float16 b, float_status *s) +int32_t floatx80_to_int32(floatx80 a, float_status *s) { - return soft_f16_compare(a, b, false, s); + return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s); } -FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) +int64_t floatx80_to_int64(floatx80 a, float_status *s) { - return soft_f16_compare(a, b, true, s); + return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } -static FloatRelation QEMU_FLATTEN -f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s) +int16_t float16_to_int16_round_to_zero(float16 a, float_status *s) { - union_float32 ua, ub; - - ua.s = xa; - ub.s = xb; - - if (QEMU_NO_HARDFLOAT) { - goto soft; - } - - float32_input_flush2(&ua.s, &ub.s, s); - if (isgreaterequal(ua.h, ub.h)) { - if (isgreater(ua.h, ub.h)) { - return float_relation_greater; - } - return float_relation_equal; - } - if (likely(isless(ua.h, ub.h))) { - return float_relation_less; - } - /* The only condition remaining is unordered. - * Fall through to set flags. 
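The removed hardfloat compare fast path leans on the host's quiet comparison macros: isgreater/isless do not trap on quiet NaNs, and whatever is neither greater, less nor equal must be unordered, the only case still sent to the softfloat slow path for flag handling. The same classification stated standalone:

#include <math.h>
#include <stdio.h>

/* Mirror of the four FloatRelation outcomes, classified with the host's
 * quiet comparison macros (illustration only). */
static const char *relation(double a, double b)
{
    if (isgreater(a, b)) {
        return "greater";
    }
    if (isless(a, b)) {
        return "less";
    }
    if (isunordered(a, b)) {
        return "unordered";     /* the softfloat slow path handles flags here */
    }
    return "equal";
}

int main(void)
{
    printf("%s\n", relation(1.0, 2.0));   /* less */
    printf("%s\n", relation(2.0, 2.0));   /* equal */
    printf("%s\n", relation(NAN, 2.0));   /* unordered */
    return 0;
}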
- */ - soft: - return soft_f32_compare(ua.s, ub.s, is_quiet, s); + return float16_to_int16_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float32_compare(float32 a, float32 b, float_status *s) +int32_t float16_to_int32_round_to_zero(float16 a, float_status *s) { - return f32_compare(a, b, false, s); + return float16_to_int32_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) +int64_t float16_to_int64_round_to_zero(float16 a, float_status *s) { - return f32_compare(a, b, true, s); + return float16_to_int64_scalbn(a, float_round_to_zero, 0, s); } -static FloatRelation QEMU_FLATTEN -f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s) +int16_t float32_to_int16_round_to_zero(float32 a, float_status *s) { - union_float64 ua, ub; - - ua.s = xa; - ub.s = xb; - - if (QEMU_NO_HARDFLOAT) { - goto soft; - } + return float32_to_int16_scalbn(a, float_round_to_zero, 0, s); +} - float64_input_flush2(&ua.s, &ub.s, s); - if (isgreaterequal(ua.h, ub.h)) { - if (isgreater(ua.h, ub.h)) { - return float_relation_greater; - } - return float_relation_equal; - } - if (likely(isless(ua.h, ub.h))) { - return float_relation_less; - } - /* The only condition remaining is unordered. - * Fall through to set flags. - */ - soft: - return soft_f64_compare(ua.s, ub.s, is_quiet, s); +int32_t float32_to_int32_round_to_zero(float32 a, float_status *s) +{ + return float32_to_int32_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float64_compare(float64 a, float64 b, float_status *s) +int64_t float32_to_int64_round_to_zero(float32 a, float_status *s) { - return f64_compare(a, b, false, s); + return float32_to_int64_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) +int16_t float64_to_int16_round_to_zero(float64 a, float_status *s) { - return f64_compare(a, b, true, s); + return float64_to_int16_scalbn(a, float_round_to_zero, 0, s); } -static FloatRelation QEMU_FLATTEN -soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s) +int32_t float64_to_int32_round_to_zero(float64 a, float_status *s) { - FloatParts pa = bfloat16_unpack_canonical(a, s); - FloatParts pb = bfloat16_unpack_canonical(b, s); - return compare_floats(pa, pb, is_quiet, s); + return float64_to_int32_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s) +int64_t float64_to_int64_round_to_zero(float64 a, float_status *s) { - return soft_bf16_compare(a, b, false, s); + return float64_to_int64_scalbn(a, float_round_to_zero, 0, s); } -FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s) +int32_t float128_to_int32_round_to_zero(float128 a, float_status *s) { - return soft_bf16_compare(a, b, true, s); + return float128_to_int32_scalbn(a, float_round_to_zero, 0, s); } -/* Multiply A by 2 raised to the power N. */ -static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s) +int64_t float128_to_int64_round_to_zero(float128 a, float_status *s) { - if (unlikely(is_nan(a.cls))) { - return return_nan(a, s); - } - if (a.cls == float_class_normal) { - /* The largest float type (even though not supported by FloatParts) - * is float128, which has a 15 bit exponent. Bounding N to 16 bits - * still allows rounding to infinity, without allowing overflow - * within the int32_t that backs FloatParts.exp. 
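scalbn_decomposed only adds N to the unpacked exponent; clamping N to +/-0x10000, as the removed comment explains, keeps the int32 exponent from overflowing while still letting the re-pack step round to infinity or flush to zero. Conceptually this is the same operation as C's scalbn/ldexp:

#include <math.h>
#include <stdio.h>

int main(void)
{
    printf("%g\n", scalbn(1.5, 4));       /* 1.5 * 2^4 = 24     */
    printf("%g\n", scalbn(1.0, 5000));    /* overflows  -> inf  */
    printf("%g\n", scalbn(1.0, -5000));   /* underflows -> 0    */
    return 0;
}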
- */ - n = MIN(MAX(n, -0x10000), 0x10000); - a.exp += n; - } - return a; + return float128_to_int64_scalbn(a, float_round_to_zero, 0, s); } -float16 float16_scalbn(float16 a, int n, float_status *status) +int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return float16_round_pack_canonical(pr, status); + return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s); } -float32 float32_scalbn(float32 a, int n, float_status *status) +int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return float32_round_pack_canonical(pr, status); + return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s); } -float64 float64_scalbn(float64 a, int n, float_status *status) +int16_t bfloat16_to_int16(bfloat16 a, float_status *s) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return float64_round_pack_canonical(pr, status); + return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status) +int32_t bfloat16_to_int32(bfloat16 a, float_status *s) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pr = scalbn_decomposed(pa, n, status); - return bfloat16_round_pack_canonical(pr, status); + return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s); +} + +int64_t bfloat16_to_int64(bfloat16 a, float_status *s) +{ + return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); +} + +int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); +} + +int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s); +} + +int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s); } /* - * Square Root - * - * The old softfloat code did an approximation step before zeroing in - * on the final result. However for simpleness we just compute the - * square root by iterating down from the implicit bit to enough extra - * bits to ensure we get a correctly rounded result. - * - * This does mean however the calculation is slower than before, - * especially for 64 bit floats. + * Floating-point to unsigned integer conversions */ -static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p) +uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) { - uint64_t a_frac, r_frac, s_frac; - int bit, last_bit; + FloatParts64 p; - if (is_nan(a.cls)) { - return return_nan(a, s); - } - if (a.cls == float_class_zero) { - return a; /* sqrt(+-0) = +-0 */ - } - if (a.sign) { - s->float_exception_flags |= float_flag_invalid; - return parts_default_nan(s); - } - if (a.cls == float_class_inf) { - return a; /* sqrt(+inf) = +inf */ - } + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); +} - assert(a.cls == float_class_normal); +uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - /* We need two overflow bits at the top. Adding room for that is a - * right shift. 
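The removed sqrt_float is the classic restoring, bit-by-bit square root: try each result bit from the top down, subtract when it fits, and fold any leftover remainder into the sticky bit so the final rounding is correct. The same digit recurrence on a bare 64-bit integer (a generic sketch, not QEMU's exact loop):

#include <stdint.h>
#include <stdio.h>

/* Restoring, bit-by-bit square root: returns floor(sqrt(x)). */
static uint32_t isqrt64(uint64_t x)
{
    uint64_t rem = x, root = 0, bit = 1ULL << 62;

    while (bit > rem) {
        bit >>= 2;                    /* highest power of four <= x */
    }
    while (bit) {
        if (rem >= root + bit) {
            rem -= root + bit;        /* this bit belongs in the result */
            root = (root >> 1) + bit;
        } else {
            root >>= 1;
        }
        bit >>= 2;
    }
    return (uint32_t)root;            /* a nonzero 'rem' would set the sticky bit */
}

int main(void)
{
    printf("%u\n", isqrt64(1ULL << 60));   /* 1 << 30 */
    printf("%u\n", isqrt64(99));           /* 9       */
    return 0;
}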
If the exponent is odd, we can discard the low bit - * by multiplying the fraction by 2; that's a left shift. Combine - * those and we shift right if the exponent is even. - */ - a_frac = a.frac; - if (!(a.exp & 1)) { - a_frac >>= 1; - } - a.exp >>= 1; + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); +} - /* Bit-by-bit computation of sqrt. */ - r_frac = 0; - s_frac = 0; +uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - /* Iterate from implicit bit down to the 3 extra bits to compute a - * properly rounded result. Remember we've inserted one more bit - * at the top, so these positions are one less. - */ - bit = DECOMPOSED_BINARY_POINT - 1; - last_bit = MAX(p->frac_shift - 4, 0); - do { - uint64_t q = 1ULL << bit; - uint64_t t_frac = s_frac + q; - if (t_frac <= a_frac) { - s_frac = t_frac + q; - a_frac -= t_frac; - r_frac += q; - } - a_frac <<= 1; - } while (--bit >= last_bit); + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); +} - /* Undo the right shift done above. If there is any remaining - * fraction, the result is inexact. Set the sticky bit. - */ - a.frac = (r_frac << 1) + (a_frac != 0); +uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - return a; + float16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } -float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status) +uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = float16_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &float16_params); - return float16_round_pack_canonical(pr, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); } -static float32 QEMU_SOFTFLOAT_ATTR -soft_f32_sqrt(float32 a, float_status *status) +uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = float32_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &float32_params); - return float32_round_pack_canonical(pr, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -static float64 QEMU_SOFTFLOAT_ATTR -soft_f64_sqrt(float64 a, float_status *status) +uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale, + float_status *s) { - FloatParts pa = float64_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &float64_params); - return float64_round_pack_canonical(pr, status); + FloatParts64 p; + + float32_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } -float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s) +uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - union_float32 ua, ur; + FloatParts64 p; - ua.s = xa; - if (unlikely(!can_use_fpu(s))) { - goto soft; - } + float64_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); +} - float32_input_flush1(&ua.s, s); - if (QEMU_HARDFLOAT_1F32_USE_FP) { - if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || - fpclassify(ua.h) == FP_ZERO) || - signbit(ua.h))) { - goto soft; - } - } else if (unlikely(!float32_is_zero_or_normal(ua.s) || - 
float32_is_neg(ua.s))) { - goto soft; - } - ur.h = sqrtf(ua.h); - return ur.s; +uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; - soft: - return soft_f32_sqrt(ua.s, s); + float64_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s) +uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, + float_status *s) { - union_float64 ua, ur; + FloatParts64 p; - ua.s = xa; - if (unlikely(!can_use_fpu(s))) { - goto soft; - } + float64_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); +} - float64_input_flush1(&ua.s, s); - if (QEMU_HARDFLOAT_1F64_USE_FP) { - if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || - fpclassify(ua.h) == FP_ZERO) || - signbit(ua.h))) { - goto soft; - } - } else if (unlikely(!float64_is_zero_or_normal(ua.s) || - float64_is_neg(ua.s))) { - goto soft; - } - ur.h = sqrt(ua.h); - return ur.s; +uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) +{ + FloatParts64 p; - soft: - return soft_f64_sqrt(ua.s, s); + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s); } -bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status) +uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts pa = bfloat16_unpack_canonical(a, status); - FloatParts pr = sqrt_float(pa, status, &bfloat16_params); - return bfloat16_round_pack_canonical(pr, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -/*---------------------------------------------------------------------------- -| The pattern for a default generated NaN. 
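The *_default_nan helpers that follow shift the generic parts_default_nan fraction into each format's field layout. Under the most common convention that yields an all-ones exponent with only the top fraction bit set (0x7FC00000 for binary32); the sign bit and the exact pattern are target dependent in QEMU, so the value printed below is the host's choice, not QEMU's:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    float f = NAN;                  /* host default quiet NaN */
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    printf("host float NaN bits: 0x%08X\n", bits);   /* typically 0x7FC00000 */
    printf("exponent all ones  : %d\n", ((bits >> 23) & 0xFF) == 0xFF);
    printf("quiet bit set      : %d\n", (bits >> 22) & 1);
    return 0;
}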
-*----------------------------------------------------------------------------*/ +uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) +{ + FloatParts64 p; -float16 float16_default_nan(float_status *status) + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); +} + +static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= float16_params.frac_shift; - return float16_pack_raw(p); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s); } -float32 float32_default_nan(float_status *status) +static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode, + int scale, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= float32_params.frac_shift; - return float32_pack_raw(p); + FloatParts128 p; + + float128_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } -float64 float64_default_nan(float_status *status) +uint8_t float16_to_uint8(float16 a, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= float64_params.frac_shift; - return float64_pack_raw(p); + return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); } -float128 float128_default_nan(float_status *status) +uint16_t float16_to_uint16(float16 a, float_status *s) { - FloatParts p = parts_default_nan(status); - float128 r; + return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); +} - /* Extrapolate from the choices made by parts_default_nan to fill - * in the quad-floating format. If the low bit is set, assume we - * want to set all non-snan bits. - */ - r.low = -(p.frac & 1); - r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48); - r.high |= UINT64_C(0x7FFF000000000000); - r.high |= (uint64_t)p.sign << 63; +uint32_t float16_to_uint32(float16 a, float_status *s) +{ + return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); +} - return r; +uint64_t float16_to_uint64(float16 a, float_status *s) +{ + return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 bfloat16_default_nan(float_status *status) +uint16_t float32_to_uint16(float32 a, float_status *s) { - FloatParts p = parts_default_nan(status); - p.frac >>= bfloat16_params.frac_shift; - return bfloat16_pack_raw(p); + return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); } -/*---------------------------------------------------------------------------- -| Returns a quiet NaN from a signalling NaN for the floating point value `a'. 
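float*_silence_nan unpacks the raw fraction, lets parts_silence_nan flip the quiet bit, and re-packs. On formats using the usual "1 = quiet" convention that is just setting the top fraction bit (some targets invert this, which is why it goes through a per-target helper). A bit-level sketch for binary32 under that assumption:

#include <stdint.h>
#include <stdio.h>

#define F32_QUIET_BIT   (1u << 22)      /* top fraction bit of binary32 */

int main(void)
{
    uint32_t snan = 0x7F800001;         /* signalling NaN: exp all ones, quiet bit clear */
    uint32_t qnan = snan | F32_QUIET_BIT;

    printf("sNaN 0x%08X -> qNaN 0x%08X\n", snan, qnan);   /* 0x7FC00001 */
    return 0;
}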
-*----------------------------------------------------------------------------*/ +uint32_t float32_to_uint32(float32 a, float_status *s) +{ + return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); +} -float16 float16_silence_nan(float16 a, float_status *status) +uint64_t float32_to_uint64(float32 a, float_status *s) { - FloatParts p = float16_unpack_raw(a); - p.frac <<= float16_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= float16_params.frac_shift; - return float16_pack_raw(p); + return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } -float32 float32_silence_nan(float32 a, float_status *status) +uint16_t float64_to_uint16(float64 a, float_status *s) { - FloatParts p = float32_unpack_raw(a); - p.frac <<= float32_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= float32_params.frac_shift; - return float32_pack_raw(p); + return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); } -float64 float64_silence_nan(float64 a, float_status *status) +uint32_t float64_to_uint32(float64 a, float_status *s) { - FloatParts p = float64_unpack_raw(a); - p.frac <<= float64_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= float64_params.frac_shift; - return float64_pack_raw(p); + return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); } -bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status) +uint64_t float64_to_uint64(float64 a, float_status *s) { - FloatParts p = bfloat16_unpack_raw(a); - p.frac <<= bfloat16_params.frac_shift; - p = parts_silence_nan(p, status); - p.frac >>= bfloat16_params.frac_shift; - return bfloat16_pack_raw(p); + return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } -/*---------------------------------------------------------------------------- -| If `a' is denormal and we are in flush-to-zero mode then set the -| input-denormal exception and return zero. Otherwise just return the value. 
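squash_input_denormal implements flush-inputs-to-zero: a nonzero significand with an all-zero exponent field is replaced by a zero of the same sign and float_flag_input_denormal is raised. A host-side equivalent of the detection and replacement, with the flag bookkeeping omitted:

#include <math.h>
#include <stdio.h>

static double flush_input_to_zero(double x)
{
    if (fpclassify(x) == FP_SUBNORMAL) {
        return copysign(0.0, x);        /* keep the sign, drop the value */
    }
    return x;
}

int main(void)
{
    double tiny = -4.9e-324;            /* a negative subnormal double */
    printf("%g -> %g (signbit %d)\n", tiny, flush_input_to_zero(tiny),
           !!signbit(flush_input_to_zero(tiny)));
    return 0;
}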
-*----------------------------------------------------------------------------*/ +uint32_t float128_to_uint32(float128 a, float_status *s) +{ + return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); +} -static bool parts_squash_denormal(FloatParts p, float_status *status) +uint64_t float128_to_uint64(float128 a, float_status *s) { - if (p.exp == 0 && p.frac != 0) { - float_raise(float_flag_input_denormal, status); - return true; - } + return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); +} - return false; +uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s) +{ + return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s); } -float16 float16_squash_input_denormal(float16 a, float_status *status) +uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = float16_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return float16_set_sign(float16_zero, p.sign); - } - } - return a; + return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s); } -float32 float32_squash_input_denormal(float32 a, float_status *status) +uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = float32_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return float32_set_sign(float32_zero, p.sign); - } - } - return a; + return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s); } -float64 float64_squash_input_denormal(float64 a, float_status *status) +uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = float64_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return float64_set_sign(float64_zero, p.sign); - } - } - return a; + return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s); } -bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status) +uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s) { - if (status->flush_inputs_to_zero) { - FloatParts p = bfloat16_unpack_raw(a); - if (parts_squash_denormal(p, status)) { - return bfloat16_set_sign(bfloat16_zero, p.sign); - } - } - return a; + return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s); } -/*---------------------------------------------------------------------------- -| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 -| and 7, and returns the properly rounded 32-bit integer corresponding to the -| input. If `zSign' is 1, the input is negated before being converted to an -| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input -| is simply rounded to an integer, with the inexact exception raised if the -| input cannot be represented exactly as an integer. However, if the fixed- -| point input is too large, the invalid exception is raised and the largest -| positive or negative integer is returned. 
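roundAndPackInt32 keeps seven fraction bits below the integer: it adds a mode-dependent increment (0x40 for the nearest modes, 0 or 0x7F for the directed ones), shifts the fraction out, clears the low bit on an exact tie for nearest-even, and flags inexact whenever the discarded bits were nonzero. The nearest-even case in isolation, as a tiny self-contained helper:

#include <stdint.h>
#include <stdio.h>

/* Round a fixed-point value with 7 fraction bits to an integer,
 * nearest-even, using the same increment/tie rule as the removed code. */
static uint64_t round_q7_nearest_even(uint64_t v)
{
    uint64_t round_bits = v & 0x7F;
    uint64_t z = (v + 0x40) >> 7;

    if (round_bits == 0x40) {
        z &= ~(uint64_t)1;              /* exact tie: round to even */
    }
    /* nonzero round_bits would also raise float_flag_inexact */
    return z;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)round_q7_nearest_even(0x0C0)); /* 1.5  -> 2 */
    printf("%llu\n", (unsigned long long)round_q7_nearest_even(0x140)); /* 2.5  -> 2 */
    printf("%llu\n", (unsigned long long)round_q7_nearest_even(0x0A0)); /* 1.25 -> 1 */
    return 0;
}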
-*----------------------------------------------------------------------------*/ +uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s) +{ + return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s); +} -static int32_t roundAndPackInt32(bool zSign, uint64_t absZ, - float_status *status) +uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s) { - int8_t roundingMode; - bool roundNearestEven; - int8_t roundIncrement, roundBits; - int32_t z; + return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s); +} - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x40; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x7f; - break; - case float_round_down: - roundIncrement = zSign ? 0x7f : 0; - break; - case float_round_to_odd: - roundIncrement = absZ & 0x80 ? 0 : 0x7f; - break; - default: - abort(); - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - if (!(roundBits ^ 0x40) && roundNearestEven) { - absZ &= ~1; - } - z = absZ; - if ( zSign ) z = - z; - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise(float_flag_invalid, status); - return zSign ? INT32_MIN : INT32_MAX; - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - return z; +uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s) +{ + return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s); +} +uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s) +{ + return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s); } -/*---------------------------------------------------------------------------- -| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and -| `absZ1', with binary point between bits 63 and 64 (between the input words), -| and returns the properly rounded 64-bit integer corresponding to the input. -| If `zSign' is 1, the input is negated before being converted to an integer. -| Ordinarily, the fixed-point input is simply rounded to an integer, with -| the inexact exception raised if the input cannot be represented exactly as -| an integer. However, if the fixed-point input is too large, the invalid -| exception is raised and the largest positive or negative integer is -| returned. 
-*----------------------------------------------------------------------------*/ +uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s) +{ + return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s); +} -static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1, - float_status *status) +uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s) { - int8_t roundingMode; - bool roundNearestEven, increment; - int64_t z; + return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s); +} - roundingMode = status->float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t) absZ1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && absZ1; - break; - case float_round_down: - increment = zSign && absZ1; - break; - case float_round_to_odd: - increment = !(absZ0 & 1) && absZ1; - break; - default: - abort(); - } - if ( increment ) { - ++absZ0; - if ( absZ0 == 0 ) goto overflow; - if (!(absZ1 << 1) && roundNearestEven) { - absZ0 &= ~1; - } - } - z = absZ0; - if ( zSign ) z = - z; - if ( z && ( ( z < 0 ) ^ zSign ) ) { - overflow: - float_raise(float_flag_invalid, status); - return zSign ? INT64_MIN : INT64_MAX; - } - if (absZ1) { - status->float_exception_flags |= float_flag_inexact; - } - return z; +uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); +} +uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s); } -/*---------------------------------------------------------------------------- -| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and -| `absZ1', with binary point between bits 63 and 64 (between the input words), -| and returns the properly rounded 64-bit unsigned integer corresponding to the -| input. Ordinarily, the fixed-point input is simply rounded to an integer, -| with the inexact exception raised if the input cannot be represented exactly -| as an integer. However, if the fixed-point input is too large, the invalid -| exception is raised and the largest unsigned integer is returned. 
-*----------------------------------------------------------------------------*/ +uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); +} -static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0, - uint64_t absZ1, float_status *status) +uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) { - int8_t roundingMode; - bool roundNearestEven, increment; + return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); +} - roundingMode = status->float_rounding_mode; - roundNearestEven = (roundingMode == float_round_nearest_even); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)absZ1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && absZ1; - break; - case float_round_down: - increment = zSign && absZ1; - break; - case float_round_to_odd: - increment = !(absZ0 & 1) && absZ1; - break; - default: - abort(); - } - if (increment) { - ++absZ0; - if (absZ0 == 0) { - float_raise(float_flag_invalid, status); - return UINT64_MAX; - } - if (!(absZ1 << 1) && roundNearestEven) { - absZ0 &= ~1; - } - } +uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s); +} - if (zSign && absZ0) { - float_raise(float_flag_invalid, status); - return 0; - } +uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s); +} - if (absZ1) { - status->float_exception_flags |= float_flag_inexact; - } - return absZ0; +/* + * Signed integer to floating-point conversions + */ + +float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status) +{ + FloatParts64 p; + + parts_sint_to_float(&p, a, scale, status); + return float16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Normalizes the subnormal single-precision floating-point value represented -| by the denormalized significand `aSig'. The normalized exponent and -| significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. -*----------------------------------------------------------------------------*/ +float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status) +{ + return int64_to_float16_scalbn(a, scale, status); +} -static void - normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr) +float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status) { - int8_t shiftCount; + return int64_to_float16_scalbn(a, scale, status); +} - shiftCount = clz32(aSig) - 8; - *zSigPtr = aSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x40; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x7f; - break; - case float_round_down: - roundIncrement = zSign ? 0x7f : 0; - break; - case float_round_to_odd: - roundIncrement = zSig & 0x80 ? 
0 : 0x7f; - break; - default: - abort(); - break; - } - roundBits = zSig & 0x7F; - if ( 0xFD <= (uint16_t) zExp ) { - if ( ( 0xFD < zExp ) - || ( ( zExp == 0xFD ) - && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) - ) { - bool overflow_to_inf = roundingMode != float_round_to_odd && - roundIncrement != 0; - float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat32(zSign, 0xFF, -!overflow_to_inf); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloat32(zSign, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < -1) - || (zSig + roundIncrement < 0x80000000); - shift32RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x7F; - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } - if (roundingMode == float_round_to_odd) { - /* - * For round-to-odd case, the roundIncrement depends on - * zSig which just changed. - */ - roundIncrement = zSig & 0x80 ? 0 : 0x7f; - } - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig = ( zSig + roundIncrement )>>7; - if (!(roundBits ^ 0x40) && roundNearestEven) { - zSig &= ~1; +float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status) +{ + FloatParts64 p; + + /* Without scaling, there are no overflow concerns. */ + if (likely(scale == 0) && can_use_fpu(status)) { + union_float32 ur; + ur.h = a; + return ur.s; } - if ( zSig == 0 ) zExp = 0; - return packFloat32( zSign, zExp, zSig ); + parts64_sint_to_float(&p, a, scale, status); + return float32_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper single-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat32' except that `zSig' does not have to be normalized. -| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. -*----------------------------------------------------------------------------*/ +float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status) +{ + return int64_to_float32_scalbn(a, scale, status); +} -static float32 - normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig, - float_status *status) +float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status) { - int8_t shiftCount; + return int64_to_float32_scalbn(a, scale, status); +} - shiftCount = clz32(zSig) - 1; - return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - roundIncrement = 0x200; - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : 0x3ff; - break; - case float_round_down: - roundIncrement = zSign ? 0x3ff : 0; - break; - case float_round_to_odd: - roundIncrement = (zSig & 0x400) ? 
0 : 0x3ff; - break; - default: - abort(); - } - roundBits = zSig & 0x3FF; - if ( 0x7FD <= (uint16_t) zExp ) { - if ( ( 0x7FD < zExp ) - || ( ( zExp == 0x7FD ) - && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) - ) { - bool overflow_to_inf = roundingMode != float_round_to_odd && - roundIncrement != 0; - float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat64(zSign, 0x7FF, -(!overflow_to_inf)); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloat64(zSign, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < -1) - || (zSig + roundIncrement < UINT64_C(0x8000000000000000)); - shift64RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x3FF; - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } - if (roundingMode == float_round_to_odd) { - /* - * For round-to-odd case, the roundIncrement depends on - * zSig which just changed. - */ - roundIncrement = (zSig & 0x400) ? 0 : 0x3ff; - } - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig = ( zSig + roundIncrement )>>10; - if (!(roundBits ^ 0x200) && roundNearestEven) { - zSig &= ~1; - } - if ( zSig == 0 ) zExp = 0; - return packFloat64( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper double-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat64' except that `zSig' does not have to be normalized. -| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. -*----------------------------------------------------------------------------*/ - -static float64 - normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig, - float_status *status) -{ - int8_t shiftCount; - - shiftCount = clz64(zSig) - 1; - return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - if ( roundingPrecision == 80 ) goto precision80; - if ( roundingPrecision == 64 ) { - roundIncrement = UINT64_C(0x0000000000000400); - roundMask = UINT64_C(0x00000000000007FF); - } - else if ( roundingPrecision == 32 ) { - roundIncrement = UINT64_C(0x0000008000000000); - roundMask = UINT64_C(0x000000FFFFFFFFFF); - } - else { - goto precision80; - } - zSig0 |= ( zSig1 != 0 ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - break; - case float_round_to_zero: - roundIncrement = 0; - break; - case float_round_up: - roundIncrement = zSign ? 0 : roundMask; - break; - case float_round_down: - roundIncrement = zSign ? 
roundMask : 0; - break; - default: - abort(); - } - roundBits = zSig0 & roundMask; - if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) - ) { - goto overflow; - } - if ( zExp <= 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloatx80(zSign, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < 0 ) - || (zSig0 <= zSig0 + roundIncrement); - shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); - zExp = 0; - roundBits = zSig0 & roundMask; - if (isTiny && roundBits) { - float_raise(float_flag_underflow, status); - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig0 += roundIncrement; - if ( (int64_t) zSig0 < 0 ) zExp = 1; - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if (roundBits) { - status->float_exception_flags |= float_flag_inexact; - } - zSig0 += roundIncrement; - if ( zSig0 < roundIncrement ) { - ++zExp; - zSig0 = UINT64_C(0x8000000000000000); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); - precision80: - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig1; - break; - case float_round_down: - increment = zSign && zSig1; - break; - default: - abort(); - } - if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) - && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) ) - && increment - ) - ) { - roundMask = 0; - overflow: - float_raise(float_flag_overflow | float_flag_inexact, status); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, ~ roundMask ); - } - return packFloatx80(zSign, - floatx80_infinity_high, - floatx80_infinity_low); - } - if ( zExp <= 0 ) { - isTiny = status->tininess_before_rounding - || (zExp < 0) - || !increment - || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF)); - shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); - zExp = 0; - if (isTiny && zSig1) { - float_raise(float_flag_underflow, status); - } - if (zSig1) { - status->float_exception_flags |= float_flag_inexact; - } - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig1 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig1; - break; - case float_round_down: - increment = zSign && zSig1; - break; - default: - abort(); - } - if ( increment ) { - ++zSig0; - if (!(zSig1 << 1) && roundNearestEven) { - zSig0 &= ~1; - } - if ( (int64_t) zSig0 < 0 ) zExp = 1; - } - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if (zSig1) { - status->float_exception_flags |= float_flag_inexact; - } - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = UINT64_C(0x8000000000000000); - } - else { - if (!(zSig1 << 1) && roundNearestEven) { - zSig0 &= ~1; - } - } - } - else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); - -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent -| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', -| and returns the proper extended double-precision floating-point value -| corresponding to the abstract input. This routine is just like -| `roundAndPackFloatx80' except that the input significand does not have to be -| normalized. -*----------------------------------------------------------------------------*/ - -floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, - bool zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - float_status *status) -{ - int8_t shiftCount; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = clz64(zSig0); - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - zExp -= shiftCount; - return roundAndPackFloatx80(roundingPrecision, zSign, zExp, - zSig0, zSig1, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the least-significant 64 fraction bits of the quadruple-precision -| floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat128Frac1( float128 a ) -{ - - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the most-significant 48 fraction bits of the quadruple-precision -| floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline uint64_t extractFloat128Frac0( float128 a ) -{ - - return a.high & UINT64_C(0x0000FFFFFFFFFFFF); - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the quadruple-precision floating-point value -| `a'. 
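The extractFloat128* helpers being deleted just slice the IEEE binary128 fields out of the two 64-bit words: sign in bit 63 of the high word, a 15-bit exponent in bits 48..62, and the 112-bit fraction split 48/64 across the high and low words. The same slicing as standalone code:

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t high, low; } f128_words;   /* high:low, as in float128 */

static int      f128_sign(f128_words a)    { return a.high >> 63; }
static int32_t  f128_exp(f128_words a)     { return (a.high >> 48) & 0x7FFF; }
static uint64_t f128_frac_hi(f128_words a) { return a.high & 0x0000FFFFFFFFFFFFULL; }
static uint64_t f128_frac_lo(f128_words a) { return a.low; }

int main(void)
{
    f128_words one = { 0x3FFF000000000000ULL, 0 };    /* 1.0 in binary128 */

    printf("sign %d exp 0x%04X frac %012llX:%016llX\n",
           f128_sign(one), (unsigned)f128_exp(one),
           (unsigned long long)f128_frac_hi(one),
           (unsigned long long)f128_frac_lo(one));
    return 0;
}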
-*----------------------------------------------------------------------------*/ - -static inline int32_t extractFloat128Exp( float128 a ) -{ - - return ( a.high>>48 ) & 0x7FFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the quadruple-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -static inline bool extractFloat128Sign(float128 a) -{ - return a.high >> 63; -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal quadruple-precision floating-point value -| represented by the denormalized significand formed by the concatenation of -| `aSig0' and `aSig1'. The normalized exponent is stored at the location -| pointed to by `zExpPtr'. The most significant 49 bits of the normalized -| significand are stored at the location pointed to by `zSig0Ptr', and the -| least significant 64 bits of the normalized significand are stored at the -| location pointed to by `zSig1Ptr'. -*----------------------------------------------------------------------------*/ - -static void - normalizeFloat128Subnormal( - uint64_t aSig0, - uint64_t aSig1, - int32_t *zExpPtr, - uint64_t *zSig0Ptr, - uint64_t *zSig1Ptr - ) -{ - int8_t shiftCount; - - if ( aSig0 == 0 ) { - shiftCount = clz64(aSig1) - 15; - if ( shiftCount < 0 ) { - *zSig0Ptr = aSig1>>( - shiftCount ); - *zSig1Ptr = aSig1<<( shiftCount & 63 ); - } - else { - *zSig0Ptr = aSig1<float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig2 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig2; - break; - case float_round_down: - increment = zSign && zSig2; - break; - case float_round_to_odd: - increment = !(zSig1 & 0x1) && zSig2; - break; - default: - abort(); - } - if ( 0x7FFD <= (uint32_t) zExp ) { - if ( ( 0x7FFD < zExp ) - || ( ( zExp == 0x7FFD ) - && eq128( - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF), - zSig0, - zSig1 - ) - && increment - ) - ) { - float_raise(float_flag_overflow | float_flag_inexact, status); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! 
zSign && ( roundingMode == float_round_down ) ) - || (roundingMode == float_round_to_odd) - ) { - return - packFloat128( - zSign, - 0x7FFE, - UINT64_C(0x0000FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF) - ); - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( zExp < 0 ) { - if (status->flush_to_zero) { - float_raise(float_flag_output_denormal, status); - return packFloat128(zSign, 0, 0, 0); - } - isTiny = status->tininess_before_rounding - || (zExp < -1) - || !increment - || lt128(zSig0, zSig1, - UINT64_C(0x0001FFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFF)); - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); - zExp = 0; - if (isTiny && zSig2) { - float_raise(float_flag_underflow, status); - } - switch (roundingMode) { - case float_round_nearest_even: - case float_round_ties_away: - increment = ((int64_t)zSig2 < 0); - break; - case float_round_to_zero: - increment = 0; - break; - case float_round_up: - increment = !zSign && zSig2; - break; - case float_round_down: - increment = zSign && zSig2; - break; - case float_round_to_odd: - increment = !(zSig1 & 0x1) && zSig2; - break; - default: - abort(); - } - } - } - if (zSig2) { - status->float_exception_flags |= float_flag_inexact; - } - if ( increment ) { - add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - if ((zSig2 + zSig2 == 0) && roundNearestEven) { - zSig1 &= ~1; - } - } - else { - if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; - } - return packFloat128( zSign, zExp, zSig0, zSig1 ); + FloatParts64 p; + parts_sint_to_float(&p, a, scale, status); + return bfloat16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand formed by the concatenation of `zSig0' and `zSig1', and -| returns the proper quadruple-precision floating-point value corresponding -| to the abstract input. This routine is just like `roundAndPackFloat128' -| except that the input significand has fewer bits and does not have to be -| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- -| point exponent. -*----------------------------------------------------------------------------*/ - -static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp, - uint64_t zSig0, uint64_t zSig1, - float_status *status) +bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status) { - int8_t shiftCount; - uint64_t zSig2; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = clz64(zSig0) - 15; - if ( 0 <= shiftCount ) { - zSig2 = 0; - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - } - else { - shift128ExtraRightJamming( - zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); - } - zExp -= shiftCount; - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); + return int64_to_bfloat16_scalbn(a, scale, status); +} +bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, scale, status); } +bfloat16 int64_to_bfloat16(int64_t a, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, 0, status); +} -/*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
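All of the narrower signed-integer-to-bfloat16 entry points added here funnel into int64_to_bfloat16_scalbn(), so every width shares one conversion and rounding path; the plain (non-scalbn) names are simply the scale == 0 case. A small usage sketch, assuming the QEMU build environment and the declarations from include/fpu/softfloat.h (count_to_bf16 is a hypothetical helper, not part of the patch):

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* Convert a signed count to bfloat16; with a non-zero shift the value
 * converted is count * 2**shift, rounded once at bfloat16 precision. */
static bfloat16 count_to_bf16(int32_t count, int shift, float_status *st)
{
    if (shift == 0) {
        /* Equivalent to the plain int32_to_bfloat16(count, st) wrapper. */
        return int32_to_bfloat16_scalbn(count, 0, st);
    }
    return int64_to_bfloat16_scalbn(count, shift, st);
}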
-*----------------------------------------------------------------------------*/ +bfloat16 int32_to_bfloat16(int32_t a, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, 0, status); +} -floatx80 int32_to_floatx80(int32_t a, float_status *status) +bfloat16 int16_to_bfloat16(int16_t a, float_status *status) { - bool zSign; - uint32_t absA; - int8_t shiftCount; - uint64_t zSig; + return int64_to_bfloat16_scalbn(a, 0, status); +} - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = clz32(absA) + 32; - zSig = absA; - return packFloatx80( zSign, 0x403E - shiftCount, zSig<>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - if ( 0 < expDiff ) { - q = ( ( (uint64_t) aSig )<<32 ) / bSig; - q >>= 32 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - } - else { - if ( bSig <= aSig ) aSig -= bSig; - aSig64 = ( (uint64_t) aSig )<<40; - bSig64 = ( (uint64_t) bSig )<<40; - expDiff -= 64; - while ( 0 < expDiff ) { - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - aSig64 = - ( ( bSig * q64 )<<38 ); - expDiff -= 62; - } - expDiff += 64; - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - q = q64>>( 64 - expDiff ); - bSig <<= 6; - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (int32_t) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (int32_t) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status); + return uint64_to_float32_scalbn(a, scale, status); } +float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status) +{ + return uint64_to_float32_scalbn(a, scale, status); +} +float32 uint64_to_float32(uint64_t a, float_status *status) +{ + return uint64_to_float32_scalbn(a, 0, status); +} -/*---------------------------------------------------------------------------- -| Returns the binary exponential of the single-precision floating-point value -| `a'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -| -| Uses the following identities: -| -| 1. ------------------------------------------------------------------------- -| x x*ln(2) -| 2 = e -| -| 2. ------------------------------------------------------------------------- -| 2 3 4 5 n -| x x x x x x x -| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... -| 1! 2! 3! 4! 5! n! 
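For readability, the column-aligned identities in the removed float32_exp2() comment are, written out:

    2^x = e^{x \ln 2}, \qquad e^x = \sum_{n=0}^{\infty} \frac{x^n}{n!}

which is why that implementation multiplied the input by float64_ln2 and then accumulated the series in float64 using the fifteen reciprocal-factorial coefficients, i.e. the partial sum through the x^15/15! term.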
-*----------------------------------------------------------------------------*/ - -static const float64 float32_exp2_coefficients[15] = +float32 uint32_to_float32(uint32_t a, float_status *status) { - const_float64( 0x3ff0000000000000ll ), /* 1 */ - const_float64( 0x3fe0000000000000ll ), /* 2 */ - const_float64( 0x3fc5555555555555ll ), /* 3 */ - const_float64( 0x3fa5555555555555ll ), /* 4 */ - const_float64( 0x3f81111111111111ll ), /* 5 */ - const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ - const_float64( 0x3f2a01a01a01a01all ), /* 7 */ - const_float64( 0x3efa01a01a01a01all ), /* 8 */ - const_float64( 0x3ec71de3a556c734ll ), /* 9 */ - const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ - const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ - const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ - const_float64( 0x3de6124613a86d09ll ), /* 13 */ - const_float64( 0x3da93974a8c07c9dll ), /* 14 */ - const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ -}; + return uint64_to_float32_scalbn(a, 0, status); +} -float32 float32_exp2(float32 a, float_status *status) +float32 uint16_to_float32(uint16_t a, float_status *status) { - bool aSign; - int aExp; - uint32_t aSig; - float64 r, x, xn; - int i; - a = float32_squash_input_denormal(a, status); + return uint64_to_float32_scalbn(a, 0, status); +} - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); +float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status) +{ + FloatParts64 p; - if ( aExp == 0xFF) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - return (aSign) ? float32_zero : a; + /* Without scaling, there are no overflow concerns. */ + if (likely(scale == 0) && can_use_fpu(status)) { + union_float64 ur; + ur.h = a; + return ur.s; } - if (aExp == 0) { - if (aSig == 0) return float32_one; - } - - float_raise(float_flag_inexact, status); - /* ******************************* */ - /* using float64 for approximation */ - /* ******************************* */ - x = float32_to_float64(a, status); - x = float64_mul(x, float64_ln2, status); + parts_uint_to_float(&p, a, scale, status); + return float64_round_pack_canonical(&p, status); +} - xn = x; - r = float64_one; - for (i = 0 ; i < 15 ; i++) { - float64 f; +float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status) +{ + return uint64_to_float64_scalbn(a, scale, status); +} - f = float64_mul(xn, float32_exp2_coefficients[i], status); - r = float64_add(r, f, status); +float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status) +{ + return uint64_to_float64_scalbn(a, scale, status); +} - xn = float64_mul(xn, x, status); - } +float64 uint64_to_float64(uint64_t a, float_status *status) +{ + return uint64_to_float64_scalbn(a, 0, status); +} - return float64_to_float32(r, status); +float64 uint32_to_float64(uint32_t a, float_status *status) +{ + return uint64_to_float64_scalbn(a, 0, status); } -/*---------------------------------------------------------------------------- -| Returns the binary log of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ -float32 float32_log2(float32 a, float_status *status) +float64 uint16_to_float64(uint16_t a, float_status *status) { - bool aSign, zSign; - int aExp; - uint32_t aSig, zSig, i; + return uint64_to_float64_scalbn(a, 0, status); +} - a = float32_squash_input_denormal(a, status); - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); +bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status) +{ + FloatParts64 p; - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( aSign ) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - if ( aExp == 0xFF ) { - if (aSig) { - return propagateFloat32NaN(a, float32_zero, status); - } - return a; - } + parts_uint_to_float(&p, a, scale, status); + return bfloat16_round_pack_canonical(&p, status); +} - aExp -= 0x7F; - aSig |= 0x00800000; - zSign = aExp < 0; - zSig = aExp << 23; +bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, scale, status); +} - for (i = 1 << 22; i > 0; i >>= 1) { - aSig = ( (uint64_t)aSig * aSig ) >> 23; - if ( aSig & 0x01000000 ) { - aSig >>= 1; - zSig |= i; - } - } +bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, scale, status); +} - if ( zSign ) - zSig = -zSig; +bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); +} - return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status); +bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ +bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); +} -floatx80 float64_to_floatx80(float64 a, float_status *status) -{ - bool aSign; - int aExp; - uint64_t aSig; - - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status), - status); - return floatx80_silence_nan(res, status); - } - return packFloatx80(aSign, - floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - return - packFloatx80( - aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11); +float128 uint64_to_float128(uint64_t a, float_status *status) +{ + FloatParts128 p; + parts_uint_to_float(&p, a, 0, status); + return float128_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the quadruple-precision floating-point format. 
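The unsigned conversions added here mirror the signed ones: every uintN_to_float64* / uintN_to_bfloat16* wrapper funnels into the corresponding 64-bit _scalbn worker, and uintN_to_FMT(a, s) is shorthand for uintN_to_FMT_scalbn(a, 0, s), so the converted value is a * 2**scale with a single rounding. A short usage sketch, again assuming the API from include/fpu/softfloat.h (pages_to_bytes_f64 is a hypothetical example, not part of the patch):

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

/* Express a 4KiB-page count in bytes as float64: pages * 2**12,
 * rounded once, with exception flags accumulated in *st. */
static float64 pages_to_bytes_f64(uint32_t pages, float_status *st)
{
    return uint32_to_float64_scalbn(pages, 12, st);
}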
The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ +/* + * Minimum and maximum + */ -float128 float64_to_float128(float64 a, float_status *status) +static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags) { - bool aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; + FloatParts64 pa, pb, *pr; - a = float64_squash_input_denormal(a, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if (aSig) { - return commonNaNToFloat128(float64ToCommonNaN(a, status), status); - } - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); + float16_unpack_canonical(&pa, a, s); + float16_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); + return float16_round_pack_canonical(pr, s); } +static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b, + float_status *s, int flags) +{ + FloatParts64 pa, pb, *pr; -/*---------------------------------------------------------------------------- -| Returns the remainder of the double-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + bfloat16_unpack_canonical(&pa, a, s); + bfloat16_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); -float64 float64_rem(float64 a, float64 b, float_status *status) + return bfloat16_round_pack_canonical(pr, s); +} + +static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags) { - bool aSign, zSign; - int aExp, bExp, expDiff; - uint64_t aSig, bSig; - uint64_t q, alternateASig; - int64_t sigMean; - - a = float64_squash_input_denormal(a, status); - b = float64_squash_input_denormal(b, status); - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( bExp == 0x7FF ) { - if (bSig) { - return propagateFloat64NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig = (aSig | UINT64_C(0x0010000000000000)) << 11; - bSig = (bSig | UINT64_C(0x0010000000000000)) << 11; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - aSig = - ( ( bSig>>2 ) * q ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 64 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (int64_t) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (int64_t) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status); + FloatParts64 pa, pb, *pr; + float32_unpack_canonical(&pa, a, s); + float32_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); + + return float32_round_pack_canonical(pr, s); } -/*---------------------------------------------------------------------------- -| Returns the binary log of the double-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ -float64 float64_log2(float64 a, float_status *status) +static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags) { - bool aSign, zSign; - int aExp; - uint64_t aSig, aSig0, aSig1, zSig, i; - a = float64_squash_input_denormal(a, status); + FloatParts64 pa, pb, *pr; - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); + float64_unpack_canonical(&pa, a, s); + float64_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - if ( aSign ) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); - } - if ( aExp == 0x7FF ) { - if (aSig) { - return propagateFloat64NaN(a, float64_zero, status); - } - return a; - } + return float64_round_pack_canonical(pr, s); +} - aExp -= 0x3FF; - aSig |= UINT64_C(0x0010000000000000); - zSign = aExp < 0; - zSig = (uint64_t)aExp << 52; - for (i = 1LL << 51; i > 0; i >>= 1) { - mul64To128( aSig, aSig, &aSig0, &aSig1 ); - aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 ); - if ( aSig & UINT64_C(0x0020000000000000) ) { - aSig >>= 1; - zSig |= i; - } - } +static float128 float128_minmax(float128 a, float128 b, + float_status *s, int flags) +{ + FloatParts128 pa, pb, *pr; - if ( zSign ) - zSig = -zSig; - return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status); + float128_unpack_canonical(&pa, a, s); + float128_unpack_canonical(&pb, b, s); + pr = parts_minmax(&pa, &pb, s, flags); + + return float128_round_pack_canonical(pr, s); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. 
-*----------------------------------------------------------------------------*/ +#define MINMAX_1(type, name, flags) \ + type type##_##name(type a, type b, float_status *s) \ + { return type##_minmax(a, b, s, flags); } -int32_t floatx80_to_int32(floatx80 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig; +#define MINMAX_2(type) \ + MINMAX_1(type, max, 0) \ + MINMAX_1(type, maxnum, minmax_isnum) \ + MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \ + MINMAX_1(type, maximum_number, minmax_isnumber) \ + MINMAX_1(type, min, minmax_ismin) \ + MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \ + MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \ + MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \ - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32(aSign, aSig, status); +MINMAX_2(float16) +MINMAX_2(bfloat16) +MINMAX_2(float32) +MINMAX_2(float64) +MINMAX_2(float128) -} +#undef MINMAX_1 +#undef MINMAX_2 -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ +/* + * Floating point compare + */ -int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) +static FloatRelation QEMU_FLATTEN +float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig, savedASig; - int32_t z; - - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1 << 31; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<float_exception_flags |= float_flag_inexact; - } - return z; + FloatParts64 pa, pb; + float16_unpack_canonical(&pa, a, s); + float16_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. 
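The MINMAX_1/MINMAX_2 macros stamp the whole public min/max family out of the per-format FMT_minmax helper. Purely for readability, expanding MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum) gives:

float32 float32_minnum(float32 a, float32 b, float_status *s)
{
    return float32_minmax(a, b, s, minmax_ismin | minmax_isnum);
}

so the behavioural differences between min/max/minnum/minnummag/minimum_number and friends are carried entirely by the flag bits passed down to parts_minmax().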
The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, -| the largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ +FloatRelation float16_compare(float16 a, float16 b, float_status *s) +{ + return float16_do_compare(a, b, s, false); +} -int64_t floatx80_to_int64(floatx80 a, float_status *status) +FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig, aSigExtra; + return float16_do_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = 0x403E - aExp; - if ( shiftCount <= 0 ) { - if ( shiftCount ) { - float_raise(float_flag_invalid, status); - if (!aSign || floatx80_is_any_nan(a)) { - return INT64_MAX; - } - return INT64_MIN; - } - aSigExtra = 0; - } - else { - shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); - } - return roundAndPackInt64(aSign, aSig, aSigExtra, status); +static FloatRelation QEMU_SOFTFLOAT_ATTR +float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet) +{ + FloatParts64 pa, pb; + float32_unpack_canonical(&pa, a, s); + float32_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 64-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) +static FloatRelation QEMU_FLATTEN +float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig; - int64_t z; + union_float32 ua, ub; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return 1ULL << 63; - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - shiftCount = aExp - 0x403E; - if ( 0 <= shiftCount ) { - aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF); - if ( ( a.high != 0xC03E ) || aSig ) { - float_raise(float_flag_invalid, status); - if ( ! 
aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { - return INT64_MAX; - } - } - return INT64_MIN; + ua.s = xa; + ub.s = xb; + + if (QEMU_NO_HARDFLOAT) { + goto soft; } - else if ( aExp < 0x3FFF ) { - if (aExp | aSig) { - status->float_exception_flags |= float_flag_inexact; + + float32_input_flush2(&ua.s, &ub.s, s); + if (isgreaterequal(ua.h, ub.h)) { + if (isgreater(ua.h, ub.h)) { + return float_relation_greater; } - return 0; + return float_relation_equal; } - z = aSig>>( - shiftCount ); - if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; + if (likely(isless(ua.h, ub.h))) { + return float_relation_less; } - if ( aSign ) z = - z; - return z; - + /* + * The only condition remaining is unordered. + * Fall through to set flags. + */ + soft: + return float32_do_compare(ua.s, ub.s, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the single-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +FloatRelation float32_compare(float32 a, float32 b, float_status *s) +{ + return float32_hs_compare(a, b, s, false); +} -float32 floatx80_to_float32(floatx80 a, float_status *status) +FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s) { - bool aSign; - int32_t aExp; - uint64_t aSig; + return float32_hs_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float32_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status), - status); - return float32_silence_nan(res, status); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 33, &aSig ); - if ( aExp || aSig ) aExp -= 0x3F81; - return roundAndPackFloat32(aSign, aExp, aSig, status); +static FloatRelation QEMU_SOFTFLOAT_ATTR +float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet) +{ + FloatParts64 pa, pb; + float64_unpack_canonical(&pa, a, s); + float64_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
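float32_hs_compare() (and the float64 twin that follows) classifies the relation with the host's ISO C comparison macros, which do not signal on quiet NaNs, and only falls back to the softfloat path for the unordered case so the exception flags still end up correct. A standalone sketch of the same relation logic using only <math.h>; the REL_* values here are chosen to match softfloat's float_relation_* encoding of -1/0/1/2:

#include <math.h>
#include <stdio.h>

enum { REL_LESS = -1, REL_EQUAL = 0, REL_GREATER = 1, REL_UNORDERED = 2 };

static int host_compare(float a, float b)
{
    if (isgreaterequal(a, b)) {
        return isgreater(a, b) ? REL_GREATER : REL_EQUAL;
    }
    if (isless(a, b)) {
        return REL_LESS;
    }
    /* Neither ordered relation holds: at least one operand is a NaN. */
    return REL_UNORDERED;
}

int main(void)
{
    printf("%d %d %d\n",
           host_compare(1.0f, 2.0f),   /* -1: less      */
           host_compare(3.0f, 3.0f),   /*  0: equal     */
           host_compare(NAN, 1.0f));   /*  2: unordered */
    return 0;
}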
-*----------------------------------------------------------------------------*/ - -float64 floatx80_to_float64(floatx80 a, float_status *status) +static FloatRelation QEMU_FLATTEN +float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet) { - bool aSign; - int32_t aExp; - uint64_t aSig, zSig; + union_float64 ua, ub; + + ua.s = xa; + ub.s = xb; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float64_default_nan(status); + if (QEMU_NO_HARDFLOAT) { + goto soft; } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) ) { - float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status), - status); - return float64_silence_nan(res, status); + + float64_input_flush2(&ua.s, &ub.s, s); + if (isgreaterequal(ua.h, ub.h)) { + if (isgreater(ua.h, ub.h)) { + return float_relation_greater; } - return packFloat64( aSign, 0x7FF, 0 ); + return float_relation_equal; } - shift64RightJamming( aSig, 1, &zSig ); - if ( aExp || aSig ) aExp -= 0x3C01; - return roundAndPackFloat64(aSign, aExp, zSig, status); - + if (likely(isless(ua.h, ub.h))) { + return float_relation_less; + } + /* + * The only condition remaining is unordered. + * Fall through to set flags. + */ + soft: + return float64_do_compare(ua.s, ub.s, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the quadruple-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +FloatRelation float64_compare(float64 a, float64 b, float_status *s) +{ + return float64_hs_compare(a, b, s, false); +} -float128 floatx80_to_float128(floatx80 a, float_status *status) +FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s) { - bool aSign; - int aExp; - uint64_t aSig, zSig0, zSig1; + return float64_hs_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) { - float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status), - status); - return float128_silence_nan(res, status); - } - shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp, zSig0, zSig1 ); +static FloatRelation QEMU_FLATTEN +bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet) +{ + FloatParts64 pa, pb; + bfloat16_unpack_canonical(&pa, a, s); + bfloat16_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' -| to the precision provided by floatx80_rounding_precision and returns the -| result as an extended double-precision floating-point value. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
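Each format keeps the same pair of public wrappers: FMT_compare() is the signalling comparison (is_quiet == false, so any NaN operand raises invalid), while FMT_compare_quiet() only signals for signalling NaNs. A usage sketch assuming the softfloat API (f64_lt_quiet is a hypothetical predicate, not part of the patch):

#include <stdbool.h>
#include "fpu/softfloat.h"

/* "a < b" where quiet NaNs simply compare as not-less, without
 * raising the invalid-operation flag. */
static bool f64_lt_quiet(float64 a, float64 b, float_status *st)
{
    return float64_compare_quiet(a, b, st) == float_relation_less;
}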
-*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round(floatx80 a, float_status *status) +FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s) { - return roundAndPackFloatx80(status->floatx80_rounding_precision, - extractFloatx80Sign(a), - extractFloatx80Exp(a), - extractFloatx80Frac(a), 0, status); + return bfloat16_do_compare(a, b, s, false); } -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' to an integer, -| and returns the result as an extended quadruple-precision floating-point -| value. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round_to_int(floatx80 a, float_status *status) +FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s) { - bool aSign; - int32_t aExp; - uint64_t lastBitMask, roundBitsMask; - floatx80 z; + return bfloat16_do_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN(a, a, status); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) - && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) { - return a; - } - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) - ) { - return - packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000)); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000)); - } - break; - case float_round_down: - return - aSign ? - packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000)) - : packFloatx80( 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloatx80( 1, 0, 0 ) - : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000)); - - case float_round_to_zero: - break; - default: - g_assert_not_reached(); - } - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.low += lastBitMask>>1; - if ((z.low & roundBitsMask) == 0) { - z.low &= ~lastBitMask; - } - break; - case float_round_ties_away: - z.low += lastBitMask >> 1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - case float_round_down: - if (extractFloatx80Sign(z)) { - z.low += roundBitsMask; - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = UINT64_C(0x8000000000000000); - } - if (z.low != a.low) { - status->float_exception_flags |= float_flag_inexact; - } - return z; +static FloatRelation QEMU_FLATTEN +float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet) +{ + FloatParts128 pa, pb; + float128_unpack_canonical(&pa, a, s); + float128_unpack_canonical(&pb, b, s); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the extended double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is -| negated before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +FloatRelation float128_compare(float128 a, float128 b, float_status *s) +{ + return float128_do_compare(a, b, s, false); +} -static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) --expDiff; - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80(zSign, - floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) ++expDiff; - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - return a; - } - zSig1 = 0; - zSig0 = aSig + bSig; - if ( aExp == 0 ) { - if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) { - /* At least one of the values is a pseudo-denormal, - * and there is a carry out of the result. 
*/ - zExp = 1; - goto shiftRight1; - } - if (zSig0 == 0) { - return packFloatx80(zSign, 0, 0); - } - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); - goto roundAndPack; - } - zExp = aExp; - goto shiftRight1; - } - zSig0 = aSig + bSig; - if ( (int64_t) zSig0 < 0 ) goto roundAndPack; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= UINT64_C(0x8000000000000000); - ++zExp; - roundAndPack: - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); +FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s) +{ + return float128_do_compare(a, b, s, true); } -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the extended -| double-precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +static FloatRelation QEMU_FLATTEN +floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet) +{ + FloatParts128 pa, pb; -static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - int32_t expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN(a, b, status); - } - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - zSig1 = 0; - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80(zSign ^ 1, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) ++expDiff; - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - bBigger: - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return a; + if (!floatx80_unpack_canonical(&pa, a, s) || + !floatx80_unpack_canonical(&pb, b, s)) { + return float_relation_unordered; } - if ( bExp == 0 ) --expDiff; - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - aBigger: - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); + return parts_compare(&pa, &pb, s, is_quiet); } -/*---------------------------------------------------------------------------- -| Returns the result of adding the extended double-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
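Unlike the other formats, floatx80_do_compare() also has to cope with the invalid 80-bit encodings: when floatx80_unpack_canonical() rejects an operand, the comparison simply reports unordered, so callers can treat bad encodings the same way they treat NaNs. A sketch of such a caller, assuming the softfloat API (fx80_ordered_le is hypothetical):

#include <stdbool.h>
#include "fpu/softfloat.h"

/* Ordered "a <= b" on floatx80; NaNs and invalid encodings yield
 * float_relation_unordered and therefore return false. */
static bool fx80_ordered_le(floatx80 a, floatx80 b, float_status *st)
{
    FloatRelation r = floatx80_compare(a, b, st);
    return r == float_relation_less || r == float_relation_equal;
}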
-*----------------------------------------------------------------------------*/ +FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s) +{ + return floatx80_do_compare(a, b, s, false); +} -floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) +FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s) { - bool aSign, bSign; + return floatx80_do_compare(a, b, s, true); +} - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return addFloatx80Sigs(a, b, aSign, status); - } - else { - return subFloatx80Sigs(a, b, aSign, status); - } +/* + * Scale by 2**N + */ + +float16 float16_scalbn(float16 a, int n, float_status *status) +{ + FloatParts64 p; + float16_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +float32 float32_scalbn(float32 a, int n, float_status *status) +{ + FloatParts64 p; -floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) + float32_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float32_round_pack_canonical(&p, status); +} + +float64 float64_scalbn(float64 a, int n, float_status *status) { - bool aSign, bSign; + FloatParts64 p; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return subFloatx80Sigs(a, b, aSign, status); - } - else { - return addFloatx80Sigs(a, b, aSign, status); - } + float64_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float64_round_pack_canonical(&p, status); +} + +bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status) +{ + FloatParts64 p; + bfloat16_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return bfloat16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
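The scalbn family added here multiplies by 2**n with a single rounding: unpack, let parts_scalbn() adjust the exponent, then repack, so overflow, underflow and inexact are reported just as for any other arithmetic result. A one-line usage sketch against the assumed softfloat API (f32_halve is hypothetical):

#include "fpu/softfloat.h"

/* Compute x * 2**-1 through the softfloat path, so flags in *st
 * (e.g. underflow for tiny x) are updated correctly. */
static float32 f32_halve(float32 x, float_status *st)
{
    return float32_scalbn(x, -1, st);
}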
-*----------------------------------------------------------------------------*/ +float128 float128_scalbn(float128 a, int n, float_status *status) +{ + FloatParts128 p; + + float128_unpack_canonical(&p, a, status); + parts_scalbn(&p, n, status); + return float128_round_pack_canonical(&p, status); +} -floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) +floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) { - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; + FloatParts128 p; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); + if (!floatx80_unpack_canonical(&p, a, status)) { return floatx80_default_nan(status); } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (int64_t) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); + parts_scalbn(&p, n, status); + return floatx80_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the result of dividing the extended double-precision floating-point -| value `a' by the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ +/* + * Square Root + */ -floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) +float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status) { - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig, bSig, zSig0, zSig1; - uint64_t rem0, rem1, rem2, term0, term1, term2; + FloatParts64 p; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloatx80(zSign, floatx80_infinity_high, - floatx80_infinity_low); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return roundAndPackFloatx80(status->floatx80_rounding_precision, - zSign, zExp, zSig0, zSig1, status); + float16_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float16_params); + return float16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic, -| if 'mod' is false; if 'mod' is true, return the remainder based on truncating -| the quotient toward zero instead. '*quotient' is set to the low 64 bits of -| the absolute value of the integer quotient. 
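Square root now follows the same unpack / parts_sqrt() / repack shape for every width, with the per-format parameter block (float16_params above) selecting the precision. A usage sketch that stays inside softfloat so the flags accumulate in one float_status; it assumes the float16 add/mul entry points from the same header, and f16_hypot itself is a hypothetical helper:

#include "fpu/softfloat.h"

/* Naive Euclidean norm of a 2-vector in float16 (overflows for large
 * inputs; shown only to illustrate chaining softfloat operations). */
static float16 f16_hypot(float16 x, float16 y, float_status *st)
{
    float16 xx = float16_mul(x, x, st);
    float16 yy = float16_mul(y, y, st);
    return float16_sqrt(float16_add(xx, yy, st), st);
}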
-*----------------------------------------------------------------------------*/ +static float32 QEMU_SOFTFLOAT_ATTR +soft_f32_sqrt(float32 a, float_status *status) +{ + FloatParts64 p; + + float32_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float32_params); + return float32_round_pack_canonical(&p, status); +} -floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient, - float_status *status) +static float64 QEMU_SOFTFLOAT_ATTR +soft_f64_sqrt(float64 a, float_status *status) { - bool aSign, zSign; - int32_t aExp, bExp, expDiff, aExpOrig; - uint64_t aSig0, aSig1, bSig; - uint64_t q, term0, term1, alternateASig0, alternateASig1; + FloatParts64 p; - *quotient = 0; - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExpOrig = aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - if ( aExp == 0x7FFF ) { - if ( (uint64_t) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN(a, b, status); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ((uint64_t)(bSig << 1)) { - return propagateFloatx80NaN(a, b, status); - } - if (aExp == 0 && aSig0 >> 63) { - /* - * Pseudo-denormal argument must be returned in normalized - * form. - */ - return packFloatx80(aSign, 1, aSig0); - } - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return a; - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if ( expDiff < 0 ) { - if ( mod || expDiff < -1 ) { - if (aExp == 1 && aExpOrig == 0) { - /* - * Pseudo-denormal argument must be returned in - * normalized form. - */ - return packFloatx80(aSign, aExp, aSig0); - } - return a; - } - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - *quotient = q = ( bSig <= aSig0 ); - if ( q ) aSig0 -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - mul64To128( bSig, q, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); - expDiff -= 62; - *quotient <<= 62; - *quotient += q; + float64_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float64_params); + return float64_round_pack_canonical(&p, status); +} + +float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s) +{ + union_float32 ua, ur; + + ua.s = xa; + if (unlikely(!can_use_fpu(s))) { + goto soft; } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 64 - expDiff; - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++q; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - if (expDiff < 64) { - *quotient <<= expDiff; - } else { - *quotient = 0; + + float32_input_flush1(&ua.s, s); + if (QEMU_HARDFLOAT_1F32_USE_FP) { + if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || + fpclassify(ua.h) == FP_ZERO) || + signbit(ua.h))) { + goto soft; } - *quotient += q; + } else if (unlikely(!float32_is_zero_or_normal(ua.s) || + float32_is_neg(ua.s))) { + goto soft; } - else { - term1 = 0; - term0 = bSig; + ur.h = sqrtf(ua.h); + return ur.s; + + soft: + return soft_f32_sqrt(ua.s, s); +} + +float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s) +{ + union_float64 ua, ur; + + ua.s = xa; + if (unlikely(!can_use_fpu(s))) { + goto soft; } - if (!mod) { - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - ++*quotient; + + float64_input_flush1(&ua.s, s); + if (QEMU_HARDFLOAT_1F64_USE_FP) { + if (unlikely(!(fpclassify(ua.h) == FP_NORMAL || + fpclassify(ua.h) == FP_ZERO) || + signbit(ua.h))) { + goto soft; } + } else if (unlikely(!float64_is_zero_or_normal(ua.s) || + float64_is_neg(ua.s))) { + goto soft; } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1, status); + ur.h = sqrt(ua.h); + return ur.s; + soft: + return soft_f64_sqrt(ua.s, s); } -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status) { - uint64_t quotient; - return floatx80_modrem(a, b, false, "ient, status); + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &bfloat16_params); + return bfloat16_round_pack_canonical(&p, status); } -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b', with the quotient truncated -| toward zero. 
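float32_sqrt()/float64_sqrt() only hand the work to the host FPU when that cannot change softfloat-visible behaviour: can_use_fpu() must hold, the input must classify as zero or normal (no denormals, NaNs or infinities), and its sign bit must be clear; everything else drops to soft_f32_sqrt()/soft_f64_sqrt(). A standalone sketch of that per-operand guard using only <math.h>:

#include <math.h>
#include <stdbool.h>

/* Mirrors the per-operand test in the hardfloat sqrt fast path:
 * true only for a non-negative, zero-or-normal input. */
static bool host_sqrt_ok(double x)
{
    int c = fpclassify(x);

    if (c != FP_NORMAL && c != FP_ZERO) {
        return false;    /* NaN, infinity or denormal: take the soft path */
    }
    return !signbit(x);  /* any set sign bit (including -0.0) also goes soft */
}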
-*----------------------------------------------------------------------------*/ +float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status) +{ + FloatParts128 p; -floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) + float128_unpack_canonical(&p, a, status); + parts_sqrt(&p, status, &float128_params); + return float128_round_pack_canonical(&p, status); +} + +floatx80 floatx80_sqrt(floatx80 a, float_status *s) { - uint64_t quotient; - return floatx80_modrem(a, b, true, "ient, status); + FloatParts128 p; + + if (!floatx80_unpack_canonical(&p, a, s)) { + return floatx80_default_nan(s); + } + parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]); + return floatx80_round_pack_canonical(&p, s); } -/*---------------------------------------------------------------------------- -| Returns the square root of the extended double-precision floating-point -| value `a'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +/* + * log2 + */ +float32 float32_log2(float32 a, float_status *status) +{ + FloatParts64 p; + + float32_unpack_canonical(&p, a, status); + parts_log2(&p, status, &float32_params); + return float32_round_pack_canonical(&p, status); +} -floatx80 floatx80_sqrt(floatx80 a, float_status *status) +float64 float64_log2(float64 a, float_status *status) { - bool aSign; - int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + FloatParts64 p; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ((uint64_t)(aSig0 << 1)) { - return propagateFloatx80NaN(a, a, status); - } - if ( ! 
aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 ) == 0 ) return a; - invalid: - float_raise(float_flag_invalid, status); - return floatx80_default_nan(status); - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; - zSig0 = estimateSqrt32( aExp, aSig0>>32 ); - shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= doubleZSig0; - return roundAndPackFloatx80(status->floatx80_rounding_precision, - 0, zExp, zSig0, zSig1, status); + float64_unpack_canonical(&p, a, status); + parts_log2(&p, status, &float64_params); + return float64_round_pack_canonical(&p, status); } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. +| The pattern for a default generated NaN. *----------------------------------------------------------------------------*/ -int32_t float128_to_int32(float128 a, float_status *status) +float16 float16_default_nan(float_status *status) { - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; - if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000); - aSig0 |= ( aSig1 != 0 ); - shiftCount = 0x4028 - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); - return roundAndPackInt32(aSign, aSig0, status); + FloatParts64 p; + parts_default_nan(&p, status); + p.frac >>= float16_params.frac_shift; + return float16_pack_raw(&p); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. 
The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest positive integer is returned. Otherwise, if the -| conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ +float32 float32_default_nan(float_status *status) +{ + FloatParts64 p; -int32_t float128_to_int32_round_to_zero(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1, savedASig; - int32_t z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - aSig0 |= ( aSig1 != 0 ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if (aExp || aSig0) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - aSig0 |= UINT64_C(0x0001000000000000); - shiftCount = 0x402F - aExp; - savedASig = aSig0; - aSig0 >>= shiftCount; - z = aSig0; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise(float_flag_invalid, status); - return aSign ? INT32_MIN : INT32_MAX; - } - if ( ( aSig0<<shiftCount ) != savedASig ) { - status->float_exception_flags |= float_flag_inexact; - } - return z; + parts_default_nan(&p, status); + p.frac >>= float32_params.frac_shift; + return float32_pack_raw(&p); +} + +float64 float64_default_nan(float_status *status) +{ + FloatParts64 p; + parts_default_nan(&p, status); + p.frac >>= float64_params.frac_shift; + return float64_pack_raw(&p); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 64-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ +float128 float128_default_nan(float_status *status) +{ + FloatParts128 p; -int64_t float128_to_int64(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000); - shiftCount = 0x402F - aExp; - if ( shiftCount <= 0 ) { - if ( 0x403E < aExp ) { - float_raise(float_flag_invalid, status); - if ( ! 
aSign - || ( ( aExp == 0x7FFF ) - && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) ) - ) - ) { - return INT64_MAX; - } - return INT64_MIN; - } - shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); - } - else { - shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); - } - return roundAndPackInt64(aSign, aSig0, aSig1, status); + parts_default_nan(&p, status); + frac_shr(&p, float128_params.frac_shift); + return float128_pack_raw(&p); +} + +bfloat16 bfloat16_default_nan(float_status *status) +{ + FloatParts64 p; + parts_default_nan(&p, status); + p.frac >>= bfloat16_params.frac_shift; + return bfloat16_pack_raw(&p); } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 64-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. +| Returns a quiet NaN from a signalling NaN for the floating point value `a'. *----------------------------------------------------------------------------*/ -int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, shiftCount; - uint64_t aSig0, aSig1; - int64_t z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000); - shiftCount = aExp - 0x402F; - if ( 0 < shiftCount ) { - if ( 0x403E <= aExp ) { - aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF); - if ( ( a.high == UINT64_C(0xC03E000000000000) ) - && ( aSig1 < UINT64_C(0x0002000000000000) ) ) { - if (aSig1) { - status->float_exception_flags |= float_flag_inexact; - } - } - else { - float_raise(float_flag_invalid, status); - if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { - return INT64_MAX; - } - } - return INT64_MIN; - } - z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); - if ( (uint64_t) ( aSig1<<( shiftCount & 63 ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - } - else { - if ( aExp < 0x3FFF ) { - if ( aExp | aSig0 | aSig1 ) { - status->float_exception_flags |= float_flag_inexact; - } - return 0; - } - z = aSig0>>( - shiftCount ); - if ( aSig1 - || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) { - status->float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; +float16 float16_silence_nan(float16 a, float_status *status) +{ + FloatParts64 p; + float16_unpack_raw(&p, a); + p.frac <<= float16_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= float16_params.frac_shift; + return float16_pack_raw(&p); } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point value -| `a' to the 64-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. If the conversion overflows, the -| largest unsigned integer is returned. 
If 'a' is negative, the value is -| rounded and zero is returned; negative values that do not round to zero -| will raise the inexact exception. -*----------------------------------------------------------------------------*/ +float32 float32_silence_nan(float32 a, float_status *status) +{ + FloatParts64 p; -uint64_t float128_to_uint64(float128 a, float_status *status) -{ - bool aSign; - int aExp; - int shiftCount; - uint64_t aSig0, aSig1; - - aSig0 = extractFloat128Frac0(a); - aSig1 = extractFloat128Frac1(a); - aExp = extractFloat128Exp(a); - aSign = extractFloat128Sign(a); - if (aSign && (aExp > 0x3FFE)) { - float_raise(float_flag_invalid, status); - if (float128_is_any_nan(a)) { - return UINT64_MAX; - } else { - return 0; - } - } - if (aExp) { - aSig0 |= UINT64_C(0x0001000000000000); - } - shiftCount = 0x402F - aExp; - if (shiftCount <= 0) { - if (0x403E < aExp) { - float_raise(float_flag_invalid, status); - return UINT64_MAX; - } - shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1); - } else { - shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1); - } - return roundAndPackUint64(aSign, aSig0, aSig1, status); + float32_unpack_raw(&p, a); + p.frac <<= float32_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= float32_params.frac_shift; + return float32_pack_raw(&p); +} + +float64 float64_silence_nan(float64 a, float_status *status) +{ + FloatParts64 p; + + float64_unpack_raw(&p, a); + p.frac <<= float64_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= float64_params.frac_shift; + return float64_pack_raw(&p); } -uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status) +bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status) { - uint64_t v; - signed char current_rounding_mode = status->float_rounding_mode; + FloatParts64 p; + + bfloat16_unpack_raw(&p, a); + p.frac <<= bfloat16_params.frac_shift; + parts_silence_nan(&p, status); + p.frac >>= bfloat16_params.frac_shift; + return bfloat16_pack_raw(&p); +} - set_float_rounding_mode(float_round_to_zero, status); - v = float128_to_uint64(a, status); - set_float_rounding_mode(current_rounding_mode, status); +float128 float128_silence_nan(float128 a, float_status *status) +{ + FloatParts128 p; - return v; + float128_unpack_raw(&p, a); + frac_shl(&p, float128_params.frac_shift); + parts_silence_nan(&p, status); + frac_shr(&p, float128_params.frac_shift); + return float128_pack_raw(&p); } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit unsigned integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, -| if the conversion overflows, the largest unsigned integer is returned. -| If 'a' is negative, the value is rounded and zero is returned; negative -| values that do not round to zero will raise the inexact exception. +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. 
*----------------------------------------------------------------------------*/ -uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status) +static bool parts_squash_denormal(FloatParts64 p, float_status *status) { - uint64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); - - v = float128_to_uint64_round_to_zero(a, status); - if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; + if (p.exp == 0 && p.frac != 0) { + float_raise(float_flag_input_denormal, status); + return true; } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; -} -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point value -| `a' to the 32-bit unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. If the conversion overflows, the -| largest unsigned integer is returned. If 'a' is negative, the value is -| rounded and zero is returned; negative values that do not round to zero -| will raise the inexact exception. -*----------------------------------------------------------------------------*/ + return false; +} -uint32_t float128_to_uint32(float128 a, float_status *status) +float16 float16_squash_input_denormal(float16 a, float_status *status) { - uint64_t v; - uint32_t res; - int old_exc_flags = get_float_exception_flags(status); + if (status->flush_inputs_to_zero) { + FloatParts64 p; - v = float128_to_uint64(a, status); - if (v > 0xffffffff) { - res = 0xffffffff; - } else { - return v; + float16_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return float16_set_sign(float16_zero, p.sign); + } } - set_float_exception_flags(old_exc_flags, status); - float_raise(float_flag_invalid, status); - return res; + return a; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the single-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float32 float128_to_float32(float128 a, float_status *status) +float32 float32_squash_input_denormal(float32 a, float_status *status) { - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - uint32_t zSig; + if (status->flush_inputs_to_zero) { + FloatParts64 p; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32(float128ToCommonNaN(a, status), status); + float32_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return float32_set_sign(float32_zero, p.sign); } - return packFloat32( aSign, 0xFF, 0 ); } - aSig0 |= ( aSig1 != 0 ); - shift64RightJamming( aSig0, 18, &aSig0 ); - zSig = aSig0; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x3F81; - } - return roundAndPackFloat32(aSign, aExp, zSig, status); - + return a; } -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float128_to_float64(float128 a, float_status *status) +float64 float64_squash_input_denormal(float64 a, float_status *status) { - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; + if (status->flush_inputs_to_zero) { + FloatParts64 p; - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat64(float128ToCommonNaN(a, status), status); + float64_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return float64_set_sign(float64_zero, p.sign); } - return packFloat64( aSign, 0x7FF, 0 ); } - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - aSig0 |= ( aSig1 != 0 ); - if ( aExp || aSig0 ) { - aSig0 |= UINT64_C(0x4000000000000000); - aExp -= 0x3C01; - } - return roundAndPackFloat64(aSign, aExp, aSig0, status); + return a; +} + +bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status) +{ + if (status->flush_inputs_to_zero) { + FloatParts64 p; + bfloat16_unpack_raw(&p, a); + if (parts_squash_denormal(p, status)) { + return bfloat16_set_sign(bfloat16_zero, p.sign); + } + } + return a; } /*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the extended double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. 
*----------------------------------------------------------------------------*/ -floatx80 float128_to_floatx80(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status), - status); - return floatx80_silence_nan(res, status); - } - return packFloatx80(aSign, floatx80_infinity_high, - floatx80_infinity_low); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - else { - aSig0 |= UINT64_C(0x0001000000000000); - } - shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); - return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status); +void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, + uint64_t *zSigPtr) +{ + int8_t shiftCount; + shiftCount = clz64(aSig); + *zSigPtr = aSig<float_rounding_mode) { - case float_round_nearest_even: - if ( lastBitMask ) { - add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else { - if ( (int64_t) z.low < 0 ) { - ++z.high; - if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; - } - } - break; - case float_round_ties_away: - if (lastBitMask) { - add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); - } else { - if ((int64_t) z.low < 0) { - ++z.high; - } - } - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat128Sign(z)) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - case float_round_down: - if (extractFloat128Sign(z)) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - case float_round_to_odd: - /* - * Note that if lastBitMask == 0, the last bit is the lsb - * of high, and roundBitsMask == -1. - */ - if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) { - add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); - } - break; - default: - abort(); - } - z.low &= ~ roundBitsMask; + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingPrecision) { + case floatx80_precision_x: + goto precision80; + case floatx80_precision_d: + roundIncrement = UINT64_C(0x0000000000000400); + roundMask = UINT64_C(0x00000000000007FF); + break; + case floatx80_precision_s: + roundIncrement = UINT64_C(0x0000008000000000); + roundMask = UINT64_C(0x000000FFFFFFFFFF); + break; + default: + g_assert_not_reached(); } - else { - if ( aExp < 0x3FFF ) { - if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a; - status->float_exception_flags |= float_flag_inexact; - aSign = extractFloat128Sign( a ); - switch (status->float_rounding_mode) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) - && ( extractFloat128Frac0( a ) - | extractFloat128Frac1( a ) ) - ) { - return packFloat128( aSign, 0x3FFF, 0, 0 ); - } - break; - case float_round_ties_away: - if (aExp == 0x3FFE) { - return packFloat128(aSign, 0x3FFF, 0, 0); - } - break; - case float_round_down: - return - aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) - : packFloat128( 0, 0, 0, 0 ); - case float_round_up: - return - aSign ? 
packFloat128( 1, 0, 0, 0 ) - : packFloat128( 0, 0x3FFF, 0, 0 ); - - case float_round_to_odd: - return packFloat128(aSign, 0x3FFF, 0, 0); - - case float_round_to_zero: - break; - } - return packFloat128( aSign, 0, 0, 0 ); + zSig0 |= ( zSig1 != 0 ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); + } + roundBits = zSig0 & roundMask; + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; } - lastBitMask = 1; - lastBitMask <<= 0x402F - aExp; - roundBitsMask = lastBitMask - 1; - z.low = 0; - z.high = a.high; - switch (status->float_rounding_mode) { - case float_round_nearest_even: - z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { - z.high &= ~ lastBitMask; + if ( zExp <= 0 ) { + if (status->flush_to_zero) { + float_raise(float_flag_output_denormal, status); + return packFloatx80(zSign, 0, 0); } - break; - case float_round_ties_away: - z.high += lastBitMask>>1; - break; - case float_round_to_zero: - break; - case float_round_up: - if (!extractFloat128Sign(z)) { - z.high |= ( a.low != 0 ); - z.high += roundBitsMask; + isTiny = status->tininess_before_rounding + || (zExp < 0 ) + || (zSig0 <= zSig0 + roundIncrement); + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if (isTiny && roundBits) { + float_raise(float_flag_underflow, status); } - break; - case float_round_down: - if (extractFloat128Sign(z)) { - z.high |= (a.low != 0); - z.high += roundBitsMask; + if (roundBits) { + float_raise(float_flag_inexact, status); } - break; - case float_round_to_odd: - if ((z.high & lastBitMask) == 0) { - z.high |= (a.low != 0); - z.high += roundBitsMask; + zSig0 += roundIncrement; + if ( (int64_t) zSig0 < 0 ) zExp = 1; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; } - break; - default: - abort(); + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); } - z.high &= ~ roundBitsMask; } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { - status->float_exception_flags |= float_flag_inexact; + if (roundBits) { + float_raise(float_flag_inexact, status); } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the quadruple-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -static float128 addFloat128Sigs(float128 a, float128 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - int32_t expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = UINT64_C(0x8000000000000000); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); + } + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) ) + && increment + ) + ) { + roundMask = 0; + overflow: + float_raise(float_flag_overflow | float_flag_inexact, status); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); } - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= UINT64_C(0x0001000000000000); + return packFloatx80(zSign, + floatx80_infinity_high, + floatx80_infinity_low); } - shift128ExtraRightJamming( - bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); + if ( zExp <= 0 ) { + isTiny = status->tininess_before_rounding + || (zExp < 0) + || !increment + || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF)); + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); + zExp = 0; + if (isTiny && zSig1) { + float_raise(float_flag_underflow, status); + } + if (zSig1) { + float_raise(float_flag_inexact, status); } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= UINT64_C(0x0001000000000000); - } - shift128ExtraRightJamming( - aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN(a, b, status); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } - return a; - } - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) { - if (status->flush_to_zero) { - if (zSig0 | zSig1) { - float_raise(float_flag_output_denormal, status); + if ( increment ) { + ++zSig0; + if (!(zSig1 << 
1) && roundNearestEven) { + zSig0 &= ~1; } - return packFloat128(zSign, 0, 0, 0); + if ( (int64_t) zSig0 < 0 ) zExp = 1; } - return packFloat128( zSign, 0, zSig0, zSig1 ); - } - zSig2 = 0; - zSig0 |= UINT64_C(0x0002000000000000); - zExp = aExp; - goto shiftRight1; - } - aSig0 |= UINT64_C(0x0001000000000000); - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - --zExp; - if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack; - ++zExp; - shiftRight1: - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - roundAndPack: - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the quadruple- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float128 subFloat128Sigs(float128 a, float128 b, bool zSign, - float_status *status) -{ - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; - int32_t expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN(a, b, status); + return packFloatx80( zSign, zExp, zSig0 ); } - float_raise(float_flag_invalid, status); - return float128_default_nan(status); } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; + if (zSig1) { + float_raise(float_flag_inexact, status); } - if ( bSig0 < aSig0 ) goto aBigger; - if ( aSig0 < bSig0 ) goto bBigger; - if ( bSig1 < aSig1 ) goto aBigger; - if ( aSig1 < bSig1 ) goto bBigger; - return packFloat128(status->float_rounding_mode == float_round_down, - 0, 0, 0); - bExpBigger: - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = UINT64_C(0x8000000000000000); } - return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= UINT64_C(0x4000000000000000); - } - shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - bSig0 |= UINT64_C(0x4000000000000000); - bBigger: - sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); + else { + if (!(zSig1 << 1) && roundNearestEven) { + zSig0 &= ~1; + } } - return a; - } - if ( bExp == 0 ) { - --expDiff; } else { - bSig0 |= UINT64_C(0x4000000000000000); + if ( zSig0 == 0 ) zExp = 0; } - shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); - aSig0 |= UINT64_C(0x4000000000000000); - aBigger: - sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, 
zSig1, - status); + return packFloatx80( zSign, zExp, zSig0 ); } /*---------------------------------------------------------------------------- -| Returns the result of adding the quadruple-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. +| Takes an abstract floating-point value having sign `zSign', exponent +| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', +| and returns the proper extended double-precision floating-point value +| corresponding to the abstract input. This routine is just like +| `roundAndPackFloatx80' except that the input significand does not have to be +| normalized. *----------------------------------------------------------------------------*/ -float128 float128_add(float128 a, float128 b, float_status *status) +floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, + bool zSign, int32_t zExp, + uint64_t zSig0, uint64_t zSig1, + float_status *status) { - bool aSign, bSign; + int8_t shiftCount; - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return addFloat128Sigs(a, b, aSign, status); - } - else { - return subFloat128Sigs(a, b, aSign, status); + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; } + shiftCount = clz64(zSig0); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return roundAndPackFloatx80(roundingPrecision, zSign, zExp, + zSig0, zSig1, status); } /*---------------------------------------------------------------------------- -| Returns the result of subtracting the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the binary exponential of the single-precision floating-point value +| `a'. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +| +| Uses the following identities: +| +| 1. ------------------------------------------------------------------------- +| x x*ln(2) +| 2 = e +| +| 2. ------------------------------------------------------------------------- +| 2 3 4 5 n +| x x x x x x x +| e = 1 + --- + --- + --- + --- + --- + ... + --- + ... +| 1! 2! 3! 4! 5! n! 
*----------------------------------------------------------------------------*/ -float128 float128_sub(float128 a, float128 b, float_status *status) +static const float64 float32_exp2_coefficients[15] = { - bool aSign, bSign; - - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return subFloat128Sigs(a, b, aSign, status); - } - else { - return addFloat128Sigs(a, b, aSign, status); - } - -} + const_float64( 0x3ff0000000000000ll ), /* 1 */ + const_float64( 0x3fe0000000000000ll ), /* 2 */ + const_float64( 0x3fc5555555555555ll ), /* 3 */ + const_float64( 0x3fa5555555555555ll ), /* 4 */ + const_float64( 0x3f81111111111111ll ), /* 5 */ + const_float64( 0x3f56c16c16c16c17ll ), /* 6 */ + const_float64( 0x3f2a01a01a01a01all ), /* 7 */ + const_float64( 0x3efa01a01a01a01all ), /* 8 */ + const_float64( 0x3ec71de3a556c734ll ), /* 9 */ + const_float64( 0x3e927e4fb7789f5cll ), /* 10 */ + const_float64( 0x3e5ae64567f544e4ll ), /* 11 */ + const_float64( 0x3e21eed8eff8d898ll ), /* 12 */ + const_float64( 0x3de6124613a86d09ll ), /* 13 */ + const_float64( 0x3da93974a8c07c9dll ), /* 14 */ + const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */ +}; -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ +float32 float32_exp2(float32 a, float_status *status) +{ + FloatParts64 xp, xnp, tp, rp; + int i; -float128 float128_mul(float128 a, float128 b, float_status *status) -{ - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN(a, b, status); - } - if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); + float32_unpack_canonical(&xp, a, status); + if (unlikely(xp.cls != float_class_normal)) { + switch (xp.cls) { + case float_class_snan: + case float_class_qnan: + parts_return_nan(&xp, status); + return float32_round_pack_canonical(&xp, status); + case float_class_inf: + return xp.sign ? 
float32_zero : a; + case float_class_zero: + return float32_one; + default: + break; } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - zExp = aExp + bExp - 0x4000; - aSig0 |= UINT64_C(0x0001000000000000); - shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); - mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); - add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zSig2 |= ( zSig3 != 0 ); - if (UINT64_C( 0x0002000000000000) <= zSig0 ) { - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - ++zExp; + g_assert_not_reached(); } - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); -} + float_raise(float_flag_inexact, status); -/*---------------------------------------------------------------------------- -| Returns the result of dividing the quadruple-precision floating-point value -| `a' by the corresponding value `b'. The operation is performed according to -| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ + float64_unpack_canonical(&tp, float64_ln2, status); + xp = *parts_mul(&xp, &tp, status); + xnp = xp; -float128 float128_div(float128 a, float128 b, float_status *status) -{ - bool aSign, bSign, zSign; - int32_t aExp, bExp, zExp; - uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, b, status); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - goto invalid; - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return packFloat128( zSign, 0, 0, 0 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - float_raise(float_flag_divbyzero, status); - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = aExp - bExp + 0x3FFD; - shortShift128Left( - aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 ); - shortShift128Left( - bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 ); - if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { - shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( 
(int64_t) rem0 < 0 ) { - --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); - } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + float64_unpack_canonical(&rp, float64_one, status); + for (i = 0 ; i < 15 ; i++) { + float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status); + rp = *parts_muladd(&tp, &xp, &rp, 0, status); + xnp = *parts_mul(&xnp, &xp, status); } - shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status); - -} -/*---------------------------------------------------------------------------- -| Returns the remainder of the quadruple-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_rem(float128 a, float128 b, float_status *status) -{ - bool aSign, zSign; - int32_t aExp, bExp, expDiff; - uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - uint64_t allZero, alternateASig0, alternateASig1, sigMean1; - int64_t sigMean0; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN(a, b, status); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if (bSig0 | bSig1) { - return propagateFloat128NaN(a, b, status); - } - return a; - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; - shortShift128Left( - aSig0 | UINT64_C(0x0001000000000000), - aSig1, - 15 - ( expDiff < 0 ), - &aSig0, - &aSig1 - ); - shortShift128Left( - bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 ); - q = le128( bSig0, bSig1, aSig0, aSig1 ); - if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); - shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); - sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); - expDiff -= 61; - } - if ( -64 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? 
q - 4 : 0; - q >>= - expDiff; - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - expDiff += 52; - if ( expDiff < 0 ) { - shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - } - else { - shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); - } - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); - } - else { - shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - } - do { - alternateASig0 = aSig0; - alternateASig1 = aSig1; - ++q; - sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (int64_t) aSig0 ); - add128( - aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 ); - if ( ( sigMean0 < 0 ) - || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - } - zSign = ( (int64_t) aSig0 < 0 ); - if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1, - status); + return float32_round_pack_canonical(&rp, status); } /*---------------------------------------------------------------------------- -| Returns the square root of the quadruple-precision floating-point value `a'. +| Rounds the extended double-precision floating-point value `a' +| to the precision provided by floatx80_rounding_precision and returns the +| result as an extended double-precision floating-point value. | The operation is performed according to the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_sqrt(float128 a, float_status *status) -{ - bool aSign; - int32_t aExp, zExp; - uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; - uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if (aSig0 | aSig1) { - return propagateFloat128NaN(a, a, status); - } - if ( ! 
aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; - invalid: - float_raise(float_flag_invalid, status); - return float128_default_nan(status); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; - aSig0 |= UINT64_C(0x0001000000000000); - zSig0 = estimateSqrt32( aExp, aSig0>>17 ); - shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (int64_t) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & 0x1FFF ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (int64_t) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status); - -} - -static inline FloatRelation -floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet, - float_status *status) -{ - bool aSign, bSign; - - if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { - float_raise(float_flag_invalid, status); - return float_relation_unordered; - } - if (( ( extractFloatx80Exp( a ) == 0x7fff ) && - ( extractFloatx80Frac( a )<<1 ) ) || - ( ( extractFloatx80Exp( b ) == 0x7fff ) && - ( extractFloatx80Frac( b )<<1 ) )) { - if (!is_quiet || - floatx80_is_signaling_nan(a, status) || - floatx80_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return float_relation_unordered; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - - if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) && - ( ( a.low | b.low ) == 0 ) ) { - /* zero case */ - return float_relation_equal; - } else { - return 1 - (2 * aSign); - } - } else { - /* Normalize pseudo-denormals before comparison. 
*/ - if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) { - ++a.high; - } - if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) { - ++b.high; - } - if (a.low == b.low && a.high == b.high) { - return float_relation_equal; - } else { - return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); - } - } -} - -FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status) -{ - return floatx80_compare_internal(a, b, 0, status); -} - -FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, - float_status *status) -{ - return floatx80_compare_internal(a, b, 1, status); -} - -static inline FloatRelation -float128_compare_internal(float128 a, float128 b, bool is_quiet, - float_status *status) -{ - bool aSign, bSign; - - if (( ( extractFloat128Exp( a ) == 0x7fff ) && - ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || - ( ( extractFloat128Exp( b ) == 0x7fff ) && - ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) { - if (!is_quiet || - float128_is_signaling_nan(a, status) || - float128_is_signaling_nan(b, status)) { - float_raise(float_flag_invalid, status); - } - return float_relation_unordered; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) { - /* zero case */ - return float_relation_equal; - } else { - return 1 - (2 * aSign); - } - } else { - if (a.low == b.low && a.high == b.high) { - return float_relation_equal; - } else { - return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); - } - } -} - -FloatRelation float128_compare(float128 a, float128 b, float_status *status) -{ - return float128_compare_internal(a, b, 0, status); -} - -FloatRelation float128_compare_quiet(float128 a, float128 b, - float_status *status) -{ - return float128_compare_internal(a, b, 1, status); -} - -floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) +floatx80 floatx80_round(floatx80 a, float_status *status) { - bool aSign; - int32_t aExp; - uint64_t aSig; + FloatParts128 p; - if (floatx80_invalid_encoding(a)) { - float_raise(float_flag_invalid, status); + if (!floatx80_unpack_canonical(&p, a, status)) { return floatx80_default_nan(status); } - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - - if ( aExp == 0x7FFF ) { - if ( aSig<<1 ) { - return propagateFloatx80NaN(a, a, status); - } - return a; - } - - if (aExp == 0) { - if (aSig == 0) { - return a; - } - aExp++; - } - - if (n > 0x10000) { - n = 0x10000; - } else if (n < -0x10000) { - n = -0x10000; - } - - aExp += n; - return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, - aSign, aExp, aSig, 0, status); -} - -float128 float128_scalbn(float128 a, int n, float_status *status) -{ - bool aSign; - int32_t aExp; - uint64_t aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return propagateFloat128NaN(a, a, status); - } - return a; - } - if (aExp != 0) { - aSig0 |= UINT64_C(0x0001000000000000); - } else if (aSig0 == 0 && aSig1 == 0) { - return a; - } else { - aExp++; - } - - if (n > 0x10000) { - n = 0x10000; - } else if (n < -0x10000) { - n = -0x10000; - } - - aExp += n - 1; - return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1 - , status); - + return floatx80_round_pack_canonical(&p, status); } static void 
__attribute__((constructor)) softfloat_init(void) diff --git a/fsdev/9p-marshal.c b/fsdev/9p-marshal.c index a01bba6908a..51881fe2201 100644 --- a/fsdev/9p-marshal.c +++ b/fsdev/9p-marshal.c @@ -18,6 +18,8 @@ #include "9p-marshal.h" +P9ARRAY_DEFINE_TYPE(V9fsString, v9fs_string_free); + void v9fs_string_free(V9fsString *str) { g_free(str->data); diff --git a/fsdev/9p-marshal.h b/fsdev/9p-marshal.h index ceaf2f521ec..f1abbe151c3 100644 --- a/fsdev/9p-marshal.h +++ b/fsdev/9p-marshal.h @@ -1,10 +1,13 @@ #ifndef QEMU_9P_MARSHAL_H #define QEMU_9P_MARSHAL_H +#include "p9array.h" + typedef struct V9fsString { uint16_t size; char *data; } V9fsString; +P9ARRAY_DECLARE_TYPE(V9fsString); typedef struct V9fsQID { uint8_t type; diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index d9d058b756d..16c1a9d9fed 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -18,6 +18,7 @@ #include #include "qemu/statfs.h" #include "qemu-fsdev-throttle.h" +#include "p9array.h" #define SM_LOCAL_MODE_BITS 0600 #define SM_LOCAL_DIR_MODE_BITS 0700 @@ -105,6 +106,7 @@ struct V9fsPath { uint16_t size; char *data; }; +P9ARRAY_DECLARE_TYPE(V9fsPath); typedef union V9fsFidOpenState V9fsFidOpenState; diff --git a/fsdev/p9array.h b/fsdev/p9array.h new file mode 100644 index 00000000000..6aa25327ca3 --- /dev/null +++ b/fsdev/p9array.h @@ -0,0 +1,160 @@ +/* + * P9Array - deep auto free C-array + * + * Copyright (c) 2021 Crudebyte + * + * Authors: + * Christian Schoenebeck + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef QEMU_P9ARRAY_H +#define QEMU_P9ARRAY_H + +#include "qemu/compiler.h" + +/** + * P9Array provides a mechanism to access arrays in common C-style (e.g. by + * square bracket [] operator) in conjunction with reference variables that + * perform deep auto free of the array when leaving the scope of the auto + * reference variable. That means not only is the array itself automatically + * freed, but also memory dynamically allocated by the individual array + * elements. 
+ * + * Example: + * + * Consider the following user struct @c Foo which shall be used as scalar + * (element) type of an array: + * @code + * typedef struct Foo { + * int i; + * char *s; + * } Foo; + * @endcode + * and assume it has the following function to free memory allocated by @c Foo + * instances: + * @code + * void free_foo(Foo *foo) { + * free(foo->s); + * } + * @endcode + * Add the following to a shared header file: + * @code + * P9ARRAY_DECLARE_TYPE(Foo); + * @endcode + * and the following to a C unit file: + * @code + * P9ARRAY_DEFINE_TYPE(Foo, free_foo); + * @endcode + * Finally the array may then be used like this: + * @code + * void doSomething(size_t n) { + * P9ARRAY_REF(Foo) foos = NULL; + * P9ARRAY_NEW(Foo, foos, n); + * for (size_t i = 0; i < n; ++i) { + * foos[i].i = i; + * foos[i].s = calloc(4096, 1); + * snprintf(foos[i].s, 4096, "foo %d", i); + * if (...) { + * return; // array auto freed here + * } + * } + * // array auto freed here + * } + * @endcode + */ + +/** + * Declares an array type for the passed @a scalar_type. + * + * This is typically used from a shared header file. + * + * @param scalar_type - type of the individual array elements + */ +#define P9ARRAY_DECLARE_TYPE(scalar_type) \ + typedef struct P9Array##scalar_type { \ + size_t len; \ + scalar_type first[]; \ + } P9Array##scalar_type; \ + \ + void p9array_new_##scalar_type(scalar_type **auto_var, size_t len); \ + void p9array_auto_free_##scalar_type(scalar_type **auto_var); \ + +/** + * Defines an array type for the passed @a scalar_type and appropriate + * @a scalar_cleanup_func. + * + * This is typically used from a C unit file. + * + * @param scalar_type - type of the individual array elements + * @param scalar_cleanup_func - appropriate function to free memory dynamically + * allocated by individual array elements before + */ +#define P9ARRAY_DEFINE_TYPE(scalar_type, scalar_cleanup_func) \ + void p9array_new_##scalar_type(scalar_type **auto_var, size_t len) \ + { \ + p9array_auto_free_##scalar_type(auto_var); \ + P9Array##scalar_type *arr = g_malloc0(sizeof(P9Array##scalar_type) + \ + len * sizeof(scalar_type)); \ + arr->len = len; \ + *auto_var = &arr->first[0]; \ + } \ + \ + void p9array_auto_free_##scalar_type(scalar_type **auto_var) \ + { \ + scalar_type *first = (*auto_var); \ + if (!first) { \ + return; \ + } \ + P9Array##scalar_type *arr = (P9Array##scalar_type *) ( \ + ((char *)first) - offsetof(P9Array##scalar_type, first) \ + ); \ + for (size_t i = 0; i < arr->len; ++i) { \ + scalar_cleanup_func(&arr->first[i]); \ + } \ + g_free(arr); \ + } \ + +/** + * Used to declare a reference variable (unique pointer) for an array. After + * leaving the scope of the reference variable, the associated array is + * automatically freed. + * + * @param scalar_type - type of the individual array elements + */ +#define P9ARRAY_REF(scalar_type) \ + __attribute((__cleanup__(p9array_auto_free_##scalar_type))) scalar_type* + +/** + * Allocates a new array of passed @a scalar_type with @a len number of array + * elements and assigns the created array to the reference variable + * @a auto_var. 
+ * + * @param scalar_type - type of the individual array elements + * @param auto_var - destination reference variable + * @param len - amount of array elements to be allocated immediately + */ +#define P9ARRAY_NEW(scalar_type, auto_var, len) \ + QEMU_BUILD_BUG_MSG( \ + !__builtin_types_compatible_p(scalar_type, typeof(*auto_var)), \ + "P9Array scalar type mismatch" \ + ); \ + p9array_new_##scalar_type((&auto_var), len) + +#endif /* QEMU_P9ARRAY_H */ diff --git a/gdb-xml/arm-m-profile-mve.xml b/gdb-xml/arm-m-profile-mve.xml new file mode 100644 index 00000000000..cba664c4c5b --- /dev/null +++ b/gdb-xml/arm-m-profile-mve.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + diff --git a/gdb-xml/arm-neon.xml b/gdb-xml/arm-neon.xml index ce3ee03ec48..9dce0a996fc 100644 --- a/gdb-xml/arm-neon.xml +++ b/gdb-xml/arm-neon.xml @@ -82,7 +82,5 @@ - - diff --git a/gdb-xml/arm-vfp-sysregs.xml b/gdb-xml/arm-vfp-sysregs.xml new file mode 100644 index 00000000000..c4aa2721c8d --- /dev/null +++ b/gdb-xml/arm-vfp-sysregs.xml @@ -0,0 +1,17 @@ + + + + + + + diff --git a/gdb-xml/arm-vfp.xml b/gdb-xml/arm-vfp.xml index b20881e9a99..ebed5b3d573 100644 --- a/gdb-xml/arm-vfp.xml +++ b/gdb-xml/arm-vfp.xml @@ -23,7 +23,5 @@ - - diff --git a/gdb-xml/arm-vfp3.xml b/gdb-xml/arm-vfp3.xml index 227afd8017f..ef391c7144d 100644 --- a/gdb-xml/arm-vfp3.xml +++ b/gdb-xml/arm-vfp3.xml @@ -39,7 +39,5 @@ - - diff --git a/gdbstub.c b/gdbstub.c index 19769539ba1..9e0fd0f6d99 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -31,14 +31,13 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "trace/trace-root.h" +#include "exec/gdbstub.h" #ifdef CONFIG_USER_ONLY #include "qemu.h" #else #include "monitor/monitor.h" #include "chardev/char.h" #include "chardev/char-fe.h" -#include "sysemu/sysemu.h" -#include "exec/gdbstub.h" #include "hw/cpu/cluster.h" #include "hw/boards.h" #endif @@ -103,7 +102,7 @@ static inline int cpu_gdb_index(CPUState *cpu) { #if defined(CONFIG_USER_ONLY) TaskState *ts = (TaskState *) cpu->opaque; - return ts->ts_tid; + return ts ? ts->ts_tid : -1; #else return cpu->cpu_index + 1; #endif @@ -474,6 +473,15 @@ int use_gdb_syscalls(void) return gdb_syscall_mode == GDB_SYS_ENABLED; } +static bool stub_can_reverse(void) +{ +#ifdef CONFIG_USER_ONLY + return false; +#else + return replay_mode == REPLAY_MODE_PLAY; +#endif +} + /* Resume execution. 
*/ static inline void gdb_continue(void) { @@ -1347,6 +1355,8 @@ typedef union GdbCmdVariant { } thread_id; } GdbCmdVariant; +#define get_param(p, i) (&g_array_index(p, GdbCmdVariant, i)) + static const char *cmd_next_param(const char *param, const char delimiter) { static const char all_delimiters[] = ",;:="; @@ -1372,55 +1382,52 @@ static const char *cmd_next_param(const char *param, const char delimiter) } static int cmd_parse_params(const char *data, const char *schema, - GdbCmdVariant *params, int *num_params) + GArray *params) { - int curr_param; const char *curr_schema, *curr_data; - *num_params = 0; - - if (!schema) { - return 0; - } + g_assert(schema); + g_assert(params->len == 0); curr_schema = schema; - curr_param = 0; curr_data = data; while (curr_schema[0] && curr_schema[1] && *curr_data) { + GdbCmdVariant this_param; + switch (curr_schema[0]) { case 'l': if (qemu_strtoul(curr_data, &curr_data, 16, - ¶ms[curr_param].val_ul)) { + &this_param.val_ul)) { return -EINVAL; } - curr_param++; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 'L': if (qemu_strtou64(curr_data, &curr_data, 16, - (uint64_t *)¶ms[curr_param].val_ull)) { + (uint64_t *)&this_param.val_ull)) { return -EINVAL; } - curr_param++; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 's': - params[curr_param].data = curr_data; - curr_param++; + this_param.data = curr_data; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 'o': - params[curr_param].opcode = *(uint8_t *)curr_data; - curr_param++; + this_param.opcode = *(uint8_t *)curr_data; curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case 't': - params[curr_param].thread_id.kind = + this_param.thread_id.kind = read_thread_id(curr_data, &curr_data, - ¶ms[curr_param].thread_id.pid, - ¶ms[curr_param].thread_id.tid); - curr_param++; + &this_param.thread_id.pid, + &this_param.thread_id.tid); curr_data = cmd_next_param(curr_data, curr_schema[1]); + g_array_append_val(params, this_param); break; case '?': curr_data = cmd_next_param(curr_data, curr_schema[1]); @@ -1431,16 +1438,10 @@ static int cmd_parse_params(const char *data, const char *schema, curr_schema += 2; } - *num_params = curr_param; return 0; } -typedef struct GdbCmdContext { - GdbCmdVariant *params; - int num_params; -} GdbCmdContext; - -typedef void (*GdbCmdHandler)(GdbCmdContext *gdb_ctx, void *user_ctx); +typedef void (*GdbCmdHandler)(GArray *params, void *user_ctx); /* * cmd_startswith -> cmd is compared using startswith @@ -1480,8 +1481,8 @@ static inline int startswith(const char *string, const char *pattern) static int process_string_cmd(void *user_ctx, const char *data, const GdbCmdParseEntry *cmds, int num_cmds) { - int i, schema_len, max_num_params = 0; - GdbCmdContext gdb_ctx; + int i; + g_autoptr(GArray) params = g_array_new(false, true, sizeof(GdbCmdVariant)); if (!cmds) { return -1; @@ -1497,24 +1498,13 @@ static int process_string_cmd(void *user_ctx, const char *data, } if (cmd->schema) { - schema_len = strlen(cmd->schema); - if (schema_len % 2) { - return -2; + if (cmd_parse_params(&data[strlen(cmd->cmd)], + cmd->schema, params)) { + return -1; } - - max_num_params = schema_len / 2; } - gdb_ctx.params = - (GdbCmdVariant *)alloca(sizeof(*gdb_ctx.params) * max_num_params); - memset(gdb_ctx.params, 0, sizeof(*gdb_ctx.params) * max_num_params); - - if 
(cmd_parse_params(&data[strlen(cmd->cmd)], cmd->schema, - gdb_ctx.params, &gdb_ctx.num_params)) { - return -1; - } - - cmd->handler(&gdb_ctx, user_ctx); + cmd->handler(params, user_ctx); return 0; } @@ -1537,18 +1527,18 @@ static void run_cmd_parser(const char *data, const GdbCmdParseEntry *cmd) } } -static void handle_detach(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_detach(GArray *params, void *user_ctx) { GDBProcess *process; uint32_t pid = 1; if (gdbserver_state.multiprocess) { - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - pid = gdb_ctx->params[0].val_ul; + pid = get_param(params, 0)->val_ul; } process = gdb_get_process(pid); @@ -1571,22 +1561,22 @@ static void handle_detach(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_thread_alive(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_thread_alive(GArray *params, void *user_ctx) { CPUState *cpu; - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - if (gdb_ctx->params[0].thread_id.kind == GDB_READ_THREAD_ERR) { + if (get_param(params, 0)->thread_id.kind == GDB_READ_THREAD_ERR) { put_packet("E22"); return; } - cpu = gdb_get_cpu(gdb_ctx->params[0].thread_id.pid, - gdb_ctx->params[0].thread_id.tid); + cpu = gdb_get_cpu(get_param(params, 0)->thread_id.pid, + get_param(params, 0)->thread_id.tid); if (!cpu) { put_packet("E22"); return; @@ -1595,17 +1585,17 @@ static void handle_thread_alive(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_continue(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_continue(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params) { - gdb_set_cpu_pc(gdb_ctx->params[0].val_ull); + if (params->len) { + gdb_set_cpu_pc(get_param(params, 0)->val_ull); } gdbserver_state.signal = 0; gdb_continue(); } -static void handle_cont_with_sig(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_cont_with_sig(GArray *params, void *user_ctx) { unsigned long signal = 0; @@ -1613,8 +1603,8 @@ static void handle_cont_with_sig(GdbCmdContext *gdb_ctx, void *user_ctx) * Note: C sig;[addr] is currently unsupported and we simply * omit the addr parameter */ - if (gdb_ctx->num_params) { - signal = gdb_ctx->params[0].val_ul; + if (params->len) { + signal = get_param(params, 0)->val_ul; } gdbserver_state.signal = gdb_signal_to_target(signal); @@ -1624,27 +1614,27 @@ static void handle_cont_with_sig(GdbCmdContext *gdb_ctx, void *user_ctx) gdb_continue(); } -static void handle_set_thread(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_thread(GArray *params, void *user_ctx) { CPUState *cpu; - if (gdb_ctx->num_params != 2) { + if (params->len != 2) { put_packet("E22"); return; } - if (gdb_ctx->params[1].thread_id.kind == GDB_READ_THREAD_ERR) { + if (get_param(params, 1)->thread_id.kind == GDB_READ_THREAD_ERR) { put_packet("E22"); return; } - if (gdb_ctx->params[1].thread_id.kind != GDB_ONE_THREAD) { + if (get_param(params, 1)->thread_id.kind != GDB_ONE_THREAD) { put_packet("OK"); return; } - cpu = gdb_get_cpu(gdb_ctx->params[1].thread_id.pid, - gdb_ctx->params[1].thread_id.tid); + cpu = gdb_get_cpu(get_param(params, 1)->thread_id.pid, + get_param(params, 1)->thread_id.tid); if (!cpu) { put_packet("E22"); return; @@ -1654,7 +1644,7 @@ static void handle_set_thread(GdbCmdContext *gdb_ctx, void *user_ctx) * Note: This command is deprecated and modern gdb's will be using the * vCont command instead. 
*/ - switch (gdb_ctx->params[0].opcode) { + switch (get_param(params, 0)->opcode) { case 'c': gdbserver_state.c_cpu = cpu; put_packet("OK"); @@ -1669,18 +1659,18 @@ static void handle_set_thread(GdbCmdContext *gdb_ctx, void *user_ctx) } } -static void handle_insert_bp(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_insert_bp(GArray *params, void *user_ctx) { int res; - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } - res = gdb_breakpoint_insert(gdb_ctx->params[0].val_ul, - gdb_ctx->params[1].val_ull, - gdb_ctx->params[2].val_ull); + res = gdb_breakpoint_insert(get_param(params, 0)->val_ul, + get_param(params, 1)->val_ull, + get_param(params, 2)->val_ull); if (res >= 0) { put_packet("OK"); return; @@ -1692,18 +1682,18 @@ static void handle_insert_bp(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("E22"); } -static void handle_remove_bp(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_remove_bp(GArray *params, void *user_ctx) { int res; - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } - res = gdb_breakpoint_remove(gdb_ctx->params[0].val_ul, - gdb_ctx->params[1].val_ull, - gdb_ctx->params[2].val_ull); + res = gdb_breakpoint_remove(get_param(params, 0)->val_ul, + get_param(params, 1)->val_ull, + get_param(params, 2)->val_ull); if (res >= 0) { put_packet("OK"); return; @@ -1726,7 +1716,7 @@ static void handle_remove_bp(GdbCmdContext *gdb_ctx, void *user_ctx) * the remote gdb to fallback to older methods. */ -static void handle_set_reg(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_reg(GArray *params, void *user_ctx) { int reg_size; @@ -1735,19 +1725,19 @@ static void handle_set_reg(GdbCmdContext *gdb_ctx, void *user_ctx) return; } - if (gdb_ctx->num_params != 2) { + if (params->len != 2) { put_packet("E22"); return; } - reg_size = strlen(gdb_ctx->params[1].data) / 2; - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[1].data, reg_size); + reg_size = strlen(get_param(params, 1)->data) / 2; + hextomem(gdbserver_state.mem_buf, get_param(params, 1)->data, reg_size); gdb_write_register(gdbserver_state.g_cpu, gdbserver_state.mem_buf->data, - gdb_ctx->params[0].val_ull); + get_param(params, 0)->val_ull); put_packet("OK"); } -static void handle_get_reg(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_get_reg(GArray *params, void *user_ctx) { int reg_size; @@ -1756,14 +1746,14 @@ static void handle_get_reg(GdbCmdContext *gdb_ctx, void *user_ctx) return; } - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E14"); return; } reg_size = gdb_read_register(gdbserver_state.g_cpu, gdbserver_state.mem_buf, - gdb_ctx->params[0].val_ull); + get_param(params, 0)->val_ull); if (!reg_size) { put_packet("E14"); return; @@ -1775,22 +1765,24 @@ static void handle_get_reg(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_write_mem(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_write_mem(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } /* hextomem() reads 2*len bytes */ - if (gdb_ctx->params[1].val_ull > strlen(gdb_ctx->params[2].data) / 2) { + if (get_param(params, 1)->val_ull > + strlen(get_param(params, 2)->data) / 2) { put_packet("E22"); return; } - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[2].data, - gdb_ctx->params[1].val_ull); - if (target_memory_rw_debug(gdbserver_state.g_cpu, gdb_ctx->params[0].val_ull, + hextomem(gdbserver_state.mem_buf, 
get_param(params, 2)->data, + get_param(params, 1)->val_ull); + if (target_memory_rw_debug(gdbserver_state.g_cpu, + get_param(params, 0)->val_ull, gdbserver_state.mem_buf->data, gdbserver_state.mem_buf->len, true)) { put_packet("E14"); @@ -1800,22 +1792,24 @@ static void handle_write_mem(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_read_mem(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_read_mem(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params != 2) { + if (params->len != 2) { put_packet("E22"); return; } /* memtohex() doubles the required space */ - if (gdb_ctx->params[1].val_ull > MAX_PACKET_LENGTH / 2) { + if (get_param(params, 1)->val_ull > MAX_PACKET_LENGTH / 2) { put_packet("E22"); return; } - g_byte_array_set_size(gdbserver_state.mem_buf, gdb_ctx->params[1].val_ull); + g_byte_array_set_size(gdbserver_state.mem_buf, + get_param(params, 1)->val_ull); - if (target_memory_rw_debug(gdbserver_state.g_cpu, gdb_ctx->params[0].val_ull, + if (target_memory_rw_debug(gdbserver_state.g_cpu, + get_param(params, 0)->val_ull, gdbserver_state.mem_buf->data, gdbserver_state.mem_buf->len, false)) { put_packet("E14"); @@ -1827,19 +1821,19 @@ static void handle_read_mem(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_write_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_write_all_regs(GArray *params, void *user_ctx) { target_ulong addr, len; uint8_t *registers; int reg_size; - if (!gdb_ctx->num_params) { + if (!params->len) { return; } cpu_synchronize_state(gdbserver_state.g_cpu); - len = strlen(gdb_ctx->params[0].data) / 2; - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[0].data, len); + len = strlen(get_param(params, 0)->data) / 2; + hextomem(gdbserver_state.mem_buf, get_param(params, 0)->data, len); registers = gdbserver_state.mem_buf->data; for (addr = 0; addr < gdbserver_state.g_cpu->gdb_num_g_regs && len > 0; addr++) { @@ -1850,7 +1844,7 @@ static void handle_write_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet("OK"); } -static void handle_read_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_read_all_regs(GArray *params, void *user_ctx) { target_ulong addr, len; @@ -1868,14 +1862,14 @@ static void handle_read_all_regs(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_file_io(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_file_io(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params >= 1 && gdbserver_state.current_syscall_cb) { + if (params->len >= 1 && gdbserver_state.current_syscall_cb) { target_ulong ret, err; - ret = (target_ulong)gdb_ctx->params[0].val_ull; - if (gdb_ctx->num_params >= 2) { - err = (target_ulong)gdb_ctx->params[1].val_ull; + ret = (target_ulong)get_param(params, 0)->val_ull; + if (params->len >= 2) { + err = (target_ulong)get_param(params, 1)->val_ull; } else { err = 0; } @@ -1883,7 +1877,7 @@ static void handle_file_io(GdbCmdContext *gdb_ctx, void *user_ctx) gdbserver_state.current_syscall_cb = NULL; } - if (gdb_ctx->num_params >= 3 && gdb_ctx->params[2].opcode == (uint8_t)'C') { + if (params->len >= 3 && get_param(params, 2)->opcode == (uint8_t)'C') { put_packet("T02"); return; } @@ -1891,23 +1885,23 @@ static void handle_file_io(GdbCmdContext *gdb_ctx, void *user_ctx) gdb_continue(); } -static void handle_step(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_step(GArray *params, void *user_ctx) { - if (gdb_ctx->num_params) { - 
gdb_set_cpu_pc((target_ulong)gdb_ctx->params[0].val_ull); + if (params->len) { + gdb_set_cpu_pc((target_ulong)get_param(params, 0)->val_ull); } cpu_single_step(gdbserver_state.c_cpu, get_sstep_flags()); gdb_continue(); } -static void handle_backward(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_backward(GArray *params, void *user_ctx) { - if (replay_mode != REPLAY_MODE_PLAY) { + if (!stub_can_reverse()) { put_packet("E22"); } - if (gdb_ctx->num_params == 1) { - switch (gdb_ctx->params[0].opcode) { + if (params->len == 1) { + switch (get_param(params, 0)->opcode) { case 's': if (replay_reverse_step()) { gdb_continue(); @@ -1929,20 +1923,20 @@ static void handle_backward(GdbCmdContext *gdb_ctx, void *user_ctx) put_packet(""); } -static void handle_v_cont_query(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_cont_query(GArray *params, void *user_ctx) { put_packet("vCont;c;C;s;S"); } -static void handle_v_cont(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_cont(GArray *params, void *user_ctx) { int res; - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - res = gdb_handle_vcont(gdb_ctx->params[0].data); + res = gdb_handle_vcont(get_param(params, 0)->data); if ((res == -EINVAL) || (res == -ERANGE)) { put_packet("E22"); } else if (res) { @@ -1950,17 +1944,17 @@ static void handle_v_cont(GdbCmdContext *gdb_ctx, void *user_ctx) } } -static void handle_v_attach(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_attach(GArray *params, void *user_ctx) { GDBProcess *process; CPUState *cpu; g_string_assign(gdbserver_state.str_buf, "E22"); - if (!gdb_ctx->num_params) { + if (!params->len) { goto cleanup; } - process = gdb_get_process(gdb_ctx->params[0].val_ul); + process = gdb_get_process(get_param(params, 0)->val_ul); if (!process) { goto cleanup; } @@ -1981,7 +1975,7 @@ static void handle_v_attach(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_v_kill(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_kill(GArray *params, void *user_ctx) { /* Kill the target */ put_packet("OK"); @@ -1990,7 +1984,7 @@ static void handle_v_kill(GdbCmdContext *gdb_ctx, void *user_ctx) exit(0); } -static GdbCmdParseEntry gdb_v_commands_table[] = { +static const GdbCmdParseEntry gdb_v_commands_table[] = { /* Order is important if has same prefix */ { .handler = handle_v_cont_query, @@ -2016,43 +2010,43 @@ static GdbCmdParseEntry gdb_v_commands_table[] = { }, }; -static void handle_v_commands(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_v_commands(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - if (process_string_cmd(NULL, gdb_ctx->params[0].data, + if (process_string_cmd(NULL, get_param(params, 0)->data, gdb_v_commands_table, ARRAY_SIZE(gdb_v_commands_table))) { put_packet(""); } } -static void handle_query_qemu_sstepbits(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_qemu_sstepbits(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "ENABLE=%x,NOIRQ=%x,NOTIMER=%x", SSTEP_ENABLE, SSTEP_NOIRQ, SSTEP_NOTIMER); put_strbuf(); } -static void handle_set_qemu_sstep(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_qemu_sstep(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - sstep_flags = gdb_ctx->params[0].val_ul; + sstep_flags = get_param(params, 0)->val_ul; put_packet("OK"); } -static void handle_query_qemu_sstep(GdbCmdContext *gdb_ctx, void *user_ctx) +static 
void handle_query_qemu_sstep(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "0x%x", sstep_flags); put_strbuf(); } -static void handle_query_curr_tid(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_curr_tid(GArray *params, void *user_ctx) { CPUState *cpu; GDBProcess *process; @@ -2069,7 +2063,7 @@ static void handle_query_curr_tid(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_query_threads(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_threads(GArray *params, void *user_ctx) { if (!gdbserver_state.query_cpu) { put_packet("l"); @@ -2082,25 +2076,25 @@ static void handle_query_threads(GdbCmdContext *gdb_ctx, void *user_ctx) gdbserver_state.query_cpu = gdb_next_attached_cpu(gdbserver_state.query_cpu); } -static void handle_query_first_threads(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_first_threads(GArray *params, void *user_ctx) { gdbserver_state.query_cpu = gdb_first_attached_cpu(); - handle_query_threads(gdb_ctx, user_ctx); + handle_query_threads(params, user_ctx); } -static void handle_query_thread_extra(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_thread_extra(GArray *params, void *user_ctx) { g_autoptr(GString) rs = g_string_new(NULL); CPUState *cpu; - if (!gdb_ctx->num_params || - gdb_ctx->params[0].thread_id.kind == GDB_READ_THREAD_ERR) { + if (!params->len || + get_param(params, 0)->thread_id.kind == GDB_READ_THREAD_ERR) { put_packet("E22"); return; } - cpu = gdb_get_cpu(gdb_ctx->params[0].thread_id.pid, - gdb_ctx->params[0].thread_id.tid); + cpu = gdb_get_cpu(get_param(params, 0)->thread_id.pid, + get_param(params, 0)->thread_id.tid); if (!cpu) { return; } @@ -2125,7 +2119,7 @@ static void handle_query_thread_extra(GdbCmdContext *gdb_ctx, void *user_ctx) } #ifdef CONFIG_USER_ONLY -static void handle_query_offsets(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_offsets(GArray *params, void *user_ctx) { TaskState *ts; @@ -2140,17 +2134,17 @@ static void handle_query_offsets(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } #else -static void handle_query_rcmd(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_rcmd(GArray *params, void *user_ctx) { const guint8 zero = 0; int len; - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - len = strlen(gdb_ctx->params[0].data); + len = strlen(get_param(params, 0)->data); if (len % 2) { put_packet("E01"); return; @@ -2158,7 +2152,7 @@ static void handle_query_rcmd(GdbCmdContext *gdb_ctx, void *user_ctx) g_assert(gdbserver_state.mem_buf->len == 0); len = len / 2; - hextomem(gdbserver_state.mem_buf, gdb_ctx->params[0].data, len); + hextomem(gdbserver_state.mem_buf, get_param(params, 0)->data, len); g_byte_array_append(gdbserver_state.mem_buf, &zero, 1); qemu_chr_be_write(gdbserver_state.mon_chr, gdbserver_state.mem_buf->data, gdbserver_state.mem_buf->len); @@ -2166,7 +2160,7 @@ static void handle_query_rcmd(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif -static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_supported(GArray *params, void *user_ctx) { CPUClass *cc; @@ -2176,7 +2170,7 @@ static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) g_string_append(gdbserver_state.str_buf, ";qXfer:features:read+"); } - if (replay_mode == REPLAY_MODE_PLAY) { + if (stub_can_reverse()) { g_string_append(gdbserver_state.str_buf, ";ReverseStep+;ReverseContinue+"); } @@ -2187,8 +2181,8 @@ 
static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif - if (gdb_ctx->num_params && - strstr(gdb_ctx->params[0].data, "multiprocess+")) { + if (params->len && + strstr(get_param(params, 0)->data, "multiprocess+")) { gdbserver_state.multiprocess = true; } @@ -2200,7 +2194,7 @@ static void handle_query_supported(GdbCmdContext *gdb_ctx, void *user_ctx) put_strbuf(); } -static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_xfer_features(GArray *params, void *user_ctx) { GDBProcess *process; CPUClass *cc; @@ -2208,7 +2202,7 @@ static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) const char *xml; const char *p; - if (gdb_ctx->num_params < 3) { + if (params->len < 3) { put_packet("E22"); return; } @@ -2221,15 +2215,15 @@ static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) } gdb_has_xml = true; - p = gdb_ctx->params[0].data; + p = get_param(params, 0)->data; xml = get_feature_xml(p, &p, process); if (!xml) { put_packet("E00"); return; } - addr = gdb_ctx->params[1].val_ul; - len = gdb_ctx->params[2].val_ul; + addr = get_param(params, 1)->val_ul; + len = get_param(params, 2)->val_ul; total_len = strlen(xml); if (addr > total_len) { put_packet("E00"); @@ -2253,18 +2247,18 @@ static void handle_query_xfer_features(GdbCmdContext *gdb_ctx, void *user_ctx) } #if defined(CONFIG_USER_ONLY) && defined(CONFIG_LINUX_USER) -static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_xfer_auxv(GArray *params, void *user_ctx) { TaskState *ts; unsigned long offset, len, saved_auxv, auxv_len; - if (gdb_ctx->num_params < 2) { + if (params->len < 2) { put_packet("E22"); return; } - offset = gdb_ctx->params[0].val_ul; - len = gdb_ctx->params[1].val_ul; + offset = get_param(params, 0)->val_ul; + len = get_param(params, 1)->val_ul; ts = gdbserver_state.c_cpu->opaque; saved_auxv = ts->info->saved_auxv; auxv_len = ts->info->auxv_len; @@ -2300,24 +2294,24 @@ static void handle_query_xfer_auxv(GdbCmdContext *gdb_ctx, void *user_ctx) #endif #if defined(TARGET_CHERI) && !defined(CONFIG_USER_ONLY) -static void handle_query_xfer_capa_read(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_xfer_capa_read(GArray *params, void *user_ctx) { uint8_t capbuf[CHERI_CAP_SIZE + 1]; uint64_t addr; unsigned long len, offset; - if (gdb_ctx->num_params != 3) { + if (params->len != 3) { put_packet("E22"); return; } - addr = gdb_ctx->params[0].val_ull; + addr = get_param(params, 0)->val_ull; if (addr % CHERI_CAP_SIZE != 0) { put_packet("E22"); return; } - offset = gdb_ctx->params[1].val_ul; + offset = get_param(params, 1)->val_ul; if (offset > sizeof(capbuf)) { put_packet("E22"); return; @@ -2333,7 +2327,7 @@ static void handle_query_xfer_capa_read(GdbCmdContext *gdb_ctx, void *user_ctx) return; } - len = gdb_ctx->params[2].val_ul; + len = get_param(params, 2)->val_ul; if (len > sizeof(capbuf) - offset) { len = sizeof(capbuf) - offset; } @@ -2349,12 +2343,12 @@ static void handle_query_xfer_capa_read(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif -static void handle_query_attached(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_attached(GArray *params, void *user_ctx) { put_packet(GDB_ATTACHED); } -static void handle_query_qemu_supported(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_query_qemu_supported(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "sstepbits;sstep"); #ifndef CONFIG_USER_ONLY @@ 
-2364,21 +2358,21 @@ static void handle_query_qemu_supported(GdbCmdContext *gdb_ctx, void *user_ctx) } #ifndef CONFIG_USER_ONLY -static void handle_query_qemu_phy_mem_mode(GdbCmdContext *gdb_ctx, +static void handle_query_qemu_phy_mem_mode(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "%d", phy_memory_mode); put_strbuf(); } -static void handle_set_qemu_phy_mem_mode(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_set_qemu_phy_mem_mode(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { put_packet("E22"); return; } - if (!gdb_ctx->params[0].val_ul) { + if (!get_param(params, 0)->val_ul) { phy_memory_mode = 0; } else { phy_memory_mode = 1; @@ -2387,7 +2381,7 @@ static void handle_set_qemu_phy_mem_mode(GdbCmdContext *gdb_ctx, void *user_ctx) } #endif -static GdbCmdParseEntry gdb_gen_query_set_common_table[] = { +static const GdbCmdParseEntry gdb_gen_query_set_common_table[] = { /* Order is important if has same prefix */ { .handler = handle_query_qemu_sstepbits, @@ -2405,7 +2399,7 @@ static GdbCmdParseEntry gdb_gen_query_set_common_table[] = { }, }; -static GdbCmdParseEntry gdb_gen_query_table[] = { +static const GdbCmdParseEntry gdb_gen_query_table[] = { { .handler = handle_query_curr_tid, .cmd = "C", @@ -2491,7 +2485,7 @@ static GdbCmdParseEntry gdb_gen_query_table[] = { #endif }; -static GdbCmdParseEntry gdb_gen_set_table[] = { +static const GdbCmdParseEntry gdb_gen_set_table[] = { /* Order is important if has same prefix */ { .handler = handle_set_qemu_sstep, @@ -2509,45 +2503,45 @@ static GdbCmdParseEntry gdb_gen_set_table[] = { #endif }; -static void handle_gen_query(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_gen_query(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - if (!process_string_cmd(NULL, gdb_ctx->params[0].data, + if (!process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_query_set_common_table, ARRAY_SIZE(gdb_gen_query_set_common_table))) { return; } - if (process_string_cmd(NULL, gdb_ctx->params[0].data, + if (process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_query_table, ARRAY_SIZE(gdb_gen_query_table))) { put_packet(""); } } -static void handle_gen_set(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_gen_set(GArray *params, void *user_ctx) { - if (!gdb_ctx->num_params) { + if (!params->len) { return; } - if (!process_string_cmd(NULL, gdb_ctx->params[0].data, + if (!process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_query_set_common_table, ARRAY_SIZE(gdb_gen_query_set_common_table))) { return; } - if (process_string_cmd(NULL, gdb_ctx->params[0].data, + if (process_string_cmd(NULL, get_param(params, 0)->data, gdb_gen_set_table, ARRAY_SIZE(gdb_gen_set_table))) { put_packet(""); } } -static void handle_target_halt(GdbCmdContext *gdb_ctx, void *user_ctx) +static void handle_target_halt(GArray *params, void *user_ctx) { g_string_printf(gdbserver_state.str_buf, "T%02xthread:", GDB_SIGNAL_TRAP); gdb_append_thread_id(gdbserver_state.c_cpu, gdbserver_state.str_buf); @@ -3221,8 +3215,12 @@ gdb_handlesig(CPUState *cpu, int sig) tb_flush(cpu); if (sig != 0) { - snprintf(buf, sizeof(buf), "S%02x", target_signal_to_gdb(sig)); - put_packet(buf); + gdb_set_stop_cpu(cpu); + g_string_printf(gdbserver_state.str_buf, + "T%02xthread:", target_signal_to_gdb(sig)); + gdb_append_thread_id(cpu, gdbserver_state.str_buf); + g_string_append_c(gdbserver_state.str_buf, ';'); + put_strbuf(); } /* put_packet() might have detected 
that the peer terminated the connection. */ @@ -3301,7 +3299,7 @@ static bool gdb_accept_socket(int gdb_fd) static int gdbserver_open_socket(const char *path) { - struct sockaddr_un sockaddr; + struct sockaddr_un sockaddr = {}; int fd, ret; fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -3330,7 +3328,7 @@ static int gdbserver_open_socket(const char *path) static bool gdb_accept_tcp(int gdb_fd) { - struct sockaddr_in sockaddr; + struct sockaddr_in sockaddr = {}; socklen_t len; int fd; diff --git a/gitdm.config b/gitdm.config index c01c2190787..288b100d89d 100644 --- a/gitdm.config +++ b/gitdm.config @@ -28,20 +28,25 @@ EmailMap contrib/gitdm/domain-map # # Use GroupMap to map a file full of addresses to the # same employer. This is used for people that don't post from easily -# identifiable corporate emails. +# identifiable corporate emails. Please keep this list sorted. # -GroupMap contrib/gitdm/group-map-redhat Red Hat -GroupMap contrib/gitdm/group-map-wavecomp Wave Computing GroupMap contrib/gitdm/group-map-cadence Cadence Design Systems GroupMap contrib/gitdm/group-map-codeweavers CodeWeavers GroupMap contrib/gitdm/group-map-ibm IBM GroupMap contrib/gitdm/group-map-janustech Janus Technologies +GroupMap contrib/gitdm/group-map-netflix Netflix +GroupMap contrib/gitdm/group-map-redhat Red Hat +GroupMap contrib/gitdm/group-map-wavecomp Wave Computing # Also group together our prolific individual contributors -# and those working under academic auspices +# and those working under academic or intern auspices GroupMap contrib/gitdm/group-map-individuals (None) GroupMap contrib/gitdm/group-map-academics Academics (various) +GroupMap contrib/gitdm/group-map-interns GSoC/Outreachy Interns + +# Group together robots and other auto-reporters +GroupMap contrib/gitdm/group-map-robots Robots (various) # # diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index ab0c7aa5eea..407a1da800c 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -127,21 +127,6 @@ SRST Show local APIC state ERST -#if defined(TARGET_I386) - { - .name = "ioapic", - .args_type = "", - .params = "", - .help = "show io apic state", - .cmd = hmp_info_io_apic, - }, -#endif - -SRST - ``info ioapic`` - Show io APIC state -ERST - { .name = "cpus", .args_type = "", @@ -174,7 +159,7 @@ ERST .args_type = "", .params = "", .help = "show the interrupts statistics (if available)", - .cmd = hmp_info_irq, + .cmd_info_hrt = qmp_x_query_irq, }, SRST @@ -200,7 +185,7 @@ ERST .args_type = "", .params = "", .help = "show RDMA state", - .cmd = hmp_info_rdma, + .cmd_info_hrt = qmp_x_query_rdma, }, SRST @@ -274,7 +259,6 @@ ERST .args_type = "", .params = "", .help = "show dynamic compiler info", - .cmd = hmp_info_jit, }, #endif @@ -289,7 +273,6 @@ ERST .args_type = "", .params = "", .help = "show dynamic compiler opcode counters", - .cmd = hmp_info_opcount, }, #endif @@ -342,7 +325,7 @@ ERST .args_type = "", .params = "", .help = "show NUMA information", - .cmd = hmp_info_numa, + .cmd_info_hrt = qmp_x_query_numa, }, SRST @@ -355,7 +338,7 @@ ERST .args_type = "", .params = "", .help = "show guest USB devices", - .cmd = hmp_info_usb, + .cmd_info_hrt = qmp_x_query_usb, }, SRST @@ -368,7 +351,6 @@ ERST .args_type = "", .params = "", .help = "show host USB devices", - .cmd = hmp_info_usbhost, }, SRST @@ -381,7 +363,7 @@ ERST .args_type = "", .params = "", .help = "show profiling information", - .cmd = hmp_info_profile, + .cmd_info_hrt = qmp_x_query_profile, }, SRST @@ -500,19 +482,6 @@ SRST Show the current VM UUID. 
ERST - { - .name = "cpustats", - .args_type = "", - .params = "", - .help = "show CPU statistics", - .cmd = hmp_info_cpustats, - }, - -SRST - ``info cpustats`` - Show CPU statistics. -ERST - #if defined(CONFIG_SLIRP) { .name = "usernet", @@ -625,7 +594,7 @@ ERST .args_type = "", .params = "", .help = "show roms", - .cmd = hmp_info_roms, + .cmd_info_hrt = qmp_x_query_roms, }, SRST @@ -803,7 +772,7 @@ ERST .args_type = "", .params = "", .help = "Display system ramblock information", - .cmd = hmp_info_ramblock, + .cmd_info_hrt = qmp_x_query_ramblock, }, SRST @@ -880,3 +849,31 @@ SRST ``info replay`` Display the record/replay information: mode and the current icount. ERST + + { + .name = "dirty_rate", + .args_type = "", + .params = "", + .help = "show dirty rate information", + .cmd = hmp_info_dirty_rate, + }, + +SRST + ``info dirty_rate`` + Display the vcpu dirty rate information. +ERST + +#if defined(TARGET_I386) + { + .name = "sgx", + .args_type = "", + .params = "", + .help = "show intel SGX information", + .cmd = hmp_info_sgx, + }, +#endif + +SRST + ``info sgx`` + Show intel SGX information. +ERST diff --git a/hmp-commands.hx b/hmp-commands.hx index 18b7b783c57..a999f22df82 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -382,7 +382,7 @@ SRST ERST { - .name = "stop", + .name = "stop|s", .args_type = "", .params = "", .help = "stop emulation", @@ -390,7 +390,7 @@ ERST }, SRST -``stop`` +``stop`` or ``s`` Stop emulation. ERST @@ -1269,6 +1269,7 @@ ERST .help = "add host network device", .cmd = hmp_netdev_add, .command_completion = netdev_add_completion, + .flags = "p", }, SRST @@ -1283,6 +1284,7 @@ ERST .help = "remove host network device", .cmd = hmp_netdev_del, .command_completion = netdev_del_completion, + .flags = "p", }, SRST @@ -1297,6 +1299,7 @@ ERST .help = "create QOM object", .cmd = hmp_object_add, .command_completion = object_add_completion, + .flags = "p", }, SRST @@ -1311,6 +1314,7 @@ ERST .help = "destroy QOM object", .cmd = hmp_object_del, .command_completion = object_del_completion, + .flags = "p", }, SRST @@ -1518,12 +1522,11 @@ ERST SRST ``set_password [ vnc | spice ] password [ action-if-connected ]`` - Change spice/vnc password. Use zero to make the password stay valid - forever. *action-if-connected* specifies what should happen in - case a connection is established: *fail* makes the password change - fail. *disconnect* changes the password and disconnects the - client. *keep* changes the password and keeps the connection up. - *keep* is the default. + Change spice/vnc password. *action-if-connected* specifies what + should happen in case a connection is established: *fail* makes the + password change fail. *disconnect* changes the password and + disconnects the client. *keep* changes the password and keeps the + connection up. *keep* is the default. ERST { @@ -1667,7 +1670,7 @@ ERST { .name = "replay_break", - .args_type = "icount:i", + .args_type = "icount:l", .params = "icount", .help = "set breakpoint at the specified instruction count", .cmd = hmp_replay_break, @@ -1699,7 +1702,7 @@ ERST { .name = "replay_seek", - .args_type = "icount:i", + .args_type = "icount:l", .params = "icount", .help = "replay execution to the specified instruction count", .cmd = hmp_replay_seek, @@ -1725,6 +1728,23 @@ ERST .flags = "p", }, +SRST +``calc_dirty_rate`` *second* + Start a round of dirty rate measurement with the period specified in *second*. + The result of the dirty rate measurement may be observed with ``info + dirty_rate`` command. 
+ERST + + { + .name = "calc_dirty_rate", + .args_type = "dirty_ring:-r,dirty_bitmap:-b,second:l,sample_pages_per_GB:l?", + .params = "[-r] [-b] second [sample_pages_per_GB]", + .help = "start a round of guest dirty rate measurement (using -r to" + "\n\t\t\t specify dirty ring as the method of calculation and" + "\n\t\t\t -b to specify dirty bitmap as method of calculation)", + .cmd = hmp_calc_dirty_rate, + }, + { .name = "cheri_trace_buffer_size", .args_type = "buffer_size:i", @@ -1736,4 +1756,4 @@ ERST SRST ``cheri_trace_buffer_size`` *buffer_size* Set the instruction trace buffer size to the given number of entries.. -ERST +ERST \ No newline at end of file diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 890e25a6834..42b65e143b3 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -10,6 +10,11 @@ * the COPYING file in the top-level directory. */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "9p-local.h" diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index bbf89064f7a..eadae270dde 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "qemu/xattr.h" #include "9p.h" diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index 9db07dfa845..be1546c1be5 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -10,6 +10,11 @@ * the COPYING file in the top-level directory. */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include #include diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c index d538fd4f305..09b9c252885 100644 --- a/hw/9pfs/9p-synth.c +++ b/hw/9pfs/9p-synth.c @@ -12,6 +12,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "fsdev/qemu-fsdev.h" diff --git a/hw/9pfs/9p-util-linux.c b/hw/9pfs/9p-util-linux.c index 06399c59199..4f57d8c0471 100644 --- a/hw/9pfs/9p-util-linux.c +++ b/hw/9pfs/9p-util-linux.c @@ -10,6 +10,11 @@ * See the COPYING file in the top-level directory. */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "qemu/xattr.h" #include "9p-util.h" diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index 2c90817b75a..f2ae9582e61 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "fsdev/file-op-9p.h" diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index c696d8f8460..9ae69dd8dbc 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "9p.h" #include "fsdev/file-op-9p.h" diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 9f20338ac5c..71984bb7f84 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! 
You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include #include "hw/virtio/virtio.h" @@ -48,6 +53,8 @@ enum { Oappend = 0x80, }; +P9ARRAY_DEFINE_TYPE(V9fsPath, v9fs_path_free); + static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) { ssize_t ret; @@ -978,23 +985,6 @@ static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) return 0; } -static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, - V9fsQID *qidp) -{ - struct stat stbuf; - int err; - - err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); - if (err < 0) { - return err; - } - err = stat_to_qid(pdu, &stbuf, qidp); - if (err < 0) { - return err; - } - return 0; -} - V9fsPDU *pdu_alloc(V9fsState *s) { V9fsPDU *pdu = NULL; @@ -1286,6 +1276,37 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path, #define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ +/** + * Convert host filesystem's block size into an appropriate block size for + * 9p client (guest OS side). The value returned suggests an "optimum" block + * size for 9p I/O, i.e. to maximize performance. + * + * @pdu: 9p client request + * @blksize: host filesystem's block size + */ +static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) +{ + int32_t iounit = 0; + V9fsState *s = pdu->s; + + /* + * iounit should be multiples of blksize (host filesystem block size) + * as well as less than (client msize - P9_IOHDRSZ) + */ + if (blksize) { + iounit = QEMU_ALIGN_DOWN(s->msize - P9_IOHDRSZ, blksize); + } + if (!iounit) { + iounit = s->msize - P9_IOHDRSZ; + } + return iounit; +} + +static int32_t stat_to_iounit(const V9fsPDU *pdu, const struct stat *stbuf) +{ + return blksize_to_iounit(pdu, stbuf->st_blksize); +} + static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, V9fsStatDotl *v9lstat) { @@ -1297,7 +1318,7 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, v9lstat->st_gid = stbuf->st_gid; v9lstat->st_rdev = stbuf->st_rdev; v9lstat->st_size = stbuf->st_size; - v9lstat->st_blksize = stbuf->st_blksize; + v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); v9lstat->st_blocks = stbuf->st_blocks; v9lstat->st_atime_sec = stbuf->st_atime; v9lstat->st_mtime_sec = stbuf->st_mtime; @@ -1413,6 +1434,7 @@ static void coroutine_fn v9fs_attach(void *opaque) size_t offset = 7; V9fsQID qid; ssize_t err; + struct stat stbuf; v9fs_string_init(&uname); v9fs_string_init(&aname); @@ -1435,7 +1457,13 @@ static void coroutine_fn v9fs_attach(void *opaque) clunk_fid(s, fid); goto out; } - err = fid_to_qid(pdu, fidp, &qid); + err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); + if (err < 0) { + err = -EINVAL; + clunk_fid(s, fid); + goto out; + } + err = stat_to_qid(pdu, &stbuf, &qid); if (err < 0) { err = -EINVAL; clunk_fid(s, fid); @@ -1467,7 +1495,7 @@ static void coroutine_fn v9fs_attach(void *opaque) } err += offset; - memcpy(&s->root_qid, &qid, sizeof(qid)); + memcpy(&s->root_st, &stbuf, sizeof(stbuf)); trace_v9fs_attach_return(pdu->tag, pdu->id, qid.type, qid.version, qid.path); out: @@ -1718,25 +1746,24 @@ static bool name_is_illegal(const char *name) return !*name || strchr(name, '/') != NULL; } -static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2) +static bool same_stat_id(const struct stat *a, const struct stat *b) { - return - qid1->type != qid2->type || - qid1->version != qid2->version || - qid1->path != qid2->path; + return a->st_dev == b->st_dev && a->st_ino == b->st_ino; } 
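As an illustrative aside (not part of the patch): the blksize_to_iounit() hunk above computes the largest multiple of the host filesystem block size that still fits into the client's msize minus the 9p I/O header, falling back to msize - P9_IOHDRSZ when the block size is zero or too large. A minimal standalone sketch of that arithmetic follows; the concrete values (128 KiB msize, a 24-byte 9p I/O header, 4 KiB host blocks) and the local ALIGN_DOWN macro standing in for QEMU_ALIGN_DOWN are assumptions for the example only.

    /* Sketch only -- illustrates the iounit calculation, not patch code. */
    #include <stdint.h>
    #include <stdio.h>

    /* stands in for QEMU_ALIGN_DOWN(n, m): round n down to a multiple of m */
    #define ALIGN_DOWN(n, m) ((n) / (m) * (m))

    static int32_t iounit_for(int32_t msize, int32_t iohdrsz, int32_t blksize)
    {
        int32_t iounit = 0;

        if (blksize) {
            /* largest multiple of the host block size that still fits */
            iounit = ALIGN_DOWN(msize - iohdrsz, blksize);
        }
        if (!iounit) {
            /* blksize was 0, or larger than msize - iohdrsz */
            iounit = msize - iohdrsz;
        }
        return iounit;
    }

    int main(void)
    {
        /* assumed: 128 KiB msize, 24-byte header, 4 KiB host blocks */
        printf("%d\n", iounit_for(128 * 1024, 24, 4096)); /* prints 126976 */
        return 0;
    }

With these assumed values, 131072 - 24 = 131048 is rounded down to the nearest 4096 multiple, 126976, which is what the guest is advertised as the optimum I/O unit.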
static void coroutine_fn v9fs_walk(void *opaque) { int name_idx; - V9fsQID *qids = NULL; + g_autofree V9fsQID *qids = NULL; int i, err = 0; V9fsPath dpath, path; + P9ARRAY_REF(V9fsPath) pathes = NULL; uint16_t nwnames; - struct stat stbuf; + struct stat stbuf, fidst; + g_autofree struct stat *stbufs = NULL; size_t offset = 7; int32_t fid, newfid; - V9fsString *wnames = NULL; + P9ARRAY_REF(V9fsString) wnames = NULL; V9fsFidState *fidp; V9fsFidState *newfidp = NULL; V9fsPDU *pdu = opaque; @@ -1752,9 +1779,15 @@ static void coroutine_fn v9fs_walk(void *opaque) trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames); - if (nwnames && nwnames <= P9_MAXWELEM) { - wnames = g_new0(V9fsString, nwnames); + if (nwnames > P9_MAXWELEM) { + err = -EINVAL; + goto out_nofid; + } + if (nwnames) { + P9ARRAY_NEW(V9fsString, wnames, nwnames); qids = g_new0(V9fsQID, nwnames); + stbufs = g_new0(struct stat, nwnames); + P9ARRAY_NEW(V9fsPath, pathes, nwnames); for (i = 0; i < nwnames; i++) { err = pdu_unmarshal(pdu, offset, "s", &wnames[i]); if (err < 0) { @@ -1766,9 +1799,6 @@ static void coroutine_fn v9fs_walk(void *opaque) } offset += err; } - } else if (nwnames > P9_MAXWELEM) { - err = -EINVAL; - goto out_nofid; } fidp = get_fid(pdu, fid); if (fidp == NULL) { @@ -1778,35 +1808,85 @@ static void coroutine_fn v9fs_walk(void *opaque) v9fs_path_init(&dpath); v9fs_path_init(&path); + /* + * Both dpath and path initially point to fidp. + * Needed to handle request with nwnames == 0 + */ + v9fs_path_copy(&dpath, &fidp->path); + v9fs_path_copy(&path, &fidp->path); - err = fid_to_qid(pdu, fidp, &qid); + /* + * To keep latency (i.e. overall execution time for processing this + * Twalk client request) as small as possible, run all the required fs + * driver code altogether inside the following block. + */ + v9fs_co_run_in_worker({ + if (v9fs_request_cancelled(pdu)) { + err = -EINTR; + break; + } + err = s->ops->lstat(&s->ctx, &dpath, &fidst); + if (err < 0) { + err = -errno; + break; + } + stbuf = fidst; + for (name_idx = 0; name_idx < nwnames; name_idx++) { + if (v9fs_request_cancelled(pdu)) { + err = -EINTR; + break; + } + if (!same_stat_id(&pdu->s->root_st, &stbuf) || + strcmp("..", wnames[name_idx].data)) + { + err = s->ops->name_to_path(&s->ctx, &dpath, + wnames[name_idx].data, + &pathes[name_idx]); + if (err < 0) { + err = -errno; + break; + } + if (v9fs_request_cancelled(pdu)) { + err = -EINTR; + break; + } + err = s->ops->lstat(&s->ctx, &pathes[name_idx], &stbuf); + if (err < 0) { + err = -errno; + break; + } + stbufs[name_idx] = stbuf; + v9fs_path_copy(&dpath, &pathes[name_idx]); + } + } + }); + /* + * Handle all the rest of this Twalk request on main thread ... + */ if (err < 0) { goto out; } - /* - * Both dpath and path initially poin to fidp. 
- * Needed to handle request with nwnames == 0 - */ + err = stat_to_qid(pdu, &fidst, &qid); + if (err < 0) { + goto out; + } + stbuf = fidst; + + /* reset dpath and path */ v9fs_path_copy(&dpath, &fidp->path); v9fs_path_copy(&path, &fidp->path); - for (name_idx = 0; name_idx < nwnames; name_idx++) { - if (not_same_qid(&pdu->s->root_qid, &qid) || - strcmp("..", wnames[name_idx].data)) { - err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, - &path); - if (err < 0) { - goto out; - } - err = v9fs_co_lstat(pdu, &path, &stbuf); - if (err < 0) { - goto out; - } + for (name_idx = 0; name_idx < nwnames; name_idx++) { + if (!same_stat_id(&pdu->s->root_st, &stbuf) || + strcmp("..", wnames[name_idx].data)) + { + stbuf = stbufs[name_idx]; err = stat_to_qid(pdu, &stbuf, &qid); if (err < 0) { goto out; } + v9fs_path_copy(&path, &pathes[name_idx]); v9fs_path_copy(&dpath, &path); } memcpy(&qids[name_idx], &qid, sizeof(qid)); @@ -1839,35 +1919,14 @@ static void coroutine_fn v9fs_walk(void *opaque) v9fs_path_free(&path); out_nofid: pdu_complete(pdu, err); - if (nwnames && nwnames <= P9_MAXWELEM) { - for (name_idx = 0; name_idx < nwnames; name_idx++) { - v9fs_string_free(&wnames[name_idx]); - } - g_free(wnames); - g_free(qids); - } } static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path) { struct statfs stbuf; - int32_t iounit = 0; - V9fsState *s = pdu->s; + int err = v9fs_co_statfs(pdu, path, &stbuf); - /* - * iounit should be multiples of f_bsize (host filesystem block size - * and as well as less than (client msize - P9_IOHDRSZ)) - */ - if (!v9fs_co_statfs(pdu, path, &stbuf)) { - if (stbuf.f_bsize) { - iounit = stbuf.f_bsize; - iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize; - } - } - if (!iounit) { - iounit = s->msize - P9_IOHDRSZ; - } - return iounit; + return blksize_to_iounit(pdu, (err >= 0) ? stbuf.f_bsize : 0); } static void coroutine_fn v9fs_open(void *opaque) diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 00381591ffa..1567b678411 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -355,7 +355,7 @@ struct V9fsState { int32_t root_fid; Error *migration_blocker; V9fsConf fsconf; - V9fsQID root_qid; + struct stat root_st; dev_t dev_id; struct qht qpd_table; struct qht qpp_table; diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c index 77a82ca4356..b078b294112 100644 --- a/hw/9pfs/codir.c +++ b/hw/9pfs/codir.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 83bb6c14e02..20f93a90e75 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/cofs.c b/hw/9pfs/cofs.c index 0b321b456e3..9d0adc2e786 100644 --- a/hw/9pfs/cofs.c +++ b/hw/9pfs/cofs.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/coth.c b/hw/9pfs/coth.c index 9778f24b000..2802d41cce2 100644 --- a/hw/9pfs/coth.c +++ b/hw/9pfs/coth.c @@ -12,6 +12,11 @@ * */ +/* + * Not so fast! 
You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "block/thread-pool.h" #include "qemu/coroutine.h" diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h index c9b2a8d4148..cb134ad7054 100644 --- a/hw/9pfs/coth.h +++ b/hw/9pfs/coth.h @@ -51,7 +51,9 @@ */ \ qemu_coroutine_yield(); \ qemu_bh_delete(co_bh); \ - code_block; \ + do { \ + code_block; \ + } while (0); \ /* re-enter back to qemu thread */ \ qemu_coroutine_yield(); \ } while (0) diff --git a/hw/9pfs/coxattr.c b/hw/9pfs/coxattr.c index 0e00ffaa0d4..dbcd09e0fd5 100644 --- a/hw/9pfs/coxattr.c +++ b/hw/9pfs/coxattr.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "fsdev/qemu-fsdev.h" #include "qemu/thread.h" diff --git a/hw/9pfs/trace-events b/hw/9pfs/trace-events index 10188daf7fa..6c77966c0b2 100644 --- a/hw/9pfs/trace-events +++ b/hw/9pfs/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # 9p.c v9fs_rcancel(uint16_t tag, uint8_t id) "tag %d id %d" diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 14371a78efd..54ee93b71fc 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -11,6 +11,11 @@ * */ +/* + * Not so fast! You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "hw/virtio/virtio.h" #include "qemu/sockets.h" diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index a969fcc54c3..65c4979c3c5 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -8,6 +8,11 @@ * */ +/* + * Not so fast! 
You might want to read the 9p developer docs first: + * https://wiki.qemu.org/Documentation/9p + */ + #include "qemu/osdep.h" #include "hw/9pfs/9p.h" diff --git a/hw/Kconfig b/hw/Kconfig index ff40bd3f7bb..ad20cce0a95 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -21,6 +21,7 @@ source mem/Kconfig source misc/Kconfig source net/Kconfig source nubus/Kconfig +source nvme/Kconfig source nvram/Kconfig source pci-bridge/Kconfig source pci-host/Kconfig @@ -31,6 +32,7 @@ source remote/Kconfig source rtc/Kconfig source scsi/Kconfig source sd/Kconfig +source sensor/Kconfig source smbios/Kconfig source ssi/Kconfig source timer/Kconfig @@ -47,11 +49,9 @@ source avr/Kconfig source cris/Kconfig source hppa/Kconfig source i386/Kconfig -source lm32/Kconfig source m68k/Kconfig source microblaze/Kconfig source mips/Kconfig -source moxie/Kconfig source nios2/Kconfig source openrisc/Kconfig source ppc/Kconfig @@ -62,7 +62,6 @@ source sh4/Kconfig source sparc/Kconfig source sparc64/Kconfig source tricore/Kconfig -source unicore32/Kconfig source xtensa/Kconfig # Symbols used by multiple targets @@ -82,3 +81,5 @@ config XLNX_ZYNQMP select REGISTER select CAN_BUS select PTIMER + select XLNX_BBRAM + select XLNX_EFUSE_ZYNQMP diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index 1932f66af8d..622b0b50b75 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -8,6 +8,8 @@ config ACPI_X86 select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG select ACPI_HMAT + select ACPI_PIIX4 + select ACPI_PCIHP config ACPI_X86_ICH bool @@ -24,6 +26,14 @@ config ACPI_NVDIMM bool depends on ACPI +config ACPI_PIIX4 + bool + depends on ACPI + +config ACPI_PCIHP + bool + depends on ACPI + config ACPI_HMAT bool depends on ACPI @@ -41,4 +51,12 @@ config ACPI_VMGENID default y depends on PC +config ACPI_VIOT + bool + depends on ACPI + config ACPI_HW_REDUCED + bool + select ACPI + select ACPI_MEMORY_HOTPLUG + select ACPI_NVDIMM diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c new file mode 100644 index 00000000000..3fc4b14c260 --- /dev/null +++ b/hw/acpi/acpi-cpu-hotplug-stub.c @@ -0,0 +1,50 @@ +#include "qemu/osdep.h" +#include "hw/acpi/cpu_hotplug.h" +#include "migration/vmstate.h" + + +/* Following stubs are all related to ACPI cpu hotplug */ +const VMStateDescription vmstate_cpu_hotplug; + +void acpi_switch_to_modern_cphp(AcpiCpuHotplug *gpe_cpu, + CPUHotplugState *cpuhp_state, + uint16_t io_port) +{ + return; +} + +void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, + AcpiCpuHotplug *gpe_cpu, uint16_t base) +{ + return; +} + +void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) +{ + return; +} + +void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, DeviceState *dev, Error **errp) +{ + return; +} + +void legacy_acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + AcpiCpuHotplug *g, DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + CPUHotplugState *cpu_st, + DeviceState *dev, Error **errp) +{ + return; +} diff --git a/hw/acpi/acpi-mem-hotplug-stub.c b/hw/acpi/acpi-mem-hotplug-stub.c new file mode 100644 index 00000000000..73a076a2657 --- /dev/null +++ b/hw/acpi/acpi-mem-hotplug-stub.c @@ -0,0 +1,35 @@ +#include "qemu/osdep.h" +#include "hw/acpi/memory_hotplug.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_memory_hotplug; + +void acpi_memory_hotplug_init(MemoryRegion 
*as, Object *owner, + MemHotplugState *state, hwaddr io_base) +{ + return; +} + +void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list) +{ + return; +} + +void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_memory_unplug_cb(MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_memory_unplug_request_cb(HotplugHandler *hotplug_dev, + MemHotplugState *mem_st, + DeviceState *dev, Error **errp) +{ + return; +} diff --git a/hw/acpi/acpi-nvdimm-stub.c b/hw/acpi/acpi-nvdimm-stub.c new file mode 100644 index 00000000000..8baff9be6f4 --- /dev/null +++ b/hw/acpi/acpi-nvdimm-stub.c @@ -0,0 +1,8 @@ +#include "qemu/osdep.h" +#include "hw/mem/nvdimm.h" +#include "hw/hotplug.h" + +void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + return; +} diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c new file mode 100644 index 00000000000..734e4c59868 --- /dev/null +++ b/hw/acpi/acpi-pci-hotplug-stub.c @@ -0,0 +1,47 @@ +#include "qemu/osdep.h" +#include "hw/acpi/pcihp.h" +#include "migration/vmstate.h" + +const VMStateDescription vmstate_acpi_pcihp_pci_status; + +void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base) +{ + return; +} + +void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + DeviceState *dev, Error **errp) +{ + return; +} + +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp) +{ + return; +} + +void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off) +{ + return; +} + +bool vmstate_acpi_pcihp_use_acpi_index(void *opaque, int version_id) +{ + return false; +} diff --git a/hw/acpi/acpi-x86-stub.c b/hw/acpi/acpi-x86-stub.c index f88d6a090b3..3df1e090f46 100644 --- a/hw/acpi/acpi-x86-stub.c +++ b/hw/acpi/acpi-x86-stub.c @@ -1,7 +1,14 @@ #include "qemu/osdep.h" #include "hw/i386/pc.h" +#include "hw/i386/acpi-build.h" void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry) + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled) { } + +Object *acpi_get_i386_pci_host(void) +{ + return NULL; +} diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index d33ce8954aa..b3b3310df32 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -52,6 +52,19 @@ static void build_append_byte(GArray *array, uint8_t val) g_array_append_val(array, val); } +static void build_append_padded_str(GArray *array, const char *str, + size_t maxlen, char pad) +{ + size_t i; + size_t len = strlen(str); + + g_assert(len <= maxlen); + g_array_append_vals(array, str, len); + for (i = maxlen - len; i > 0; i--) { + g_array_append_val(array, pad); + } +} + static void build_append_array(GArray *array, GArray *val) { g_array_append_vals(array, val->data, val->len); @@ -1692,27 +1705,53 @@ Aml *aml_object_type(Aml *object) return var; } -void -build_header(BIOSLinker *linker, GArray *table_data, - AcpiTableHeader *h, const char *sig, int len, uint8_t rev, - const char *oem_id, const char *oem_table_id) -{ - unsigned tbl_offset = (char *)h - 
table_data->data; - unsigned checksum_offset = (char *)&h->checksum - table_data->data; - memcpy(&h->signature, sig, 4); - h->length = cpu_to_le32(len); - h->revision = rev; - - strpadcpy((char *)h->oem_id, sizeof h->oem_id, oem_id, ' '); - strpadcpy((char *)h->oem_table_id, sizeof h->oem_table_id, - oem_table_id, ' '); - - h->oem_revision = cpu_to_le32(1); - memcpy(h->asl_compiler_id, ACPI_BUILD_APPNAME8, 4); - h->asl_compiler_revision = cpu_to_le32(1); - /* Checksum to be filled in by Guest linker */ +void acpi_table_begin(AcpiTable *desc, GArray *array) +{ + + desc->array = array; + desc->table_offset = array->len; + + /* + * ACPI spec 1.0b + * 5.2.3 System Description Table Header + */ + g_assert(strlen(desc->sig) == 4); + g_array_append_vals(array, desc->sig, 4); /* Signature */ + /* + * reserve space for Length field, which will be patched by + * acpi_table_end() when the table creation is finished. + */ + build_append_int_noprefix(array, 0, 4); /* Length */ + build_append_int_noprefix(array, desc->rev, 1); /* Revision */ + build_append_int_noprefix(array, 0, 1); /* Checksum */ + build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */ + /* OEM Table ID */ + build_append_padded_str(array, desc->oem_table_id, 8, ' '); + build_append_int_noprefix(array, 1, 4); /* OEM Revision */ + g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */ + build_append_int_noprefix(array, 1, 4); /* Creator Revision */ +} + +void acpi_table_end(BIOSLinker *linker, AcpiTable *desc) +{ + /* + * ACPI spec 1.0b + * 5.2.3 System Description Table Header + * Table 5-2 DESCRIPTION_HEADER Fields + */ + const unsigned checksum_offset = 9; + uint32_t table_len = desc->array->len - desc->table_offset; + uint32_t table_len_le = cpu_to_le32(table_len); + gchar *len_ptr = &desc->array->data[desc->table_offset + 4]; + + /* patch "Length" field that has been reserved by acpi_table_begin() + * to the actual length, i.e. 
accumulated table length from + * acpi_table_begin() till acpi_table_end() + */ + memcpy(len_ptr, &table_len_le, sizeof table_len_le); + bios_linker_loader_add_checksum(linker, ACPI_BUILD_TABLE_FILE, - tbl_offset, len, checksum_offset); + desc->table_offset, table_len, desc->table_offset + checksum_offset); } void *acpi_data_push(GArray *table_data, unsigned size) @@ -1822,69 +1861,81 @@ build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) 32); } -/* Build rsdt table */ +/* + * ACPI 1.0 Root System Description Table (RSDT) + */ void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id) { int i; - unsigned rsdt_entries_offset; - AcpiRsdtDescriptorRev1 *rsdt; - const unsigned table_data_len = (sizeof(uint32_t) * table_offsets->len); - const unsigned rsdt_entry_size = sizeof(rsdt->table_offset_entry[0]); - const size_t rsdt_len = sizeof(*rsdt) + table_data_len; - - rsdt = acpi_data_push(table_data, rsdt_len); - rsdt_entries_offset = (char *)rsdt->table_offset_entry - table_data->data; + AcpiTable table = { .sig = "RSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); for (i = 0; i < table_offsets->len; ++i) { uint32_t ref_tbl_offset = g_array_index(table_offsets, uint32_t, i); - uint32_t rsdt_entry_offset = rsdt_entries_offset + rsdt_entry_size * i; + uint32_t rsdt_entry_offset = table.array->len; - /* rsdt->table_offset_entry to be filled by Guest linker */ + /* reserve space for entry */ + build_append_int_noprefix(table.array, 0, 4); + + /* mark position of RSDT entry to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, rsdt_entry_offset, rsdt_entry_size, + ACPI_BUILD_TABLE_FILE, rsdt_entry_offset, 4, ACPI_BUILD_TABLE_FILE, ref_tbl_offset); + } - build_header(linker, table_data, - (void *)rsdt, "RSDT", rsdt_len, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } -/* Build xsdt table */ +/* + * ACPI 2.0 eXtended System Description Table (XSDT) + */ void build_xsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id) { int i; - unsigned xsdt_entries_offset; - AcpiXsdtDescriptorRev2 *xsdt; - const unsigned table_data_len = (sizeof(uint64_t) * table_offsets->len); - const unsigned xsdt_entry_size = sizeof(xsdt->table_offset_entry[0]); - const size_t xsdt_len = sizeof(*xsdt) + table_data_len; - - xsdt = acpi_data_push(table_data, xsdt_len); - xsdt_entries_offset = (char *)xsdt->table_offset_entry - table_data->data; + AcpiTable table = { .sig = "XSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + for (i = 0; i < table_offsets->len; ++i) { uint64_t ref_tbl_offset = g_array_index(table_offsets, uint32_t, i); - uint64_t xsdt_entry_offset = xsdt_entries_offset + xsdt_entry_size * i; + uint64_t xsdt_entry_offset = table.array->len; + + /* reserve space for entry */ + build_append_int_noprefix(table.array, 0, 8); - /* xsdt->table_offset_entry to be filled by Guest linker */ + /* mark position of RSDT entry to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, xsdt_entry_offset, xsdt_entry_size, + ACPI_BUILD_TABLE_FILE, xsdt_entry_offset, 8, ACPI_BUILD_TABLE_FILE, ref_tbl_offset); } - build_header(linker, table_data, - (void *)xsdt, "XSDT", xsdt_len, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } -void 
build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, +/* + * ACPI spec, Revision 4.0 + * 5.2.16.2 Memory Affinity Structure + */ +void build_srat_memory(GArray *table_data, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags) { - numamem->type = ACPI_SRAT_MEMORY; - numamem->length = sizeof(*numamem); - numamem->proximity = cpu_to_le32(node); - numamem->flags = cpu_to_le32(flags); - numamem->base_addr = cpu_to_le64(base); - numamem->range_length = cpu_to_le64(len); + build_append_int_noprefix(table_data, 1, 1); /* Type */ + build_append_int_noprefix(table_data, 40, 1); /* Length */ + build_append_int_noprefix(table_data, node, 4); /* Proximity Domain */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, base, 4); /* Base Address Low */ + /* Base Address High */ + build_append_int_noprefix(table_data, base >> 32, 4); + build_append_int_noprefix(table_data, len, 4); /* Length Low */ + build_append_int_noprefix(table_data, len >> 32, 4); /* Length High */ + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + build_append_int_noprefix(table_data, flags, 4); /* Flags */ + build_append_int_noprefix(table_data, 0, 8); /* Reserved */ } /* @@ -1894,11 +1945,12 @@ void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, const char *oem_id, const char *oem_table_id) { - int slit_start, i, j; - slit_start = table_data->len; + int i, j; int nb_numa_nodes = ms->numa_state->num_nodes; + AcpiTable table = { .sig = "SLIT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); build_append_int_noprefix(table_data, nb_numa_nodes, 8); for (i = 0; i < nb_numa_nodes; i++) { @@ -1909,11 +1961,96 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, 1); } } + acpi_table_end(linker, &table); +} + +/* + * ACPI spec, Revision 6.3 + * 5.2.29.1 Processor hierarchy node structure (Type 0) + */ +static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + uint32_t parent, uint32_t id, + uint32_t *priv_rsrc, + uint32_t priv_num) +{ + int i; + + build_append_byte(tbl, 0); /* Type 0 - processor */ + build_append_byte(tbl, 20 + priv_num * 4); /* Length */ + build_append_int_noprefix(tbl, 0, 2); /* Reserved */ + build_append_int_noprefix(tbl, flags, 4); /* Flags */ + build_append_int_noprefix(tbl, parent, 4); /* Parent */ + build_append_int_noprefix(tbl, id, 4); /* ACPI Processor ID */ + + /* Number of private resources */ + build_append_int_noprefix(tbl, priv_num, 4); + + /* Private resources[N] */ + if (priv_num > 0) { + assert(priv_rsrc); + for (i = 0; i < priv_num; i++) { + build_append_int_noprefix(tbl, priv_rsrc[i], 4); + } + } +} + +/* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) + */ +void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) +{ + int pptt_start = table_data->len; + int uid = 0; + int socket; + AcpiTable table = { .sig = "PPTT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + + for (socket = 0; socket < ms->smp.sockets; socket++) { + uint32_t socket_offset = table_data->len - pptt_start; + int core; + + build_processor_hierarchy_node( + table_data, + /* + * Physical package - represents the boundary + * of a physical package + */ + (1 << 
0), + 0, socket, NULL, 0); + + for (core = 0; core < ms->smp.cores; core++) { + uint32_t core_offset = table_data->len - pptt_start; + int thread; + + if (ms->smp.threads > 1) { + build_processor_hierarchy_node( + table_data, + (0 << 0), /* not a physical package */ + socket_offset, core, NULL, 0); + + for (thread = 0; thread < ms->smp.threads; thread++) { + build_processor_hierarchy_node( + table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 2) | /* Processor is a Thread */ + (1 << 3), /* Node is a Leaf */ + core_offset, uid++, NULL, 0); + } + } else { + build_processor_hierarchy_node( + table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 3), /* Node is a Leaf */ + socket_offset, uid++, NULL, 0); + } + } + } - build_header(linker, table_data, - (void *)(table_data->data + slit_start), - "SLIT", - table_data->len - slit_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } /* build rev1/rev3/rev5.1 FADT */ @@ -1921,9 +2058,10 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id) { int off; - int fadt_start = tbl->len; + AcpiTable table = { .sig = "FACP", .rev = f->rev, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - acpi_data_push(tbl, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, tbl); /* FACS address to be filled by Guest linker at runtime */ off = tbl->len; @@ -1987,7 +2125,7 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ if (f->rev == 1) { - goto build_hdr; + goto done; } build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ @@ -2024,7 +2162,7 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ if (f->rev <= 4) { - goto build_hdr; + goto done; } /* SLEEP_CONTROL_REG */ @@ -2035,11 +2173,11 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, /* TODO: extra fields need to be added to support revisions above rev5 */ assert(f->rev == 5); -build_hdr: - build_header(linker, tbl, (void *)(tbl->data + fadt_start), - "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); +done: + acpi_table_end(linker, &table); } +#ifdef CONFIG_TPM /* * build_tpm2 - Build the TPM2 table as specified in * table 7: TCG Hardware Interface Description Table Format for TPM 2.0 @@ -2049,14 +2187,14 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, const char *oem_id, const char *oem_table_id) { uint8_t start_method_params[12] = {}; - unsigned log_addr_offset, tpm2_start; + unsigned log_addr_offset; uint64_t control_area_start_address; TPMIf *tpmif = tpm_find(); uint32_t start_method; - void *tpm2_ptr; + AcpiTable table = { .sig = "TPM2", .rev = 4, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - tpm2_start = table_data->len; - tpm2_ptr = acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* Platform Class */ build_append_int_noprefix(table_data, TPM2_ACPI_CLASS_CLIENT, 2); @@ -2094,10 +2232,9 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, log_addr_offset, 8, ACPI_BUILD_TPMLOG_FILE, 0); - build_header(linker, table_data, - tpm2_ptr, "TPM2", table_data->len - tpm2_start, 4, oem_id, - oem_table_id); + acpi_table_end(linker, &table); } +#endif Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset, 
uint32_t mmio32_offset, uint64_t mmio64_offset, diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index e2317be546b..b20903ea303 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -1,5 +1,4 @@ #include "qemu/osdep.h" -#include "hw/boards.h" #include "migration/vmstate.h" #include "hw/acpi/cpu.h" #include "qapi/error.h" @@ -670,21 +669,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, /* build _MAT object */ assert(adevc && adevc->madt_cpu); - adevc->madt_cpu(adev, i, arch_ids, madt_buf); - switch (madt_buf->data[0]) { - case ACPI_APIC_PROCESSOR: { - AcpiMadtProcessorApic *apic = (void *)madt_buf->data; - apic->flags = cpu_to_le32(1); - break; - } - case ACPI_APIC_LOCAL_X2APIC: { - AcpiMadtProcessorX2Apic *apic = (void *)madt_buf->data; - apic->flags = cpu_to_le32(1); - break; - } - default: - assert(0); - } + adevc->madt_cpu(adev, i, arch_ids, madt_buf, + true); /* set enabled flag */ aml_append(dev, aml_name_decl("_MAT", aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); g_array_free(madt_buf, true); diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 5454be67d5f..e28457a7d10 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "hw/acpi/acpi.h" #include "hw/acpi/generic_event_device.h" #include "hw/irq.h" @@ -208,7 +207,7 @@ static void ged_regs_write(void *opaque, hwaddr addr, uint64_t data, return; case ACPI_GED_REG_RESET: if (data == ACPI_GED_RESET_VALUE) { - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } return; } diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c new file mode 100644 index 00000000000..c315de1802d --- /dev/null +++ b/hw/acpi/ghes-stub.c @@ -0,0 +1,22 @@ +/* + * Support for generating APEI tables and recording CPER for Guests: + * stub functions. + * + * Copyright (c) 2021 Linaro, Ltd + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/acpi/ghes.h" + +int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +{ + return -1; +} + +bool acpi_ghes_present(void) +{ + return false; +} diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index a4dac6bf15e..45d9a809cc9 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -362,18 +362,16 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) void acpi_build_hest(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - uint64_t hest_start = table_data->len; + AcpiTable table = { .sig = "HEST", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - /* Hardware Error Source Table header*/ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* Error Source Count */ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4); - build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker); - build_header(linker, table_data, (void *)(table_data->data + hest_start), - "HEST", table_data->len - hest_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, @@ -386,6 +384,8 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, /* Create a read-write fw_cfg file for Address */ fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); + + ags->present = true; } int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) @@ -443,3 +443,18 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) return ret; } + +bool acpi_ghes_present(void) +{ + AcpiGedState *acpi_ged_state; + AcpiGhesState *ags; + + acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, + NULL)); + + if (!acpi_ged_state) { + return false; + } + ags = &acpi_ged_state->ghes_state; + return ags->present; +} diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c index edb3fd91b21..6913ebf7308 100644 --- a/hw/acpi/hmat.c +++ b/hw/acpi/hmat.c @@ -200,6 +200,8 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) HMAT_LB_Info *hmat_lb; NumaHmatCacheOptions *hmat_cache; + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + for (i = 0; i < numa_state->num_nodes; i++) { flags = 0; @@ -256,14 +258,10 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state, const char *oem_id, const char *oem_table_id) { - int hmat_start = table_data->len; - - /* reserve space for HMAT header */ - acpi_data_push(table_data, 40); + AcpiTable table = { .sig = "HMAT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + acpi_table_begin(&table, table_data); hmat_build_table_structs(table_data, numa_state); - - build_header(linker, table_data, - (void *)(table_data->data + hmat_start), - "HMAT", table_data->len - hmat_start, 2, oem_id, oem_table_id); + acpi_table_end(linker, &table); } diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 853447cf9d2..ebe08ed831f 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -35,7 +35,6 @@ #include "sysemu/runstate.h" #include "hw/acpi/acpi.h" #include "hw/acpi/tco.h" -#include "exec/address-spaces.h" #include "hw/i386/ich9.h" #include "hw/mem/pc-dimm.h" @@ -218,6 +217,26 @@ static const VMStateDescription vmstate_cpuhp_state = { } }; +static bool vmstate_test_use_pcihp(void *opaque) +{ + ICH9LPCPMRegs 
*s = opaque; + + return s->use_acpi_hotplug_bridge; +} + +static const VMStateDescription vmstate_pcihp_state = { + .name = "ich9_pm/pcihp", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmstate_test_use_pcihp, + .fields = (VMStateField[]) { + VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, + ICH9LPCPMRegs, + NULL, NULL), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -239,6 +258,7 @@ const VMStateDescription vmstate_ich9_pm = { &vmstate_memhp_state, &vmstate_tco_io_state, &vmstate_cpuhp_state, + &vmstate_pcihp_state, NULL } }; @@ -260,6 +280,10 @@ static void pm_reset(void *opaque) } pm->smi_en_wmask = ~0; + if (pm->use_acpi_hotplug_bridge) { + acpi_pcihp_reset(&pm->acpi_pci_hotplug, true); + } + acpi_update_sci(&pm->acpi_regs, pm->irq); } @@ -298,6 +322,18 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, pm->enable_tco = true; acpi_pm_tco_init(&pm->tco_regs, &pm->io); + if (pm->use_acpi_hotplug_bridge) { + acpi_pcihp_init(OBJECT(lpc_pci), + &pm->acpi_pci_hotplug, + pci_get_bus(lpc_pci), + pci_address_space_io(lpc_pci), + true, + ACPI_PCIHP_ADDR_ICH9); + + qbus_set_hotplug_handler(BUS(pci_get_bus(lpc_pci)), + OBJECT(lpc_pci)); + } + pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; @@ -369,6 +405,34 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) s->pm.enable_tco = value; } +static bool ich9_pm_get_acpi_pci_hotplug(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.use_acpi_hotplug_bridge; +} + +static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.use_acpi_hotplug_bridge = value; +} + +static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.keep_pci_slot_hpc; +} + +static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.keep_pci_slot_hpc = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -377,6 +441,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s3 = 0; pm->disable_s4 = 0; pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, OBJ_PROP_FLAG_READ); @@ -400,6 +466,12 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) object_property_add_bool(obj, ACPI_PM_PROP_TCO_ENABLED, ich9_pm_get_enable_tco, ich9_pm_set_enable_tco); + object_property_add_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + ich9_pm_get_acpi_pci_hotplug, + ich9_pm_set_acpi_pci_hotplug); + object_property_add_bool(obj, "x-keep-pci-slot-hpc", + ich9_pm_get_keep_pci_slot_hpc, + ich9_pm_set_keep_pci_slot_hpc); } void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -407,6 +479,11 @@ void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, { ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp); + return; + } + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && !lpc->pm.acpi_memory_hotplug.is_enabled) { error_setg(errp, @@ -442,6 +519,9 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, 
} else { acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_plug_cb(hotplug_dev, &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device plug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -474,6 +554,10 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_cpu_unplug_request_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_request_cb(hotplug_dev, + &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -491,6 +575,9 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !lpc->pm.cpu_hotplug_legacy) { acpi_cpu_unplug_cb(&lpc->pm.cpuhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(hotplug_dev, &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index 0bdcf15528f..d0fffcf7870 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -2,13 +2,13 @@ #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/pc-hotplug.h" #include "hw/mem/pc-dimm.h" -#include "hw/boards.h" #include "hw/qdev-core.h" #include "migration/vmstate.h" #include "trace.h" #include "qapi/error.h" #include "qapi/qapi-events-acpi.h" #include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" #define MEMORY_SLOTS_NUMBER "MDNR" #define MEMORY_HOTPLUG_IO_REGION "HPMR" @@ -179,8 +179,16 @@ static void acpi_memory_hotplug_write(void *opaque, hwaddr addr, uint64_t data, hotplug_handler_unplug(hotplug_ctrl, dev, &local_err); if (local_err) { trace_mhp_acpi_pc_dimm_delete_failed(mem_st->selector); - qapi_event_send_mem_unplug_error(dev->id, + + /* + * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_GUEST_ERROR + * while the deprecation of MEM_UNPLUG_ERROR is + * pending. + */ + qapi_event_send_mem_unplug_error(dev->id ? 
: "", error_get_pretty(local_err)); + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); error_free(local_err); break; } diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index dd69577212a..adf6347bc42 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -3,23 +3,31 @@ acpi_ss.add(files( 'acpi_interface.c', 'aml-build.c', 'bios-linker-loader.c', + 'core.c', 'utils.c', )) -acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu.c')) -acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu_hotplug.c')) +acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_true: files('cpu.c', 'cpu_hotplug.c')) +acpi_ss.add(when: 'CONFIG_ACPI_CPU_HOTPLUG', if_false: files('acpi-cpu-hotplug-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_true: files('memory_hotplug.c')) +acpi_ss.add(when: 'CONFIG_ACPI_MEMORY_HOTPLUG', if_false: files('acpi-mem-hotplug-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_true: files('nvdimm.c')) +acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) -acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c')) -acpi_ss.add(when: 'CONFIG_ACPI_X86', if_true: files('core.c', 'piix4.c', 'pcihp.c'), if_false: files('acpi-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files('ghes-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_PIIX4', if_true: files('piix4.c')) +acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_true: files('pcihp.c')) +acpi_ss.add(when: 'CONFIG_ACPI_PCIHP', if_false: files('acpi-pci-hotplug-stub.c')) +acpi_ss.add(when: 'CONFIG_ACPI_VIOT', if_true: files('viot.c')) acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c')) acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c')) acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c')) -softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c')) +softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c')) softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss) softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c', - 'acpi-x86-stub.c', 'ipmi-stub.c')) + 'acpi-x86-stub.c', 'ipmi-stub.c', 'ghes-stub.c', + 'acpi-mem-hotplug-stub.c', 'acpi-cpu-hotplug-stub.c', + 'acpi-pci-hotplug-stub.c', 'acpi-nvdimm-stub.c')) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index e3d5fe19392..0d43da19ea4 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -44,22 +44,6 @@ static const uint8_t nvdimm_nfit_spa_uuid[] = UUID_LE(0x66f0d379, 0xb4f3, 0x4074, 0xac, 0x43, 0x0d, 0x33, 0x18, 0xb7, 0x8c, 0xdb); -/* - * NVDIMM Firmware Interface Table - * @signature: "NFIT" - * - * It provides information that allows OSPM to enumerate NVDIMM present in - * the platform and associate system physical address ranges created by the - * NVDIMMs. 
- * - * It is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) - */ -struct NvdimmNfitHeader { - ACPI_TABLE_HEADER_DEF - uint32_t reserved; -} QEMU_PACKED; -typedef struct NvdimmNfitHeader NvdimmNfitHeader; - /* * define NFIT structures according to ACPI 6.0: 5.2.25 NVDIMM Firmware * Interface Table (NFIT). @@ -355,10 +339,10 @@ nvdimm_build_structure_caps(GArray *structures, uint32_t capabilities) static GArray *nvdimm_build_device_structure(NVDIMMState *state) { - GSList *device_list = nvdimm_get_device_list(); + GSList *device_list, *list = nvdimm_get_device_list(); GArray *structures = g_array_new(false, true /* clear */, 1); - for (; device_list; device_list = device_list->next) { + for (device_list = list; device_list; device_list = device_list->next) { DeviceState *dev = device_list->data; /* build System Physical Address Range Structure. */ @@ -373,7 +357,7 @@ static GArray *nvdimm_build_device_structure(NVDIMMState *state) /* build NVDIMM Control Region Structure. */ nvdimm_build_structure_dcr(structures, dev); } - g_slist_free(device_list); + g_slist_free(list); if (state->persistence) { nvdimm_build_structure_caps(structures, state->persistence); @@ -401,25 +385,33 @@ void nvdimm_plug(NVDIMMState *state) nvdimm_build_fit_buffer(state); } +/* + * NVDIMM Firmware Interface Table + * @signature: "NFIT" + * + * It provides information that allows OSPM to enumerate NVDIMM present in + * the platform and associate system physical address ranges created by the + * NVDIMMs. + * + * It is defined in ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT) + */ + static void nvdimm_build_nfit(NVDIMMState *state, GArray *table_offsets, GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { NvdimmFitBuffer *fit_buf = &state->fit_buf; - unsigned int header; + AcpiTable table = { .sig = "NFIT", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; acpi_add_table(table_offsets, table_data); - /* NFIT header. */ - header = table_data->len; - acpi_data_push(table_data, sizeof(NvdimmNfitHeader)); + acpi_table_begin(&table, table_data); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* NVDIMM device structures. 
*/ g_array_append_vals(table_data, fit_buf->fit->data, fit_buf->fit->len); - - build_header(linker, table_data, - (void *)(table_data->data + header), "NFIT", - sizeof(NvdimmNfitHeader) + fit_buf->fit->len, 1, oem_id, - oem_table_id); + acpi_table_end(linker, &table); } #define NVDIMM_DSM_MEMORY_SIZE 4096 @@ -1282,14 +1274,15 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, NVDIMMState *nvdimm_state, uint32_t ram_slots, const char *oem_id) { + int mem_addr_offset; Aml *ssdt, *sb_scope, *dev; - int mem_addr_offset, nvdimm_ssdt; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "NVDIMM" }; acpi_add_table(table_offsets, table_data); + acpi_table_begin(&table, table_data); ssdt = init_aml_allocator(); - acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); - sb_scope = aml_scope("\\_SB"); dev = aml_device("NVDR"); @@ -1318,8 +1311,6 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, aml_append(sb_scope, dev); aml_append(ssdt, sb_scope); - nvdimm_ssdt = table_data->len; - /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); mem_addr_offset = build_append_named_dword(table_data, @@ -1331,18 +1322,20 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, mem_addr_offset, sizeof(uint32_t), NVDIMM_DSM_MEM_FILE, 0); - build_header(linker, table_data, - (void *)(table_data->data + nvdimm_ssdt), - "SSDT", table_data->len - nvdimm_ssdt, 1, oem_id, "NVDIMM"); free_aml_allocator(); + /* + * must be executed as the last so that pointer patching command above + * would be executed by guest before it recalculated checksum which were + * scheduled by acpi_table_end() + */ + acpi_table_end(linker, &table); } void nvdimm_build_srat(GArray *table_data) { - GSList *device_list = nvdimm_get_device_list(); + GSList *device_list, *list = nvdimm_get_device_list(); - for (; device_list; device_list = device_list->next) { - AcpiSratMemoryAffinity *numamem = NULL; + for (device_list = list; device_list; device_list = device_list->next) { DeviceState *dev = device_list->data; Object *obj = OBJECT(dev); uint64_t addr, size; @@ -1352,11 +1345,10 @@ void nvdimm_build_srat(GArray *table_data) addr = object_property_get_uint(obj, PC_DIMM_ADDR_PROP, &error_abort); size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, &error_abort); - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, addr, size, node, + build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED | MEM_AFFINITY_NON_VOLATILE); } - g_slist_free(device_list); + g_slist_free(list); } void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c index 75b1103ec45..20b70dcd815 100644 --- a/hw/acpi/pci.c +++ b/hw/acpi/pci.c @@ -28,19 +28,20 @@ #include "hw/acpi/pci.h" #include "hw/pci/pcie_host.h" +/* + * PCI Firmware Specification, Revision 3.0 + * 4.1.2 MCFG Table Description. + */ void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, const char *oem_id, const char *oem_table_id) { - int mcfg_start = table_data->len; + AcpiTable table = { .sig = "MCFG", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); - /* - * PCI Firmware Specification, Revision 3.0 - * 4.1.2 MCFG Table Description. 
- */ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); /* Reserved */ build_append_int_noprefix(table_data, 0, 8); - /* * Memory Mapped Enhanced Configuration Space Base Address Allocation * Structure @@ -56,6 +57,5 @@ void build_mcfg(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info, /* Reserved */ build_append_int_noprefix(table_data, 0, 4); - build_header(linker, table_data, (void *)(table_data->data + mcfg_start), - "MCFG", table_data->len - mcfg_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index f4cb3c979d0..30405b5113d 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -30,15 +30,16 @@ #include "hw/pci-host/i440fx.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/i386/acpi-build.h" #include "hw/acpi/acpi.h" -#include "exec/address-spaces.h" #include "hw/pci/pci_bus.h" #include "migration/vmstate.h" #include "qapi/error.h" #include "qom/qom-qobject.h" #include "trace.h" -#define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0018 #define PCI_UP_BASE 0x0000 #define PCI_DOWN_BASE 0x0004 @@ -105,6 +106,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) static void acpi_set_pci_info(void) { static bool bsel_is_set; + Object *host = acpi_get_i386_pci_host(); PCIBus *bus; unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT; @@ -113,7 +115,11 @@ static void acpi_set_pci_info(void) } bsel_is_set = true; - bus = find_i440fx(); /* TODO: Q35 support */ + if (!host) { + return; + } + + bus = PCI_HOST_BRIDGE(host)->bus; if (bus) { /* Scan all PCI buses. Set property to enable acpi based hotplug. */ pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &bsel_alloc); @@ -123,13 +129,14 @@ static void acpi_set_pci_info(void) static void acpi_pcihp_disable_root_bus(void) { static bool root_hp_disabled; + Object *host = acpi_get_i386_pci_host(); PCIBus *bus; if (root_hp_disabled) { return; } - bus = find_i440fx(); + bus = PCI_HOST_BRIDGE(host)->bus; if (bus) { /* setting the hotplug handler to NULL makes the bus non-hotpluggable */ qbus_set_hotplug_handler(BUS(bus), NULL); @@ -215,9 +222,27 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo PCIDevice *dev = PCI_DEVICE(qdev); if (PCI_SLOT(dev->devfn) == slot) { if (!acpi_pcihp_pc_no_hotplug(s, dev)) { - hotplug_ctrl = qdev_get_hotplug_handler(qdev); - hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); - object_unparent(OBJECT(qdev)); + /* + * partially_hotplugged is used by virtio-net failover: + * failover has asked the guest OS to unplug the device + * but we need to keep some references to the device + * to be able to plug it back in case of failure so + * we don't execute hotplug_handler_unplug(). + */ + if (dev->partially_hotplugged) { + /* + * pending_deleted_event is set to true when + * virtio-net failover asks to unplug the device, + * and set to false here when the operation is done + * This is used by the migration loop to detect the + * end of the operation and really start the migration. + */ + qdev->pending_deleted_event = false; + } else { + hotplug_ctrl = qdev_get_hotplug_handler(qdev); + hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); + object_unparent(OBJECT(qdev)); + } } } } @@ -276,7 +301,7 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, /* Only hotplugged devices need the hotplug capability. 
*/ if (dev->hotplugged && - acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) { + acpi_pcihp_get_bsel(pci_get_bus(pdev)) < 0) { error_setg(errp, "Unsupported bus. Bus doesn't have property '" ACPI_PCIHP_PROP_BSEL "' set"); return; @@ -330,6 +355,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + /* Remove all hot-plug handlers if hot-plug is disabled on slot */ + if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT) && + !PCIE_SLOT(pdev)->hotplug) { + qbus_set_hotplug_handler(BUS(sec), NULL); + return; + } + qbus_set_hotplug_handler(BUS(sec), OBJECT(hotplug_dev)); /* We don't have to overwrite any other hotplug handler yet */ assert(QLIST_EMPTY(&sec->child)); @@ -349,8 +381,8 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, { PCIDevice *pdev = PCI_DEVICE(dev); - trace_acpi_pci_unplug(PCI_SLOT(PCI_DEVICE(dev)->devfn), - acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev)))); + trace_acpi_pci_unplug(PCI_SLOT(pdev->devfn), + acpi_pcihp_get_bsel(pci_get_bus(pdev))); /* * clean up acpi-index so it could reused by another device @@ -382,6 +414,12 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + /* + * pending_deleted_event is used by virtio-net failover to detect the + * end of the unplug operation, the flag is set to false in + * acpi_pcihp_eject_slot() when the operation is completed. + */ + pdev->qdev.pending_deleted_event = true; s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } @@ -489,10 +527,11 @@ static const MemoryRegionOps acpi_pcihp_io_ops = { }; void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, - MemoryRegion *address_space_io, bool bridges_enabled) + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base) { s->io_len = ACPI_PCIHP_SIZE; - s->io_base = ACPI_PCIHP_ADDR; + s->io_base = io_base; s->root = root_bus; s->legacy_piix = !bridges_enabled; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 8f8b0e95e52..f0b5fac44a1 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -33,7 +33,6 @@ #include "sysemu/xen.h" #include "qapi/error.h" #include "qemu/range.h" -#include "exec/address-spaces.h" #include "hw/acpi/pcihp.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/acpi/cpu.h" @@ -50,6 +49,8 @@ #define GPE_BASE 0xafe0 #define GPE_LEN 4 +#define ACPI_PCIHP_ADDR_PIIX4 0xae00 + struct pci_status { uint32_t up; /* deprecated, maintained for migration compatibility */ uint32_t down; @@ -608,7 +609,7 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) { acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent, - s->use_acpi_hotplug_bridge); + s->use_acpi_hotplug_bridge, ACPI_PCIHP_ADDR_PIIX4); } s->cpu_hotplug_legacy = true; @@ -646,9 +647,9 @@ static Property piix4_pm_properties[] = { DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, use_acpi_hotplug_bridge, true), - DEFINE_PROP_BOOL("acpi-root-pci-hotplug", PIIX4PMState, + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState, 
use_acpi_root_pci_hotplug, true), DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState, acpi_memory_hotplug.is_enabled, true), diff --git a/hw/acpi/tpm.c b/hw/acpi/tpm.c index b96459e45be..cdc02275365 100644 --- a/hw/acpi/tpm.c +++ b/hw/acpi/tpm.c @@ -57,7 +57,7 @@ void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev) aml_operation_region( "TPP3", AML_SYSTEM_MEMORY, aml_int(TPM_PPI_ADDR_BASE + - 0x15a /* movv, docs/specs/tpm.txt */), + 0x15a /* movv, docs/specs/tpm.rst */), 0x1)); field = aml_field("TPP3", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field("MOVV", 8)); diff --git a/hw/acpi/trace-events b/hw/acpi/trace-events index dcc1438f3a0..974d770e8b4 100644 --- a/hw/acpi/trace-events +++ b/hw/acpi/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # memory_hotplug.c mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32 diff --git a/hw/acpi/viot.c b/hw/acpi/viot.c new file mode 100644 index 00000000000..c1af75206e5 --- /dev/null +++ b/hw/acpi/viot.c @@ -0,0 +1,114 @@ +/* + * ACPI Virtual I/O Translation table implementation + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/viot.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" + +struct viot_pci_ranges { + GArray *blob; + size_t count; + uint16_t output_node; +}; + +/* Build PCI range for a given PCI host bridge */ +static int build_pci_range_node(Object *obj, void *opaque) +{ + struct viot_pci_ranges *pci_ranges = opaque; + GArray *blob = pci_ranges->blob; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + /* Type */ + build_append_int_noprefix(blob, 1 /* PCI range */, 1); + /* Reserved */ + build_append_int_noprefix(blob, 0, 1); + /* Length */ + build_append_int_noprefix(blob, 24, 2); + /* Endpoint start */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(min_bus, 0), 4); + /* PCI Segment start */ + build_append_int_noprefix(blob, 0, 2); + /* PCI Segment end */ + build_append_int_noprefix(blob, 0, 2); + /* PCI BDF start */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(min_bus, 0), 2); + /* PCI BDF end */ + build_append_int_noprefix(blob, PCI_BUILD_BDF(max_bus, 0xff), 2); + /* Output node */ + build_append_int_noprefix(blob, pci_ranges->output_node, 2); + /* Reserved */ + build_append_int_noprefix(blob, 0, 6); + + pci_ranges->count++; + } + } + + return 0; +} + +/* + * Generate a VIOT table with one PCI-based virtio-iommu that manages PCI + * endpoints. 
+ * + * Defined in the ACPI Specification (Version TBD) + */ +void build_viot(MachineState *ms, GArray *table_data, BIOSLinker *linker, + uint16_t virtio_iommu_bdf, const char *oem_id, + const char *oem_table_id) +{ + /* The virtio-iommu node follows the 48-bytes header */ + int viommu_off = 48; + AcpiTable table = { .sig = "VIOT", .rev = 0, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + struct viot_pci_ranges pci_ranges = { + .output_node = viommu_off, + .blob = g_array_new(false, true /* clear */, 1), + }; + + /* Build the list of PCI ranges that this viommu manages */ + object_child_foreach_recursive(OBJECT(ms), build_pci_range_node, + &pci_ranges); + + /* ACPI table header */ + acpi_table_begin(&table, table_data); + /* Node count */ + build_append_int_noprefix(table_data, pci_ranges.count + 1, 2); + /* Node offset */ + build_append_int_noprefix(table_data, viommu_off, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* Virtio-iommu node */ + /* Type */ + build_append_int_noprefix(table_data, 3 /* virtio-pci IOMMU */, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 1); + /* Length */ + build_append_int_noprefix(table_data, 16, 2); + /* PCI Segment */ + build_append_int_noprefix(table_data, 0, 2); + /* PCI BDF number */ + build_append_int_noprefix(table_data, virtio_iommu_bdf, 2); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* PCI ranges found above */ + g_array_append_vals(table_data, pci_ranges.blob->data, + pci_ranges.blob->len); + g_array_free(pci_ranges.blob, true); + + acpi_table_end(linker, &table); +} + diff --git a/hw/acpi/viot.h b/hw/acpi/viot.h new file mode 100644 index 00000000000..9fe565bb878 --- /dev/null +++ b/hw/acpi/viot.h @@ -0,0 +1,13 @@ +/* + * ACPI Virtual I/O Translation Table implementation + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef VIOT_H +#define VIOT_H + +void build_viot(MachineState *ms, GArray *table_data, BIOSLinker *linker, + uint16_t virtio_iommu_bdf, const char *oem_id, + const char *oem_table_id); + +#endif /* VIOT_H */ diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c index 4f41a13ea05..0c9f158ac9e 100644 --- a/hw/acpi/vmgenid.c +++ b/hw/acpi/vmgenid.c @@ -29,6 +29,8 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx; uint32_t vgia_offset; QemuUUID guid_le; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "VMGENID" }; /* Fill in the GUID values. 
These need to be converted to little-endian * first, since that's what the guest expects @@ -42,12 +44,10 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data, ARRAY_SIZE(guid_le.data)); - /* Put this in a separate SSDT table */ + /* Put VMGNEID into a separate SSDT table */ + acpi_table_begin(&table, table_data); ssdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); - /* Storage for the GUID address */ vgia_offset = table_data->len + build_append_named_dword(ssdt->buf, "VGIA"); @@ -116,9 +116,8 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t), VMGENID_GUID_FW_CFG_FILE, 0); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - ssdt->buf->len), - "SSDT", ssdt->buf->len, 1, oem_id, "VMGENID"); + /* must be called after above command to ensure correct table checksum */ + acpi_table_end(linker, &table); free_aml_allocator(); } diff --git a/hw/adc/Kconfig b/hw/adc/Kconfig index 25d2229fb83..a825bd3d343 100644 --- a/hw/adc/Kconfig +++ b/hw/adc/Kconfig @@ -1,2 +1,5 @@ config STM32F2XX_ADC bool + +config MAX111X + bool diff --git a/hw/adc/aspeed_adc.c b/hw/adc/aspeed_adc.c new file mode 100644 index 00000000000..c5fcae29f63 --- /dev/null +++ b/hw/adc/aspeed_adc.c @@ -0,0 +1,427 @@ +/* + * Aspeed ADC + * + * Copyright 2017-2021 IBM Corp. + * + * Andrew Jeffery + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/adc/aspeed_adc.h" +#include "trace.h" + +#define ASPEED_ADC_MEMORY_REGION_SIZE 0x1000 +#define ASPEED_ADC_ENGINE_MEMORY_REGION_SIZE 0x100 +#define ASPEED_ADC_ENGINE_CH_EN_MASK 0xffff0000 +#define ASPEED_ADC_ENGINE_CH_EN(x) ((BIT(x)) << 16) +#define ASPEED_ADC_ENGINE_INIT BIT(8) +#define ASPEED_ADC_ENGINE_AUTO_COMP BIT(5) +#define ASPEED_ADC_ENGINE_COMP BIT(4) +#define ASPEED_ADC_ENGINE_MODE_MASK 0x0000000e +#define ASPEED_ADC_ENGINE_MODE_OFF (0b000 << 1) +#define ASPEED_ADC_ENGINE_MODE_STANDBY (0b001 << 1) +#define ASPEED_ADC_ENGINE_MODE_NORMAL (0b111 << 1) +#define ASPEED_ADC_ENGINE_EN BIT(0) +#define ASPEED_ADC_HYST_EN BIT(31) + +#define ASPEED_ADC_L_MASK ((1 << 10) - 1) +#define ASPEED_ADC_L(x) ((x) & ASPEED_ADC_L_MASK) +#define ASPEED_ADC_H(x) (((x) >> 16) & ASPEED_ADC_L_MASK) +#define ASPEED_ADC_LH_MASK (ASPEED_ADC_L_MASK << 16 | ASPEED_ADC_L_MASK) +#define LOWER_CHANNEL_MASK ((1 << 10) - 1) +#define LOWER_CHANNEL_DATA(x) ((x) & LOWER_CHANNEL_MASK) +#define UPPER_CHANNEL_DATA(x) (((x) >> 16) & LOWER_CHANNEL_MASK) + +#define TO_REG(addr) (addr >> 2) + +#define ENGINE_CONTROL TO_REG(0x00) +#define INTERRUPT_CONTROL TO_REG(0x04) +#define VGA_DETECT_CONTROL TO_REG(0x08) +#define CLOCK_CONTROL TO_REG(0x0C) +#define DATA_CHANNEL_1_AND_0 TO_REG(0x10) +#define DATA_CHANNEL_7_AND_6 TO_REG(0x1C) +#define DATA_CHANNEL_9_AND_8 TO_REG(0x20) +#define DATA_CHANNEL_15_AND_14 TO_REG(0x2C) +#define BOUNDS_CHANNEL_0 TO_REG(0x30) +#define BOUNDS_CHANNEL_7 TO_REG(0x4C) +#define BOUNDS_CHANNEL_8 TO_REG(0x50) +#define BOUNDS_CHANNEL_15 TO_REG(0x6C) +#define HYSTERESIS_CHANNEL_0 TO_REG(0x70) +#define HYSTERESIS_CHANNEL_7 TO_REG(0x8C) +#define HYSTERESIS_CHANNEL_8 TO_REG(0x90) +#define HYSTERESIS_CHANNEL_15 TO_REG(0xAC) +#define INTERRUPT_SOURCE TO_REG(0xC0) +#define 
COMPENSATING_AND_TRIMMING TO_REG(0xC4) + +static inline uint32_t update_channels(uint32_t current) +{ + return ((((current >> 16) & ASPEED_ADC_L_MASK) + 7) << 16) | + ((current + 5) & ASPEED_ADC_L_MASK); +} + +static bool breaks_threshold(AspeedADCEngineState *s, int reg) +{ + assert(reg >= DATA_CHANNEL_1_AND_0 && + reg < DATA_CHANNEL_1_AND_0 + s->nr_channels / 2); + + int a_bounds_reg = BOUNDS_CHANNEL_0 + (reg - DATA_CHANNEL_1_AND_0) * 2; + int b_bounds_reg = a_bounds_reg + 1; + uint32_t a_and_b = s->regs[reg]; + uint32_t a_bounds = s->regs[a_bounds_reg]; + uint32_t b_bounds = s->regs[b_bounds_reg]; + uint32_t a = ASPEED_ADC_L(a_and_b); + uint32_t b = ASPEED_ADC_H(a_and_b); + uint32_t a_lower = ASPEED_ADC_L(a_bounds); + uint32_t a_upper = ASPEED_ADC_H(a_bounds); + uint32_t b_lower = ASPEED_ADC_L(b_bounds); + uint32_t b_upper = ASPEED_ADC_H(b_bounds); + + return (a < a_lower || a > a_upper) || + (b < b_lower || b > b_upper); +} + +static uint32_t read_channel_sample(AspeedADCEngineState *s, int reg) +{ + assert(reg >= DATA_CHANNEL_1_AND_0 && + reg < DATA_CHANNEL_1_AND_0 + s->nr_channels / 2); + + /* Poor man's sampling */ + uint32_t value = s->regs[reg]; + s->regs[reg] = update_channels(s->regs[reg]); + + if (breaks_threshold(s, reg)) { + s->regs[INTERRUPT_CONTROL] |= BIT(reg - DATA_CHANNEL_1_AND_0); + qemu_irq_raise(s->irq); + } + + return value; +} + +static uint64_t aspeed_adc_engine_read(void *opaque, hwaddr addr, + unsigned int size) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(opaque); + int reg = TO_REG(addr); + uint32_t value = 0; + + switch (reg) { + case BOUNDS_CHANNEL_8 ... BOUNDS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "bounds register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - BOUNDS_CHANNEL_0); + break; + } + /* fallthrough */ + case HYSTERESIS_CHANNEL_8 ... HYSTERESIS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "hysteresis register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - HYSTERESIS_CHANNEL_0); + break; + } + /* fallthrough */ + case BOUNDS_CHANNEL_0 ... BOUNDS_CHANNEL_7: + case HYSTERESIS_CHANNEL_0 ... HYSTERESIS_CHANNEL_7: + case ENGINE_CONTROL: + case INTERRUPT_CONTROL: + case VGA_DETECT_CONTROL: + case CLOCK_CONTROL: + case INTERRUPT_SOURCE: + case COMPENSATING_AND_TRIMMING: + value = s->regs[reg]; + break; + case DATA_CHANNEL_9_AND_8 ... DATA_CHANNEL_15_AND_14: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "data register %u invalid, only 0...3 valid\n", + __func__, s->engine_id, reg - DATA_CHANNEL_1_AND_0); + break; + } + /* fallthrough */ + case DATA_CHANNEL_1_AND_0 ... 
DATA_CHANNEL_7_AND_6: + value = read_channel_sample(s, reg); + /* Allow 16-bit reads of the data registers */ + if (addr & 0x2) { + assert(size == 2); + value >>= 16; + } + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: engine[%u]: 0x%" HWADDR_PRIx "\n", + __func__, s->engine_id, addr); + break; + } + + trace_aspeed_adc_engine_read(s->engine_id, addr, value); + return value; +} + +static void aspeed_adc_engine_write(void *opaque, hwaddr addr, uint64_t value, + unsigned int size) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(opaque); + int reg = TO_REG(addr); + uint32_t init = 0; + + trace_aspeed_adc_engine_write(s->engine_id, addr, value); + + switch (reg) { + case ENGINE_CONTROL: + init = !!(value & ASPEED_ADC_ENGINE_EN); + init *= ASPEED_ADC_ENGINE_INIT; + + value &= ~ASPEED_ADC_ENGINE_INIT; + value |= init; + + value &= ~ASPEED_ADC_ENGINE_AUTO_COMP; + break; + case INTERRUPT_CONTROL: + case VGA_DETECT_CONTROL: + case CLOCK_CONTROL: + break; + case DATA_CHANNEL_9_AND_8 ... DATA_CHANNEL_15_AND_14: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "data register %u invalid, only 0...3 valid\n", + __func__, s->engine_id, reg - DATA_CHANNEL_1_AND_0); + return; + } + /* fallthrough */ + case BOUNDS_CHANNEL_8 ... BOUNDS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "bounds register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - BOUNDS_CHANNEL_0); + return; + } + /* fallthrough */ + case DATA_CHANNEL_1_AND_0 ... DATA_CHANNEL_7_AND_6: + case BOUNDS_CHANNEL_0 ... BOUNDS_CHANNEL_7: + value &= ASPEED_ADC_LH_MASK; + break; + case HYSTERESIS_CHANNEL_8 ... HYSTERESIS_CHANNEL_15: + if (s->nr_channels <= 8) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: engine[%u]: " + "hysteresis register %u invalid, only 0...7 valid\n", + __func__, s->engine_id, reg - HYSTERESIS_CHANNEL_0); + return; + } + /* fallthrough */ + case HYSTERESIS_CHANNEL_0 ... 
HYSTERESIS_CHANNEL_7: + value &= (ASPEED_ADC_HYST_EN | ASPEED_ADC_LH_MASK); + break; + case INTERRUPT_SOURCE: + value &= 0xffff; + break; + case COMPENSATING_AND_TRIMMING: + value &= 0xf; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: engine[%u]: " + "0x%" HWADDR_PRIx " 0x%" PRIx64 "\n", + __func__, s->engine_id, addr, value); + break; + } + + s->regs[reg] = value; +} + +static const MemoryRegionOps aspeed_adc_engine_ops = { + .read = aspeed_adc_engine_read, + .write = aspeed_adc_engine_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 2, + .max_access_size = 4, + .unaligned = false, + }, +}; + +static const uint32_t aspeed_adc_resets[ASPEED_ADC_NR_REGS] = { + [ENGINE_CONTROL] = 0x00000000, + [INTERRUPT_CONTROL] = 0x00000000, + [VGA_DETECT_CONTROL] = 0x0000000f, + [CLOCK_CONTROL] = 0x0000000f, +}; + +static void aspeed_adc_engine_reset(DeviceState *dev) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(dev); + + memcpy(s->regs, aspeed_adc_resets, sizeof(aspeed_adc_resets)); +} + +static void aspeed_adc_engine_realize(DeviceState *dev, Error **errp) +{ + AspeedADCEngineState *s = ASPEED_ADC_ENGINE(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + g_autofree char *name = g_strdup_printf(TYPE_ASPEED_ADC_ENGINE ".%d", + s->engine_id); + + assert(s->engine_id < 2); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_adc_engine_ops, s, name, + ASPEED_ADC_ENGINE_MEMORY_REGION_SIZE); + + sysbus_init_mmio(sbd, &s->mmio); +} + +static const VMStateDescription vmstate_aspeed_adc_engine = { + .name = TYPE_ASPEED_ADC, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, AspeedADCEngineState, ASPEED_ADC_NR_REGS), + VMSTATE_END_OF_LIST(), + } +}; + +static Property aspeed_adc_engine_properties[] = { + DEFINE_PROP_UINT32("engine-id", AspeedADCEngineState, engine_id, 0), + DEFINE_PROP_UINT32("nr-channels", AspeedADCEngineState, nr_channels, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_adc_engine_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = aspeed_adc_engine_realize; + dc->reset = aspeed_adc_engine_reset; + device_class_set_props(dc, aspeed_adc_engine_properties); + dc->desc = "Aspeed Analog-to-Digital Engine"; + dc->vmsd = &vmstate_aspeed_adc_engine; +} + +static const TypeInfo aspeed_adc_engine_info = { + .name = TYPE_ASPEED_ADC_ENGINE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedADCEngineState), + .class_init = aspeed_adc_engine_class_init, +}; + +static void aspeed_adc_instance_init(Object *obj) +{ + AspeedADCState *s = ASPEED_ADC(obj); + AspeedADCClass *aac = ASPEED_ADC_GET_CLASS(obj); + uint32_t nr_channels = ASPEED_ADC_NR_CHANNELS / aac->nr_engines; + + for (int i = 0; i < aac->nr_engines; i++) { + AspeedADCEngineState *engine = &s->engines[i]; + object_initialize_child(obj, "engine[*]", engine, + TYPE_ASPEED_ADC_ENGINE); + qdev_prop_set_uint32(DEVICE(engine), "engine-id", i); + qdev_prop_set_uint32(DEVICE(engine), "nr-channels", nr_channels); + } +} + +static void aspeed_adc_set_irq(void *opaque, int n, int level) +{ + AspeedADCState *s = opaque; + AspeedADCClass *aac = ASPEED_ADC_GET_CLASS(s); + uint32_t pending = 0; + + /* TODO: update Global IRQ status register on AST2600 (Need specs) */ + for (int i = 0; i < aac->nr_engines; i++) { + uint32_t irq_status = s->engines[i].regs[INTERRUPT_CONTROL] & 0xFF; + pending |= irq_status << (i * 8); + } + + qemu_set_irq(s->irq, !!pending); +} 
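A minimal wiring sketch for the new controller, assuming a SoC model that supplies the parent object, the MMIO base and the upstream interrupt line (the helper and its parameters below are placeholders, not part of this device model):

    #include "qemu/osdep.h"
    #include "hw/adc/aspeed_adc.h"
    #include "hw/sysbus.h"
    #include "hw/irq.h"

    /* Illustrative helper; "parent", "base" and "irq" come from the SoC. */
    static void wire_ast2600_adc(Object *parent, AspeedADCState *adc,
                                 hwaddr base, qemu_irq irq, Error **errp)
    {
        object_initialize_child(parent, "adc", adc, TYPE_ASPEED_2600_ADC);
        if (!sysbus_realize(SYS_BUS_DEVICE(adc), errp)) {
            return;
        }
        /* One 0x1000 window; the AST2600's second engine sits at +0x100. */
        sysbus_mmio_map(SYS_BUS_DEVICE(adc), 0, base);
        /* Single upstream IRQ, aggregated from the per-engine lines above. */
        sysbus_connect_irq(SYS_BUS_DEVICE(adc), 0, irq);
    }

The realize hook that follows is what actually creates the per-engine sub-regions and the GPIO inputs this sketch relies on.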
+ +static void aspeed_adc_realize(DeviceState *dev, Error **errp) +{ + AspeedADCState *s = ASPEED_ADC(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AspeedADCClass *aac = ASPEED_ADC_GET_CLASS(dev); + + qdev_init_gpio_in_named_with_opaque(DEVICE(sbd), aspeed_adc_set_irq, + s, NULL, aac->nr_engines); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init(&s->mmio, OBJECT(s), TYPE_ASPEED_ADC, + ASPEED_ADC_MEMORY_REGION_SIZE); + + sysbus_init_mmio(sbd, &s->mmio); + + for (int i = 0; i < aac->nr_engines; i++) { + Object *eng = OBJECT(&s->engines[i]); + + if (!sysbus_realize(SYS_BUS_DEVICE(eng), errp)) { + return; + } + sysbus_connect_irq(SYS_BUS_DEVICE(eng), 0, + qdev_get_gpio_in(DEVICE(sbd), i)); + memory_region_add_subregion(&s->mmio, + i * ASPEED_ADC_ENGINE_MEMORY_REGION_SIZE, + &s->engines[i].mmio); + } +} + +static void aspeed_adc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedADCClass *aac = ASPEED_ADC_CLASS(klass); + + dc->realize = aspeed_adc_realize; + dc->desc = "Aspeed Analog-to-Digital Converter"; + aac->nr_engines = 1; +} + +static void aspeed_2600_adc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedADCClass *aac = ASPEED_ADC_CLASS(klass); + + dc->desc = "ASPEED 2600 ADC Controller"; + aac->nr_engines = 2; +} + +static const TypeInfo aspeed_adc_info = { + .name = TYPE_ASPEED_ADC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = aspeed_adc_instance_init, + .instance_size = sizeof(AspeedADCState), + .class_init = aspeed_adc_class_init, + .class_size = sizeof(AspeedADCClass), + .abstract = true, +}; + +static const TypeInfo aspeed_2400_adc_info = { + .name = TYPE_ASPEED_2400_ADC, + .parent = TYPE_ASPEED_ADC, +}; + +static const TypeInfo aspeed_2500_adc_info = { + .name = TYPE_ASPEED_2500_ADC, + .parent = TYPE_ASPEED_ADC, +}; + +static const TypeInfo aspeed_2600_adc_info = { + .name = TYPE_ASPEED_2600_ADC, + .parent = TYPE_ASPEED_ADC, + .class_init = aspeed_2600_adc_class_init, +}; + +static void aspeed_adc_register_types(void) +{ + type_register_static(&aspeed_adc_engine_info); + type_register_static(&aspeed_adc_info); + type_register_static(&aspeed_2400_adc_info); + type_register_static(&aspeed_2500_adc_info); + type_register_static(&aspeed_2600_adc_info); +} + +type_init(aspeed_adc_register_types); diff --git a/hw/misc/max111x.c b/hw/adc/max111x.c similarity index 99% rename from hw/misc/max111x.c rename to hw/adc/max111x.c index 1b3234a5196..e8bf4cccd44 100644 --- a/hw/misc/max111x.c +++ b/hw/adc/max111x.c @@ -11,7 +11,7 @@ */ #include "qemu/osdep.h" -#include "hw/misc/max111x.h" +#include "hw/adc/max111x.h" #include "hw/irq.h" #include "migration/vmstate.h" #include "qemu/module.h" diff --git a/hw/adc/meson.build b/hw/adc/meson.build index 6ddee238139..b29ac7ccdf5 100644 --- a/hw/adc/meson.build +++ b/hw/adc/meson.build @@ -1,2 +1,5 @@ softmmu_ss.add(when: 'CONFIG_STM32F2XX_ADC', if_true: files('stm32f2xx_adc.c')) +softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_adc.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_adc.c')) +softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq-xadc.c')) +softmmu_ss.add(when: 'CONFIG_MAX111X', if_true: files('max111x.c')) diff --git a/hw/adc/trace-events b/hw/adc/trace-events index 4c3279ece2c..5a4c444d773 100644 --- a/hw/adc/trace-events +++ b/hw/adc/trace-events @@ -1,5 +1,8 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# npcm7xx_adc.c npcm7xx_adc_read(const char *id, uint64_t offset, uint32_t value) " %s offset: 0x%04" PRIx64 " value 0x%04" PRIx32 npcm7xx_adc_write(const char *id, uint64_t offset, uint32_t value) "%s offset: 0x%04" PRIx64 " value 0x%04" PRIx32 + +aspeed_adc_engine_read(uint32_t engine_id, uint64_t addr, uint64_t value) "engine[%u] 0x%" PRIx64 " 0x%" PRIx64 +aspeed_adc_engine_write(uint32_t engine_id, uint64_t addr, uint64_t value) "engine[%u] 0x%" PRIx64 " 0x%" PRIx64 diff --git a/hw/misc/zynq-xadc.c b/hw/adc/zynq-xadc.c similarity index 99% rename from hw/misc/zynq-xadc.c rename to hw/adc/zynq-xadc.c index 7b1972ce063..cfc7bab0651 100644 --- a/hw/misc/zynq-xadc.c +++ b/hw/adc/zynq-xadc.c @@ -15,7 +15,7 @@ #include "qemu/osdep.h" #include "hw/irq.h" -#include "hw/misc/zynq-xadc.h" +#include "hw/adc/zynq-xadc.h" #include "migration/vmstate.h" #include "qemu/timer.h" #include "qemu/log.h" diff --git a/hw/alpha/Kconfig b/hw/alpha/Kconfig index 15c59ff2645..9af650c94ec 100644 --- a/hw/alpha/Kconfig +++ b/hw/alpha/Kconfig @@ -3,9 +3,7 @@ config DP264 imply PCI_DEVICES imply TEST_DEVICES imply E1000_PCI - select I82374 - select I8254 - select I8259 + select I82378 select IDE_CMD646 select MC146818RTC select PCI diff --git a/hw/alpha/alpha_sys.h b/hw/alpha/alpha_sys.h index e2c02e2bbe1..2263e821da5 100644 --- a/hw/alpha/alpha_sys.h +++ b/hw/alpha/alpha_sys.h @@ -10,8 +10,8 @@ #include "hw/intc/i8259.h" -PCIBus *typhoon_init(MemoryRegion *, ISABus **, qemu_irq *, AlphaCPU *[4], - pci_map_irq_fn); +PCIBus *typhoon_init(MemoryRegion *, qemu_irq *, qemu_irq *, AlphaCPU *[4], + pci_map_irq_fn, uint8_t devfn_min); /* alpha_pci.c. */ extern const MemoryRegionOps alpha_pci_ignore_ops; diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c index c8e300d93f6..c78ed96d0ec 100644 --- a/hw/alpha/dp264.c +++ b/hw/alpha/dp264.c @@ -13,12 +13,9 @@ #include "hw/loader.h" #include "alpha_sys.h" #include "qemu/error-report.h" -#include "sysemu/sysemu.h" #include "hw/rtc/mc146818rtc.h" #include "hw/ide/pci.h" -#include "hw/timer/i8254.h" #include "hw/isa/superio.h" -#include "hw/dma/i8257.h" #include "net/net.h" #include "qemu/cutils.h" #include "qemu/datadir.h" @@ -59,8 +56,10 @@ static void clipper_init(MachineState *machine) AlphaCPU *cpus[4]; PCIBus *pci_bus; PCIDevice *pci_dev; + DeviceState *i82378_dev; ISABus *isa_bus; qemu_irq rtc_irq; + qemu_irq isa_irq; long size, i; char *palcode_filename; uint64_t palcode_entry; @@ -73,19 +72,57 @@ static void clipper_init(MachineState *machine) cpus[i] = ALPHA_CPU(cpu_create(machine->cpu_type)); } + /* + * arg0 -> memory size + * arg1 -> kernel entry point + * arg2 -> config word + * + * Config word: bits 0-5 -> ncpus + * bit 6 -> nographics option (for HWRPB CTB) + * + * See init_hwrpb() in the PALcode. + */ cpus[0]->env.trap_arg0 = ram_size; cpus[0]->env.trap_arg1 = 0; - cpus[0]->env.trap_arg2 = smp_cpus; - - /* Init the chipset. */ - pci_bus = typhoon_init(machine->ram, &isa_bus, &rtc_irq, cpus, - clipper_pci_map_irq); + cpus[0]->env.trap_arg2 = smp_cpus | (!machine->enable_graphics << 6); + + /* + * Init the chipset. Because we're using CLIPPER IRQ mappings, + * the minimum PCI device IdSel is 1. + */ + pci_bus = typhoon_init(machine->ram, &isa_irq, &rtc_irq, cpus, + clipper_pci_map_irq, PCI_DEVFN(1, 0)); + + /* + * Init the PCI -> ISA bridge. 
+ * + * Technically, PCI-based Alphas shipped with one of three different + * PCI-ISA bridges: + * + * - Intel i82378 SIO + * - Cypress CY82c693UB + * - ALI M1533 + * + * (An Intel i82375 PCI-EISA bridge was also used on some models.) + * + * For simplicity, we model an i82378 here, even though it wouldn't + * have been on any Tsunami/Typhoon systems; it's close enough, and + * we don't want to deal with modelling the CY82c693UB (which has + * incompatible edge/level control registers, plus other peripherals + * like IDE and USB) or the M1533 (which also has IDE and USB). + * + * Importantly, we need to provide a PCI device node for it, otherwise + * some operating systems won't notice there's an ISA bus to configure. + */ + i82378_dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(7, 0), "i82378")); + isa_bus = ISA_BUS(qdev_get_child_bus(i82378_dev, "isa.0")); + + /* Connect the ISA PIC to the Typhoon IRQ used for ISA interrupts. */ + qdev_connect_gpio_out(i82378_dev, 0, isa_irq); /* Since we have an SRM-compatible PALcode, use the SRM epoch. */ mc146818_rtc_init(isa_bus, 1900, rtc_irq); - i8254_pit_init(isa_bus, 0x40, 0, NULL); - /* VGA setup. Don't bother loading the bios. */ pci_vga_init(pci_bus); @@ -94,9 +131,6 @@ static void clipper_init(MachineState *machine) pci_nic_init_nofail(&nd_table[i], pci_bus, "e1000", NULL); } - /* 2 82C37 (dma) */ - isa_create_simple(isa_bus, "i82374"); - /* Super I/O */ isa_create_simple(isa_bus, TYPE_SMC37C669_SUPERIO); diff --git a/hw/alpha/trace-events b/hw/alpha/trace-events index 5b8315f27fc..952a8164075 100644 --- a/hw/alpha/trace-events +++ b/hw/alpha/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pci.c alpha_pci_iack_write(void) "" diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index a42b3198121..bd39c8ca862 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -11,10 +11,8 @@ #include "qemu/units.h" #include "qapi/error.h" #include "cpu.h" -#include "hw/boards.h" #include "hw/irq.h" #include "alpha_sys.h" -#include "exec/address-spaces.h" #include "qom/object.h" @@ -816,8 +814,9 @@ static void typhoon_alarm_timer(void *opaque) cpu_interrupt(CPU(s->cchip.cpu[cpu]), CPU_INTERRUPT_TIMER); } -PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, - AlphaCPU *cpus[4], pci_map_irq_fn sys_map_irq) +PCIBus *typhoon_init(MemoryRegion *ram, qemu_irq *p_isa_irq, + qemu_irq *p_rtc_irq, AlphaCPU *cpus[4], + pci_map_irq_fn sys_map_irq, uint8_t devfn_min) { MemoryRegion *addr_space = get_system_memory(); DeviceState *dev; @@ -845,6 +844,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, } } + *p_isa_irq = qemu_allocate_irq(typhoon_set_isa_irq, s, 0); *p_rtc_irq = qemu_allocate_irq(typhoon_set_timer_irq, s, 0); /* Main memory region, 0x00.0000.0000. Real hardware supports 32GB, @@ -887,7 +887,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, b = pci_register_root_bus(dev, "pci", typhoon_set_irq, sys_map_irq, s, &s->pchip.reg_mem, &s->pchip.reg_io, - 0, 64, TYPE_PCI_BUS); + devfn_min, 64, TYPE_PCI_BUS); phb->bus = b; sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -920,18 +920,6 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus **isa_bus, qemu_irq *p_rtc_irq, /* Pchip1 PCI I/O, 0x802.FC00.0000, 32MB. */ /* Pchip1 PCI configuration, 0x802.FE00.0000, 16MB. */ - /* Init the ISA bus. */ - /* ??? Technically there should be a cy82c693ub pci-isa bridge. 
*/ - { - qemu_irq *isa_irqs; - - *isa_bus = isa_bus_new(NULL, get_system_memory(), &s->pchip.reg_io, - &error_abort); - isa_irqs = i8259_init(*isa_bus, - qemu_allocate_irq(typhoon_set_isa_irq, s, 0)); - isa_bus_irqs(*isa_bus, isa_irqs); - } - return b; } diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 0cc28e3ea55..5a06777cff6 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -6,6 +6,7 @@ config ARM_VIRT imply VFIO_PLATFORM imply VFIO_XGMAC imply TPM_TIS_SYSBUS + imply NVDIMM select ARM_GIC select ACPI select ARM_SMMUV3 @@ -24,9 +25,7 @@ config ARM_VIRT select ACPI_PCI select MEM_DEVICE select DIMM - select ACPI_MEMORY_HOTPLUG select ACPI_HW_REDUCED - select ACPI_NVDIMM select ACPI_APEI config CHEETAH @@ -142,6 +141,7 @@ config PXA2XX select SD select SSI select USB_OHCI + select PCMCIA config GUMSTIX bool @@ -235,8 +235,13 @@ config STELLARIS select SSI_SD select STELLARIS_INPUT select STELLARIS_ENET # ethernet + select STELLARIS_GPTM # general purpose timer module select UNIMP +config STM32VLDISCOVERY + bool + select STM32F100_SOC + config STRONGARM bool select PXA2XX @@ -294,7 +299,10 @@ config ZYNQ config ARM_V7M bool + # currently v7M must be included in a TCG build due to translate.c + default y if TCG && (ARM || AARCH64) select PTIMER + select ARM_COMPATIBLE_SEMIHOSTING config ALLWINNER_A10 bool @@ -324,6 +332,12 @@ config RASPI select SDHCI select USB_DWC2 +config STM32F100_SOC + bool + select ARM_V7M + select STM32F2XX_USART + select STM32F2XX_SPI + config STM32F205_SOC bool select ARM_V7M @@ -366,16 +380,24 @@ config XLNX_VERSAL select UNIMP select XLNX_ZDMA select XLNX_ZYNQMP + select OR_IRQ + select XLNX_BBRAM + select XLNX_EFUSE_VERSAL config NPCM7XX bool select A9MPCORE + select ADM1272 select ARM_GIC + select SMBUS select AT24C # EEPROM + select MAX34451 select PL310 # cache controller + select PMBUS select SERIAL select SSI select UNIMP + select PCA954X config FSL_IMX25 bool @@ -383,7 +405,7 @@ config FSL_IMX25 select IMX_FEC select IMX_I2C select WDT_IMX2 - select DS1338 + select SDHCI config FSL_IMX31 bool @@ -408,9 +430,11 @@ config ASPEED_SOC select DS1338 select FTGMAC100 select I2C + select DPS310 select PCA9552 select SERIAL select SMBUS_EEPROM + select PCA954X select SSI select SSI_M25P80 select TMP105 diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index d404f31e02f..05e84728cb3 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -16,10 +16,8 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/arm/allwinner-a10.h" #include "hw/misc/unimp.h" diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c index 88259a9c0d1..f9b7ed18711 100644 --- a/hw/arm/allwinner-h3.c +++ b/hw/arm/allwinner-h3.c @@ -18,13 +18,11 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "qemu/units.h" #include "hw/qdev-core.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/char/serial.h" #include "hw/misc/unimp.h" @@ -239,7 +237,7 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) /* Provide Power State Coordination Interface */ qdev_prop_set_int32(DEVICE(&s->cpus[i]), "psci-conduit", - QEMU_PSCI_CONDUIT_HVC); + QEMU_PSCI_CONDUIT_SMC); /* Disable secondary CPUs */ qdev_prop_set_bit(DEVICE(&s->cpus[i]), "start-powered-off", diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c index 2e5d0679e7b..aecdeb9815a 100644 --- 
a/hw/arm/armsse.c +++ b/hw/arm/armsse.c @@ -13,6 +13,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qemu/bitops.h" +#include "qemu/units.h" #include "qapi/error.h" #include "trace.h" #include "hw/sysbus.h" @@ -59,6 +60,7 @@ struct ARMSSEInfo { const char *cpu_type; uint32_t sse_version; int sram_banks; + uint32_t sram_bank_base; int num_cpus; uint32_t sys_version; uint32_t iidr; @@ -69,6 +71,7 @@ struct ARMSSEInfo { bool has_cpuid; bool has_cpu_pwrctrl; bool has_sse_counter; + bool has_tcms; Property *props; const ARMSSEDeviceInfo *devinfo; const bool *irq_is_common; @@ -102,7 +105,7 @@ static Property sse300_properties[] = { DEFINE_PROP_LINK("memory", ARMSSE, board_memory, TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_UINT32("EXP_NUMIRQ", ARMSSE, exp_numirq, 64), - DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 15), + DEFINE_PROP_UINT32("SRAM_ADDR_WIDTH", ARMSSE, sram_addr_width, 18), DEFINE_PROP_UINT32("init-svtor", ARMSSE, init_svtor, 0x10000000), DEFINE_PROP_BOOL("CPU0_FPU", ARMSSE, cpu_fpu[0], true), DEFINE_PROP_BOOL("CPU0_DSP", ARMSSE, cpu_dsp[0], true), @@ -504,6 +507,7 @@ static const ARMSSEInfo armsse_variants[] = { .sse_version = ARMSSE_IOTKIT, .cpu_type = ARM_CPU_TYPE_NAME("cortex-m33"), .sram_banks = 1, + .sram_bank_base = 0x20000000, .num_cpus = 1, .sys_version = 0x41743, .iidr = 0, @@ -514,6 +518,7 @@ static const ARMSSEInfo armsse_variants[] = { .has_cpuid = false, .has_cpu_pwrctrl = false, .has_sse_counter = false, + .has_tcms = false, .props = iotkit_properties, .devinfo = iotkit_devices, .irq_is_common = sse200_irq_is_common, @@ -523,6 +528,7 @@ static const ARMSSEInfo armsse_variants[] = { .sse_version = ARMSSE_SSE200, .cpu_type = ARM_CPU_TYPE_NAME("cortex-m33"), .sram_banks = 4, + .sram_bank_base = 0x20000000, .num_cpus = 2, .sys_version = 0x22041743, .iidr = 0, @@ -533,6 +539,7 @@ static const ARMSSEInfo armsse_variants[] = { .has_cpuid = true, .has_cpu_pwrctrl = false, .has_sse_counter = false, + .has_tcms = false, .props = sse200_properties, .devinfo = sse200_devices, .irq_is_common = sse200_irq_is_common, @@ -542,6 +549,7 @@ static const ARMSSEInfo armsse_variants[] = { .sse_version = ARMSSE_SSE300, .cpu_type = ARM_CPU_TYPE_NAME("cortex-m55"), .sram_banks = 2, + .sram_bank_base = 0x21000000, .num_cpus = 1, .sys_version = 0x7e00043b, .iidr = 0x74a0043b, @@ -552,6 +560,7 @@ static const ARMSSEInfo armsse_variants[] = { .has_cpuid = true, .has_cpu_pwrctrl = true, .has_sse_counter = true, + .has_tcms = true, .props = sse300_properties, .devinfo = sse300_devices, .irq_is_common = sse300_irq_is_common, @@ -680,17 +689,6 @@ static void armsse_forward_sec_resp_cfg(ARMSSE *s) qdev_connect_gpio_out(dev_splitter, 2, s->sec_resp_cfg_in); } -static void armsse_mainclk_update(void *opaque, ClockEvent event) -{ - ARMSSE *s = ARM_SSE(opaque); - - /* - * Set system_clock_scale from our Clock input; this is what - * controls the tick rate of the CPU SysTick timer. 
- */ - system_clock_scale = clock_ticks_to_ns(s->mainclk, 1); -} - static void armsse_init(Object *obj) { ARMSSE *s = ARM_SSE(obj); @@ -702,8 +700,7 @@ static void armsse_init(Object *obj) assert(info->sram_banks <= MAX_SRAM_BANKS); assert(info->num_cpus <= SSE_MAX_CPUS); - s->mainclk = qdev_init_clock_in(DEVICE(s), "MAINCLK", - armsse_mainclk_update, s, ClockUpdate); + s->mainclk = qdev_init_clock_in(DEVICE(s), "MAINCLK", NULL, NULL, 0); s->s32kclk = qdev_init_clock_in(DEVICE(s), "S32KCLK", NULL, NULL, 0); memory_region_init(&s->container, obj, "armsse-container", UINT64_MAX); @@ -909,7 +906,6 @@ static void armsse_realize(DeviceState *dev, Error **errp) const ARMSSEDeviceInfo *devinfo; int i; MemoryRegion *mr; - Error *err = NULL; SysBusDevice *sbd_apb_ppc0; SysBusDevice *sbd_secctl; DeviceState *dev_apb_ppc0; @@ -918,6 +914,8 @@ static void armsse_realize(DeviceState *dev, Error **errp) DeviceState *dev_splitter; uint32_t addr_width_max; + ERRP_GUARD(); + if (!s->board_memory) { error_setg(errp, "memory property was not set"); return; @@ -985,6 +983,9 @@ static void armsse_realize(DeviceState *dev, Error **errp) int j; char *gpioname; + qdev_connect_clock_in(cpudev, "cpuclk", s->mainclk); + /* The SSE subsystems do not wire up a systick refclk */ + qdev_prop_set_uint32(cpudev, "num-irq", s->exp_numirq + NUM_SSE_IRQS); /* * In real hardware the initial Secure VTOR is set from the INITSVTOR* @@ -1147,10 +1148,9 @@ static void armsse_realize(DeviceState *dev, Error **errp) uint32_t sram_bank_size = 1 << s->sram_addr_width; memory_region_init_ram(&s->sram[i], NULL, ramname, - sram_bank_size, &err); + sram_bank_size, errp); g_free(ramname); - if (err) { - error_propagate(errp, err); + if (*errp) { return; } object_property_set_link(OBJECT(&s->mpc[i]), "downstream", @@ -1161,7 +1161,7 @@ static void armsse_realize(DeviceState *dev, Error **errp) /* Map the upstream end of the MPC into the right place... */ sbd_mpc = SYS_BUS_DEVICE(&s->mpc[i]); memory_region_add_subregion(&s->container, - 0x20000000 + i * sram_bank_size, + info->sram_bank_base + i * sram_bank_size, sysbus_mmio_get_region(sbd_mpc, 1)); /* ...and its register interface */ memory_region_add_subregion(&s->container, 0x50083000 + i * 0x1000, @@ -1210,6 +1210,20 @@ static void armsse_realize(DeviceState *dev, Error **errp) sysbus_mmio_get_region(sbd, 1)); } + if (info->has_tcms) { + /* The SSE-300 has an ITCM at 0x0000_0000 and a DTCM at 0x2000_0000 */ + memory_region_init_ram(&s->itcm, NULL, "sse300-itcm", 512 * KiB, errp); + if (*errp) { + return; + } + memory_region_init_ram(&s->dtcm, NULL, "sse300-dtcm", 512 * KiB, errp); + if (*errp) { + return; + } + memory_region_add_subregion(&s->container, 0x00000000, &s->itcm); + memory_region_add_subregion(&s->container, 0x20000000, &s->dtcm); + } + /* Devices behind APB PPC0: * 0x40000000: timer0 * 0x40001000: timer1 @@ -1628,9 +1642,6 @@ static void armsse_realize(DeviceState *dev, Error **errp) * devices in the ARMSSE. 
*/ sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->container); - - /* Set initial system_clock_scale from MAINCLK */ - armsse_mainclk_update(s, ClockUpdate); } static void armsse_idau_check(IDAUInterface *ii, uint32_t address, diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index 6dd10d84703..8d08db80be8 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -10,17 +10,18 @@ #include "qemu/osdep.h" #include "hw/arm/armv7m.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/arm/boot.h" #include "hw/loader.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "elf.h" #include "sysemu/reset.h" #include "qemu/error-report.h" #include "qemu/module.h" -#include "exec/address-spaces.h" +#include "qemu/log.h" #include "target/arm/idau.h" +#include "migration/vmstate.h" /* Bitbanded IO. Each word corresponds to a single bit. */ @@ -126,6 +127,122 @@ static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = { 0x22000000, 0x42000000 }; +static MemTxResult v7m_sysreg_ns_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + MemoryRegion *mr = opaque; + + if (attrs.secure) { + /* S accesses to the alias act like NS accesses to the real region */ + attrs.secure = 0; + return memory_region_dispatch_write(mr, addr, value, + size_memop(size) | MO_TE, attrs); + } else { + /* NS attrs are RAZ/WI for privileged, and BusFault for user */ + if (attrs.user) { + return MEMTX_ERROR; + } + return MEMTX_OK; + } +} + +static MemTxResult v7m_sysreg_ns_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + MemoryRegion *mr = opaque; + + if (attrs.secure) { + /* S accesses to the alias act like NS accesses to the real region */ + attrs.secure = 0; + return memory_region_dispatch_read(mr, addr, data, + size_memop(size) | MO_TE, attrs); + } else { + /* NS attrs are RAZ/WI for privileged, and BusFault for user */ + if (attrs.user) { + return MEMTX_ERROR; + } + *data = 0; + return MEMTX_OK; + } +} + +static const MemoryRegionOps v7m_sysreg_ns_ops = { + .read_with_attrs = v7m_sysreg_ns_read, + .write_with_attrs = v7m_sysreg_ns_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static MemTxResult v7m_systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + ARMv7MState *s = opaque; + MemoryRegion *mr; + + /* Direct the access to the correct systick */ + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); + return memory_region_dispatch_write(mr, addr, value, + size_memop(size) | MO_TE, attrs); +} + +static MemTxResult v7m_systick_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + ARMv7MState *s = opaque; + MemoryRegion *mr; + + /* Direct the access to the correct systick */ + mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); + return memory_region_dispatch_read(mr, addr, data, size_memop(size) | MO_TE, + attrs); +} + +static const MemoryRegionOps v7m_systick_ops = { + .read_with_attrs = v7m_systick_read, + .write_with_attrs = v7m_systick_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +/* + * Unassigned portions of the PPB space are RAZ/WI for privileged + * accesses, and fault for non-privileged accesses. 
+ */ +static MemTxResult ppb_default_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + qemu_log_mask(LOG_UNIMP, "Read of unassigned area of PPB: offset 0x%x\n", + (uint32_t)addr); + if (attrs.user) { + return MEMTX_ERROR; + } + *data = 0; + return MEMTX_OK; +} + +static MemTxResult ppb_default_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + qemu_log_mask(LOG_UNIMP, "Write of unassigned area of PPB: offset 0x%x\n", + (uint32_t)addr); + if (attrs.user) { + return MEMTX_ERROR; + } + return MEMTX_OK; +} + +static const MemoryRegionOps ppb_default_ops = { + .read_with_attrs = ppb_default_read, + .write_with_attrs = ppb_default_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 8, +}; + static void armv7m_instance_init(Object *obj) { ARMv7MState *s = ARMV7M(obj); @@ -139,10 +256,20 @@ static void armv7m_instance_init(Object *obj) object_property_add_alias(obj, "num-irq", OBJECT(&s->nvic), "num-irq"); + object_initialize_child(obj, "systick-reg-ns", &s->systick[M_REG_NS], + TYPE_SYSTICK); + /* + * We can't initialize the secure systick here, as we don't know + * yet if we need it. + */ + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { object_initialize_child(obj, "bitband[*]", &s->bitband[i], TYPE_BITBAND); } + + s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk", NULL, NULL, 0); + s->cpuclk = qdev_init_clock_in(DEVICE(obj), "cpuclk", NULL, NULL, 0); } static void armv7m_realize(DeviceState *dev, Error **errp) @@ -178,6 +305,12 @@ static void armv7m_realize(DeviceState *dev, Error **errp) return; } } + if (object_property_find(OBJECT(s->cpu), "init-nsvtor")) { + if (!object_property_set_uint(OBJECT(s->cpu), "init-nsvtor", + s->init_nsvtor, errp)) { + return; + } + } if (object_property_find(OBJECT(s->cpu), "start-powered-off")) { if (!object_property_set_bool(OBJECT(s->cpu), "start-powered-off", s->start_powered_off, errp)) { @@ -219,13 +352,130 @@ static void armv7m_realize(DeviceState *dev, Error **errp) qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ"); qdev_pass_gpios(DEVICE(&s->nvic), dev, "NMI"); + /* + * We map various devices into the container MR at their architected + * addresses. In particular, we map everything corresponding to the + * "System PPB" space. This is the range from 0xe0000000 to 0xe00fffff + * and includes the NVIC, the System Control Space (system registers), + * the systick timer, and for CPUs with the Security extension an NS + * banked version of all of these. + * + * The default behaviour for unimplemented registers/ranges + * (for instance the Data Watchpoint and Trace unit at 0xe0001000) + * is to RAZ/WI for privileged access and BusFault for non-privileged + * access. + * + * The NVIC and System Control Space (SCS) starts at 0xe000e000 + * and looks like this: + * 0x004 - ICTR + * 0x010 - 0xff - systick + * 0x100..0x7ec - NVIC + * 0x7f0..0xcff - Reserved + * 0xd00..0xd3c - SCS registers + * 0xd40..0xeff - Reserved or Not implemented + * 0xf00 - STIR + * + * Some registers within this space are banked between security states. + * In v8M there is a second range 0xe002e000..0xe002efff which is the + * NonSecure alias SCS; secure accesses to this behave like NS accesses + * to the main SCS range, and non-secure accesses (including when + * the security extension is not implemented) are RAZ/WI. 
+ * Note that both the main SCS range and the alias range are defined + * to be exempt from memory attribution (R_BLJT) and so the memory + * transaction attribute always matches the current CPU security + * state (attrs.secure == env->v7m.secure). In the v7m_sysreg_ns_ops + * wrappers we change attrs.secure to indicate the NS access; so + * generally code determining which banked register to use should + * use attrs.secure; code determining actual behaviour of the system + * should use env->v7m.secure. + * + * Within the PPB space, some MRs overlap, and the priority + * of overlapping regions is: + * - default region (for RAZ/WI and BusFault) : -1 + * - system register regions (provided by the NVIC) : 0 + * - systick : 1 + * This is because the systick device is a small block of registers + * in the middle of the other system control registers. + */ + + memory_region_init_io(&s->defaultmem, OBJECT(s), &ppb_default_ops, s, + "nvic-default", 0x100000); + memory_region_add_subregion_overlap(&s->container, 0xe0000000, + &s->defaultmem, -1); + /* Wire the NVIC up to the CPU */ sbd = SYS_BUS_DEVICE(&s->nvic); sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); - memory_region_add_subregion(&s->container, 0xe0000000, + memory_region_add_subregion(&s->container, 0xe000e000, sysbus_mmio_get_region(sbd, 0)); + if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { + /* Create the NS alias region for the NVIC sysregs */ + memory_region_init_io(&s->sysreg_ns_mem, OBJECT(s), + &v7m_sysreg_ns_ops, + sysbus_mmio_get_region(sbd, 0), + "nvic_sysregs_ns", 0x1000); + memory_region_add_subregion(&s->container, 0xe002e000, + &s->sysreg_ns_mem); + } + + /* Create and map the systick devices */ + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_NS]), "refclk", s->refclk); + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_NS]), "cpuclk", s->cpuclk); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), errp)) { + return; + } + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), 0, + qdev_get_gpio_in_named(DEVICE(&s->nvic), + "systick-trigger", M_REG_NS)); + + if (arm_feature(&s->cpu->env, ARM_FEATURE_M_SECURITY)) { + /* + * We couldn't init the secure systick device in instance_init + * as we didn't know then if the CPU had the security extensions; + * so we have to do it here. 
+ */ + object_initialize_child(OBJECT(dev), "systick-reg-s", + &s->systick[M_REG_S], TYPE_SYSTICK); + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_S]), "refclk", + s->refclk); + qdev_connect_clock_in(DEVICE(&s->systick[M_REG_S]), "cpuclk", + s->cpuclk); + + if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_S]), errp)) { + return; + } + sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_S]), 0, + qdev_get_gpio_in_named(DEVICE(&s->nvic), + "systick-trigger", M_REG_S)); + } + + memory_region_init_io(&s->systickmem, OBJECT(s), + &v7m_systick_ops, s, + "v7m_systick", 0xe0); + + memory_region_add_subregion_overlap(&s->container, 0xe000e010, + &s->systickmem, 1); + if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { + memory_region_init_io(&s->systick_ns_mem, OBJECT(s), + &v7m_sysreg_ns_ops, &s->systickmem, + "v7m_systick_ns", 0xe0); + memory_region_add_subregion_overlap(&s->container, 0xe002e010, + &s->systick_ns_mem, 1); + } + + /* If the CPU has RAS support, create the RAS register block */ + if (cpu_isar_feature(aa32_ras, s->cpu)) { + object_initialize_child(OBJECT(dev), "armv7m-ras", + &s->ras, TYPE_ARMV7M_RAS); + sbd = SYS_BUS_DEVICE(&s->ras); + if (!sysbus_realize(sbd, errp)) { + return; + } + memory_region_add_subregion_overlap(&s->container, 0xe0005000, + sysbus_mmio_get_region(sbd, 0), 1); + } for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { if (s->enable_bitband) { @@ -256,6 +506,7 @@ static Property armv7m_properties[] = { MemoryRegion *), DEFINE_PROP_LINK("idau", ARMv7MState, idau, TYPE_IDAU_INTERFACE, Object *), DEFINE_PROP_UINT32("init-svtor", ARMv7MState, init_svtor, 0), + DEFINE_PROP_UINT32("init-nsvtor", ARMv7MState, init_nsvtor, 0), DEFINE_PROP_BOOL("enable-bitband", ARMv7MState, enable_bitband, false), DEFINE_PROP_BOOL("start-powered-off", ARMv7MState, start_powered_off, false), @@ -264,11 +515,23 @@ static Property armv7m_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static const VMStateDescription vmstate_armv7m = { + .name = "armv7m", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_CLOCK(refclk, SysTickState), + VMSTATE_CLOCK(cpuclk, SysTickState), + VMSTATE_END_OF_LIST() + } +}; + static void armv7m_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = armv7m_realize; + dc->vmsd = &vmstate_armv7m; device_class_set_props(dc, armv7m_properties); } diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index a17b75f4940..a77f46b3adb 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -11,20 +11,16 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "exec/address-spaces.h" #include "hw/arm/boot.h" #include "hw/arm/aspeed.h" #include "hw/arm/aspeed_soc.h" -#include "hw/boards.h" +#include "hw/i2c/i2c_mux_pca954x.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/misc/pca9552.h" -#include "hw/misc/tmp105.h" +#include "hw/sensor/tmp105.h" #include "hw/misc/led.h" #include "hw/qdev-properties.h" -#include "qemu/log.h" #include "sysemu/block-backend.h" -#include "sysemu/sysemu.h" #include "hw/loader.h" #include "qemu/error-report.h" #include "qemu/units.h" @@ -135,9 +131,37 @@ struct AspeedMachineState { SCU_HW_STRAP_VGA_SIZE_SET(VGA_64M_DRAM) | \ SCU_AST2500_HW_STRAP_RESERVED1) +/* FP5280G2 hardware value: 0XF100D286 */ +#define FP5280G2_BMC_HW_STRAP1 ( \ + SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE | \ + SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE | \ + SCU_AST2500_HW_STRAP_UART_DEBUG | \ + SCU_AST2500_HW_STRAP_RESERVED28 | \ + SCU_AST2500_HW_STRAP_DDR4_ENABLE | \ + 
SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_LPC_RESET_PIN | \ + SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_MASTER) | \ + SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_MAC1_RGMII | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \ + SCU_AST2500_HW_STRAP_RESERVED1) + /* Witherspoon hardware value: 0xF10AD216 (but use romulus definition) */ #define WITHERSPOON_BMC_HW_STRAP1 ROMULUS_BMC_HW_STRAP1 +/* Quanta-Q71l hardware value */ +#define QUANTA_Q71L_BMC_HW_STRAP1 ( \ + SCU_AST2400_HW_STRAP_DRAM_SIZE(DRAM_SIZE_128MB) | \ + SCU_AST2400_HW_STRAP_DRAM_CONFIG(2/* DDR3 with CL=6, CWL=5 */) | \ + SCU_AST2400_HW_STRAP_ACPI_DIS | \ + SCU_AST2400_HW_STRAP_SET_CLK_SOURCE(AST2400_CLK_24M_IN) | \ + SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_PASS_THROUGH) | \ + SCU_AST2400_HW_STRAP_SET_CPU_AHB_RATIO(AST2400_CPU_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_SPI_WIDTH | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_8M_DRAM) | \ + SCU_AST2400_HW_STRAP_BOOT_MODE(AST2400_SPI_BOOT)) + /* AST2600 evb hardware value */ #define AST2600_EVB_HW_STRAP1 0x000000C0 #define AST2600_EVB_HW_STRAP2 0x00000003 @@ -146,6 +170,14 @@ struct AspeedMachineState { #define TACOMA_BMC_HW_STRAP1 0x00000000 #define TACOMA_BMC_HW_STRAP2 0x00000040 +/* Rainier hardware value: (QEMU prototype) */ +#define RAINIER_BMC_HW_STRAP1 0x00000000 +#define RAINIER_BMC_HW_STRAP2 0x00000000 + +/* Fuji hardware value */ +#define FUJI_BMC_HW_STRAP1 0x00000000 +#define FUJI_BMC_HW_STRAP2 0x00000000 + /* * The max ram region is for firmwares that scan the address space * with load/store to guess how much RAM the SoC has. @@ -257,18 +289,17 @@ static void aspeed_board_init_flashes(AspeedSMCState *s, int i ; for (i = 0; i < s->num_cs; ++i) { - AspeedSMCFlash *fl = &s->flashes[i]; DriveInfo *dinfo = drive_get_next(IF_MTD); qemu_irq cs_line; + DeviceState *dev; - fl->flash = qdev_new(flashtype); + dev = qdev_new(flashtype); if (dinfo) { - qdev_prop_set_drive(fl->flash, "drive", - blk_by_legacy_dinfo(dinfo)); + qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo)); } - qdev_realize_and_unref(fl->flash, BUS(s->spi), &error_fatal); + qdev_realize_and_unref(dev, BUS(s->spi), &error_fatal); - cs_line = qdev_get_gpio_in_named(fl->flash, SSI_GPIO_CS, 0); + cs_line = qdev_get_gpio_in_named(dev, SSI_GPIO_CS, 0); sysbus_connect_irq(SYS_BUS_DEVICE(s), i + 1, cs_line); } } @@ -327,7 +358,7 @@ static void aspeed_machine_init(MachineState *machine) object_property_set_int(OBJECT(&bmc->soc), "num-cs", amc->num_cs, &error_abort); object_property_set_link(OBJECT(&bmc->soc), "dram", - OBJECT(&bmc->ram_container), &error_abort); + OBJECT(machine->ram), &error_abort); if (machine->kernel_filename) { /* * When booting with a -kernel command line there is no u-boot @@ -337,6 +368,8 @@ static void aspeed_machine_init(MachineState *machine) object_property_set_int(OBJECT(&bmc->soc), "hw-prot-key", ASPEED_SCU_PROT_KEY, &error_abort); } + qdev_prop_set_uint32(DEVICE(&bmc->soc), "uart-default", + amc->uart_default); qdev_realize(DEVICE(&bmc->soc), NULL, &error_abort); memory_region_add_subregion(get_system_memory(), @@ -358,6 +391,7 @@ static void aspeed_machine_init(MachineState *machine) if (drive0) { AspeedSMCFlash *fl = &bmc->soc.fmc.flashes[0]; MemoryRegion *boot_rom = g_new(MemoryRegion, 1); + uint64_t size = memory_region_size(&fl->mmio); /* * create a ROM region using the default mapping window size of @@ -367,15 +401,15 @@ static void aspeed_machine_init(MachineState *machine) */ if (ASPEED_MACHINE(machine)->mmio_exec) { 
memory_region_init_alias(boot_rom, NULL, "aspeed.boot_rom", - &fl->mmio, 0, fl->size); + &fl->mmio, 0, size); memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, boot_rom); } else { memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", - fl->size, &error_abort); + size, &error_abort); memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, boot_rom); - write_boot_rom(drive0, FIRMWARE_ADDR, fl->size, &error_abort); + write_boot_rom(drive0, FIRMWARE_ADDR, size, &error_abort); } } @@ -411,6 +445,15 @@ static void aspeed_machine_init(MachineState *machine) arm_load_kernel(ARM_CPU(first_cpu), machine, &aspeed_board_binfo); } +static void at24c_eeprom_init(I2CBus *bus, uint8_t addr, uint32_t rsize) +{ + I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr); + DeviceState *dev = DEVICE(i2c_dev); + + qdev_prop_set_uint32(dev, "rom-size", rsize); + i2c_slave_realize_and_unref(i2c_dev, bus, &error_abort); +} + static void palmetto_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -433,6 +476,38 @@ static void palmetto_bmc_i2c_init(AspeedMachineState *bmc) object_property_set_int(OBJECT(dev), "temperature3", 110000, &error_abort); } +static void quanta_q71l_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + + /* + * The quanta-q71l platform expects tmp75s which are compatible with + * tmp105s. + */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4c); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4e); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4f); + + /* TODO: i2c-1: Add baseboard FRU eeprom@54 24c64 */ + /* TODO: i2c-1: Add Frontpanel FRU eeprom@57 24c64 */ + /* TODO: Add Memory Riser i2c mux and eeproms. 
*/ + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9546", 0x74); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), "pca9548", 0x77); + + /* TODO: i2c-3: Add BIOS FRU eeprom@56 24c64 */ + + /* i2c-7 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9546", 0x70); + /* - i2c@0: pmbus@59 */ + /* - i2c@1: pmbus@58 */ + /* - i2c@2: pmbus@58 */ + /* - i2c@3: pmbus@59 */ + + /* TODO: i2c-7: Add PDB FRU eeprom@52 */ + /* TODO: i2c-8: Add BMC FRU eeprom@50 */ +} + static void ast2500_evb_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -557,7 +632,6 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) /* Bus 3: TODO bmp280@77 */ /* Bus 3: TODO max31785@52 */ - /* Bus 3: TODO dps310@76 */ dev = DEVICE(i2c_slave_new(TYPE_PCA9552, 0x60)); qdev_prop_set_string(dev, "description", "pca1"); i2c_slave_realize_and_unref(I2C_SLAVE(dev), @@ -572,6 +646,7 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState *bmc) qdev_connect_gpio_out(dev, pca1_leds[i].gpio_id, qdev_get_gpio_in(DEVICE(led), 0)); } + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 3), "dps310", 0x76); i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), "tmp423", 0x4c); i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), "tmp423", 0x4c); @@ -629,6 +704,215 @@ static void g220a_bmc_i2c_init(AspeedMachineState *bmc) eeprom_buf); } +static void aspeed_eeprom_init(I2CBus *bus, uint8_t addr, uint32_t rsize) +{ + I2CSlave *i2c_dev = i2c_slave_new("at24c-eeprom", addr); + DeviceState *dev = DEVICE(i2c_dev); + + qdev_prop_set_uint32(dev, "rom-size", rsize); + i2c_slave_realize_and_unref(i2c_dev, bus, &error_abort); +} + +static void fp5280g2_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + I2CSlave *i2c_mux; + + /* The at24c256 */ + at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 1), 0x50, 32768); + + /* The fp5280g2 expects a TMP112 but a TMP105 is compatible */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), TYPE_TMP105, + 0x49); + + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 2), + "pca9546", 0x70); + /* It expects a TMP112 but a TMP105 is compatible */ + i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 0), TYPE_TMP105, + 0x4a); + + /* It expects a ds3232 but a ds1338 is good enough */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), "ds1338", 0x68); + + /* It expects a pca9555 but a pca9552 is compatible */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_PCA9552, + 0x20); +} + +static void rainier_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + I2CSlave *i2c_mux; + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 0), 0x51, 32 * KiB); + + /* The rainier expects a TMP275 but a TMP105 is compatible */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105, + 0x49); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), TYPE_TMP105, + 0x4a); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 4), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 2), 0x52, 64 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), TYPE_TMP105, + 0x48); + 
i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), TYPE_TMP105, + 0x49); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 5), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105, + 0x4a); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), TYPE_TMP105, + 0x4b); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 2), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 3), 0x51, 64 * KiB); + + /* Bus 7: TODO max31785@52 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "pca9552", 0x61); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), "dps310", 0x76); + /* Bus 7: TODO si7021-a20@20 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), TYPE_TMP105, + 0x48); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 7), 0x50, 64 * KiB); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 7), 0x51, 64 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), TYPE_TMP105, + 0x4a); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 8), 0x50, 64 * KiB); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 8), 0x51, 64 * KiB); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), "pca9552", 0x61); + /* Bus 8: ucd90320@11 */ + /* Bus 8: ucd90320@b */ + /* Bus 8: ucd90320@c */ + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp423", 0x4c); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp423", 0x4d); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 9), 0x50, 128 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 10), "tmp423", 0x4c); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 10), "tmp423", 0x4d); + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 10), 0x50, 128 * KiB); + + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), TYPE_TMP105, + 0x48); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), TYPE_TMP105, + 0x49); + i2c_mux = i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), + "pca9546", 0x70); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 0), 0x50, 64 * KiB); + aspeed_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x51, 64 * KiB); + + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 13), 0x50, 64 * KiB); + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 14), 0x50, 64 * KiB); + + aspeed_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 15), 0x50, 64 * KiB); +} + +static void get_pca9548_channels(I2CBus *bus, uint8_t mux_addr, + I2CBus **channels) +{ + I2CSlave *mux = i2c_slave_create_simple(bus, "pca9548", mux_addr); + for (int i = 0; i < 8; i++) { + channels[i] = pca954x_i2c_get_bus(mux, i); + } +} + +#define TYPE_LM75 TYPE_TMP105 +#define TYPE_TMP75 TYPE_TMP105 +#define TYPE_TMP422 "tmp422" + +static void fuji_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + I2CBus *i2c[144] = {}; + + for (int i = 0; i < 16; i++) { + i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i); + } + I2CBus *i2c180 = i2c[2]; + I2CBus *i2c480 = i2c[8]; + I2CBus 
*i2c600 = i2c[11]; + + get_pca9548_channels(i2c180, 0x70, &i2c[16]); + get_pca9548_channels(i2c480, 0x70, &i2c[24]); + /* NOTE: The device tree skips [32, 40) in the alias numbering */ + get_pca9548_channels(i2c600, 0x77, &i2c[40]); + get_pca9548_channels(i2c[24], 0x71, &i2c[48]); + get_pca9548_channels(i2c[25], 0x72, &i2c[56]); + get_pca9548_channels(i2c[26], 0x76, &i2c[64]); + get_pca9548_channels(i2c[27], 0x76, &i2c[72]); + for (int i = 0; i < 8; i++) { + get_pca9548_channels(i2c[40 + i], 0x76, &i2c[80 + i * 8]); + } + + i2c_slave_create_simple(i2c[17], TYPE_LM75, 0x4c); + i2c_slave_create_simple(i2c[17], TYPE_LM75, 0x4d); + + aspeed_eeprom_init(i2c[19], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[20], 0x50, 2 * KiB); + aspeed_eeprom_init(i2c[22], 0x52, 2 * KiB); + + i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x48); + i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x49); + i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x4a); + i2c_slave_create_simple(i2c[3], TYPE_TMP422, 0x4c); + + aspeed_eeprom_init(i2c[8], 0x51, 64 * KiB); + i2c_slave_create_simple(i2c[8], TYPE_LM75, 0x4a); + + i2c_slave_create_simple(i2c[50], TYPE_LM75, 0x4c); + aspeed_eeprom_init(i2c[50], 0x52, 64 * KiB); + i2c_slave_create_simple(i2c[51], TYPE_TMP75, 0x48); + i2c_slave_create_simple(i2c[52], TYPE_TMP75, 0x49); + + i2c_slave_create_simple(i2c[59], TYPE_TMP75, 0x48); + i2c_slave_create_simple(i2c[60], TYPE_TMP75, 0x49); + + aspeed_eeprom_init(i2c[65], 0x53, 64 * KiB); + i2c_slave_create_simple(i2c[66], TYPE_TMP75, 0x49); + i2c_slave_create_simple(i2c[66], TYPE_TMP75, 0x48); + aspeed_eeprom_init(i2c[68], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[69], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[70], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[71], 0x52, 64 * KiB); + + aspeed_eeprom_init(i2c[73], 0x53, 64 * KiB); + i2c_slave_create_simple(i2c[74], TYPE_TMP75, 0x49); + i2c_slave_create_simple(i2c[74], TYPE_TMP75, 0x48); + aspeed_eeprom_init(i2c[76], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[77], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[78], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[79], 0x52, 64 * KiB); + aspeed_eeprom_init(i2c[28], 0x50, 2 * KiB); + + for (int i = 0; i < 8; i++) { + aspeed_eeprom_init(i2c[81 + i * 8], 0x56, 64 * KiB); + i2c_slave_create_simple(i2c[82 + i * 8], TYPE_TMP75, 0x48); + i2c_slave_create_simple(i2c[83 + i * 8], TYPE_TMP75, 0x4b); + i2c_slave_create_simple(i2c[84 + i * 8], TYPE_TMP75, 0x4a); + } +} + static bool aspeed_get_mmio_exec(Object *obj, Error **errp) { return ASPEED_MACHINE(obj)->mmio_exec; @@ -707,6 +991,7 @@ static void aspeed_machine_class_init(ObjectClass *oc, void *data) mc->no_parallel = 1; mc->default_ram_id = "ram"; amc->macs_mask = ASPEED_MAC0_ON; + amc->uart_default = ASPEED_DEV_UART5; aspeed_machine_class_props_init(oc); } @@ -728,6 +1013,23 @@ static void aspeed_machine_palmetto_class_init(ObjectClass *oc, void *data) aspeed_soc_num_cpus(amc->soc_name); }; +static void aspeed_machine_quanta_q71l_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Quanta-Q71l BMC (ARM926EJ-S)"; + amc->soc_name = "ast2400-a1"; + amc->hw_strap1 = QUANTA_Q71L_BMC_HW_STRAP1; + amc->fmc_model = "n25q256a"; + amc->spi_model = "mx25l25635e"; + amc->num_cs = 1; + amc->i2c_init = quanta_q71l_bmc_i2c_init; + mc->default_ram_size = 128 * MiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +} + static void aspeed_machine_supermicrox11_bmc_class_init(ObjectClass *oc, void 
*data) { @@ -811,6 +1113,9 @@ static void aspeed_machine_swift_class_init(ObjectClass *oc, void *data) mc->default_ram_size = 512 * MiB; mc->default_cpus = mc->min_cpus = mc->max_cpus = aspeed_soc_num_cpus(amc->soc_name); + + mc->deprecation_reason = "redundant system. Please use a similar " + "OpenPOWER BMC, Witherspoon or Romulus."; }; static void aspeed_machine_witherspoon_class_init(ObjectClass *oc, void *data) @@ -835,14 +1140,15 @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "Aspeed AST2600 EVB (Cortex A7)"; - amc->soc_name = "ast2600-a1"; + mc->desc = "Aspeed AST2600 EVB (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; amc->hw_strap1 = AST2600_EVB_HW_STRAP1; amc->hw_strap2 = AST2600_EVB_HW_STRAP2; amc->fmc_model = "w25q512jv"; amc->spi_model = "mx66u51235f"; amc->num_cs = 1; - amc->macs_mask = ASPEED_MAC1_ON | ASPEED_MAC2_ON | ASPEED_MAC3_ON; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON | ASPEED_MAC2_ON | + ASPEED_MAC3_ON; amc->i2c_init = ast2600_evb_i2c_init; mc->default_ram_size = 1 * GiB; mc->default_cpus = mc->min_cpus = mc->max_cpus = @@ -854,8 +1160,8 @@ static void aspeed_machine_tacoma_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); - mc->desc = "OpenPOWER Tacoma BMC (Cortex A7)"; - amc->soc_name = "ast2600-a1"; + mc->desc = "OpenPOWER Tacoma BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; amc->hw_strap1 = TACOMA_BMC_HW_STRAP1; amc->hw_strap2 = TACOMA_BMC_HW_STRAP2; amc->fmc_model = "mx66l1g45g"; @@ -879,13 +1185,77 @@ static void aspeed_machine_g220a_class_init(ObjectClass *oc, void *data) amc->fmc_model = "n25q512a"; amc->spi_model = "mx25l25635e"; amc->num_cs = 2; - amc->macs_mask = ASPEED_MAC1_ON | ASPEED_MAC2_ON; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON; amc->i2c_init = g220a_bmc_i2c_init; mc->default_ram_size = 1024 * MiB; mc->default_cpus = mc->min_cpus = mc->max_cpus = aspeed_soc_num_cpus(amc->soc_name); }; +static void aspeed_machine_fp5280g2_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Inspur FP5280G2 BMC (ARM1176)"; + amc->soc_name = "ast2500-a1"; + amc->hw_strap1 = FP5280G2_BMC_HW_STRAP1; + amc->fmc_model = "n25q512a"; + amc->spi_model = "mx25l25635e"; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON; + amc->i2c_init = fp5280g2_bmc_i2c_init; + mc->default_ram_size = 512 * MiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + +static void aspeed_machine_rainier_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "IBM Rainier BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = RAINIER_BMC_HW_STRAP1; + amc->hw_strap2 = RAINIER_BMC_HW_STRAP2; + amc->fmc_model = "mx66l1g45g"; + amc->spi_model = "mx66l1g45g"; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON; + amc->i2c_init = rainier_bmc_i2c_init; + mc->default_ram_size = 1 * GiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + +/* On 32-bit hosts, lower RAM to 1G because of the 2047 MB limit */ +#if HOST_LONG_BITS == 32 +#define FUJI_BMC_RAM_SIZE (1 * GiB) +#else +#define FUJI_BMC_RAM_SIZE (2 * GiB) +#endif + +static void 
aspeed_machine_fuji_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Facebook Fuji BMC (Cortex-A7)"; + amc->soc_name = "ast2600-a3"; + amc->hw_strap1 = FUJI_BMC_HW_STRAP1; + amc->hw_strap2 = FUJI_BMC_HW_STRAP2; + amc->fmc_model = "mx66l1g45g"; + amc->spi_model = "mx66l1g45g"; + amc->num_cs = 2; + amc->macs_mask = ASPEED_MAC3_ON; + amc->i2c_init = fuji_bmc_i2c_init; + amc->uart_default = ASPEED_DEV_UART1; + mc->default_ram_size = FUJI_BMC_RAM_SIZE; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + static const TypeInfo aspeed_machine_types[] = { { .name = MACHINE_TYPE_NAME("palmetto-bmc"), @@ -927,6 +1297,22 @@ static const TypeInfo aspeed_machine_types[] = { .name = MACHINE_TYPE_NAME("g220a-bmc"), .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_g220a_class_init, + }, { + .name = MACHINE_TYPE_NAME("fp5280g2-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_fp5280g2_class_init, + }, { + .name = MACHINE_TYPE_NAME("quanta-q71l-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_quanta_q71l_class_init, + }, { + .name = MACHINE_TYPE_NAME("rainier-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_rainier_class_init, + }, { + .name = MACHINE_TYPE_NAME("fuji-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_fuji_class_init, }, { .name = TYPE_ASPEED_MACHINE, .parent = TYPE_MACHINE, diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c index bc87e754a3c..0384357a951 100644 --- a/hw/arm/aspeed_ast2600.c +++ b/hw/arm/aspeed_ast2600.c @@ -9,12 +9,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "exec/address-spaces.h" #include "hw/misc/unimp.h" #include "hw/arm/aspeed_soc.h" #include "hw/char/serial.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" #include "hw/i2c/aspeed_i2c.h" @@ -42,6 +39,7 @@ static const hwaddr aspeed_soc_ast2600_memmap[] = { [ASPEED_DEV_ETH2] = 0x1E680000, [ASPEED_DEV_ETH4] = 0x1E690000, [ASPEED_DEV_VIC] = 0x1E6C0000, + [ASPEED_DEV_HACE] = 0x1E6D0000, [ASPEED_DEV_SDMC] = 0x1E6E0000, [ASPEED_DEV_SCU] = 0x1E6E2000, [ASPEED_DEV_XDMA] = 0x1E6E7000, @@ -102,6 +100,7 @@ static const int aspeed_soc_ast2600_irqmap[] = { [ASPEED_DEV_I2C] = 110, /* 110 -> 125 */ [ASPEED_DEV_ETH1] = 2, [ASPEED_DEV_ETH2] = 3, + [ASPEED_DEV_HACE] = 4, [ASPEED_DEV_ETH3] = 32, [ASPEED_DEV_ETH4] = 33, [ASPEED_DEV_KCS] = 138, /* 138 -> 142 */ @@ -149,6 +148,9 @@ static void aspeed_soc_ast2600_init(Object *obj) snprintf(typename, sizeof(typename), "aspeed.timer-%s", socname); object_initialize_child(obj, "timerctrl", &s->timerctrl, typename); + snprintf(typename, sizeof(typename), "aspeed.adc-%s", socname); + object_initialize_child(obj, "adc", &s->adc, typename); + snprintf(typename, sizeof(typename), "aspeed.i2c-%s", socname); object_initialize_child(obj, "i2c", &s->i2c, typename); @@ -185,7 +187,8 @@ static void aspeed_soc_ast2600_init(Object *obj) object_initialize_child(obj, "mii[*]", &s->mii[i], TYPE_ASPEED_MII); } - object_initialize_child(obj, "xdma", &s->xdma, TYPE_ASPEED_XDMA); + snprintf(typename, sizeof(typename), TYPE_ASPEED_XDMA "-%s", socname); + object_initialize_child(obj, "xdma", &s->xdma, typename); snprintf(typename, sizeof(typename), "aspeed.gpio-%s", socname); object_initialize_child(obj, "gpio", &s->gpio, typename); @@ -213,6 +216,9 @@ static void aspeed_soc_ast2600_init(Object 
*obj) TYPE_SYSBUS_SDHCI); object_initialize_child(obj, "lpc", &s->lpc, TYPE_ASPEED_LPC); + + snprintf(typename, sizeof(typename), "aspeed.hace-%s", socname); + object_initialize_child(obj, "hace", &s->hace, typename); } /* @@ -319,10 +325,18 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq); } - /* UART - attach an 8250 to the IO space as our UART5 */ - serial_mm_init(get_system_memory(), sc->memmap[ASPEED_DEV_UART5], 2, - aspeed_soc_get_irq(s, ASPEED_DEV_UART5), - 38400, serial_hd(0), DEVICE_LITTLE_ENDIAN); + /* ADC */ + if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_ADC)); + + /* UART - attach an 8250 to the IO space as our UART */ + serial_mm_init(get_system_memory(), sc->memmap[s->uart_default], 2, + aspeed_soc_get_irq(s, s->uart_default), 38400, + serial_hd(0), DEVICE_LITTLE_ENDIAN); /* I2C */ object_property_set_link(OBJECT(&s->i2c), "dram", OBJECT(s->dram_mr), @@ -334,26 +348,19 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) { qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->a7mpcore), sc->irqmap[ASPEED_DEV_I2C] + i); - /* - * The AST2600 SoC has one IRQ per I2C bus. Skip the common - * IRQ (AST2400 and AST2500) and connect all bussses. - */ - sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c), i + 1, irq); + /* The AST2600 I2C controller has one IRQ per bus. */ + sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c.busses[i]), 0, irq); } /* FMC, The number of CS is set at the board level */ object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr), &error_abort); - if (!object_property_set_int(OBJECT(&s->fmc), "sdram-base", - sc->memmap[ASPEED_DEV_SDRAM], errp)) { - return; - } if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 1, - s->fmc.ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base); sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, aspeed_soc_get_irq(s, ASPEED_DEV_FMC)); @@ -368,7 +375,7 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, sc->memmap[ASPEED_DEV_SPI1 + i]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 1, - s->spi[i].ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base); } /* EHCI */ @@ -498,6 +505,16 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 1 + aspeed_lpc_kcs_4, qdev_get_gpio_in(DEVICE(&s->a7mpcore), sc->irqmap[ASPEED_DEV_KCS] + aspeed_lpc_kcs_4)); + + /* HACE */ + object_property_set_link(OBJECT(&s->hace), "dram", OBJECT(s->dram_mr), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->hace), 0, sc->memmap[ASPEED_DEV_HACE]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_HACE)); } static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data) @@ -507,9 +524,9 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data) dc->realize = aspeed_soc_ast2600_realize; - sc->name = "ast2600-a1"; + sc->name = "ast2600-a3"; sc->cpu_type = ARM_CPU_TYPE_NAME("cortex-a7"); - sc->silicon_rev = 
AST2600_A1_SILICON_REV; + sc->silicon_rev = AST2600_A3_SILICON_REV; sc->sram_size = 0x16400; sc->spis_num = 2; sc->ehcis_num = 2; @@ -521,7 +538,7 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, void *data) } static const TypeInfo aspeed_soc_ast2600_type_info = { - .name = "ast2600-a1", + .name = "ast2600-a3", .parent = TYPE_ASPEED_SOC, .instance_size = sizeof(AspeedSoCState), .instance_init = aspeed_soc_ast2600_init, diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c index 057d053c847..7d53cf2f513 100644 --- a/hw/arm/aspeed_soc.c +++ b/hw/arm/aspeed_soc.c @@ -12,12 +12,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "exec/address-spaces.h" #include "hw/misc/unimp.h" #include "hw/arm/aspeed_soc.h" #include "hw/char/serial.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" #include "hw/i2c/aspeed_i2c.h" @@ -34,6 +31,7 @@ static const hwaddr aspeed_soc_ast2400_memmap[] = { [ASPEED_DEV_VIC] = 0x1E6C0000, [ASPEED_DEV_SDMC] = 0x1E6E0000, [ASPEED_DEV_SCU] = 0x1E6E2000, + [ASPEED_DEV_HACE] = 0x1E6E3000, [ASPEED_DEV_XDMA] = 0x1E6E7000, [ASPEED_DEV_VIDEO] = 0x1E700000, [ASPEED_DEV_ADC] = 0x1E6E9000, @@ -65,6 +63,7 @@ static const hwaddr aspeed_soc_ast2500_memmap[] = { [ASPEED_DEV_VIC] = 0x1E6C0000, [ASPEED_DEV_SDMC] = 0x1E6E0000, [ASPEED_DEV_SCU] = 0x1E6E2000, + [ASPEED_DEV_HACE] = 0x1E6E3000, [ASPEED_DEV_XDMA] = 0x1E6E7000, [ASPEED_DEV_ADC] = 0x1E6E9000, [ASPEED_DEV_VIDEO] = 0x1E700000, @@ -117,6 +116,7 @@ static const int aspeed_soc_ast2400_irqmap[] = { [ASPEED_DEV_ETH2] = 3, [ASPEED_DEV_XDMA] = 6, [ASPEED_DEV_SDHCI] = 26, + [ASPEED_DEV_HACE] = 4, }; #define aspeed_soc_ast2500_irqmap aspeed_soc_ast2400_irqmap @@ -162,6 +162,9 @@ static void aspeed_soc_init(Object *obj) snprintf(typename, sizeof(typename), "aspeed.timer-%s", socname); object_initialize_child(obj, "timerctrl", &s->timerctrl, typename); + snprintf(typename, sizeof(typename), "aspeed.adc-%s", socname); + object_initialize_child(obj, "adc", &s->adc, typename); + snprintf(typename, sizeof(typename), "aspeed.i2c-%s", socname); object_initialize_child(obj, "i2c", &s->i2c, typename); @@ -196,7 +199,8 @@ static void aspeed_soc_init(Object *obj) TYPE_FTGMAC100); } - object_initialize_child(obj, "xdma", &s->xdma, TYPE_ASPEED_XDMA); + snprintf(typename, sizeof(typename), TYPE_ASPEED_XDMA "-%s", socname); + object_initialize_child(obj, "xdma", &s->xdma, typename); snprintf(typename, sizeof(typename), "aspeed.gpio-%s", socname); object_initialize_child(obj, "gpio", &s->gpio, typename); @@ -212,6 +216,9 @@ static void aspeed_soc_init(Object *obj) } object_initialize_child(obj, "lpc", &s->lpc, TYPE_ASPEED_LPC); + + snprintf(typename, sizeof(typename), "aspeed.hace-%s", socname); + object_initialize_child(obj, "hace", &s->hace, typename); } static void aspeed_soc_realize(DeviceState *dev, Error **errp) @@ -283,9 +290,17 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq); } - /* UART - attach an 8250 to the IO space as our UART5 */ - serial_mm_init(get_system_memory(), sc->memmap[ASPEED_DEV_UART5], 2, - aspeed_soc_get_irq(s, ASPEED_DEV_UART5), 38400, + /* ADC */ + if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_ADC)); + + /* UART - attach an 8250 to the IO space as our UART */ + serial_mm_init(get_system_memory(), 
sc->memmap[s->uart_default], 2, + aspeed_soc_get_irq(s, s->uart_default), 38400, serial_hd(0), DEVICE_LITTLE_ENDIAN); /* I2C */ @@ -301,16 +316,12 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) /* FMC, The number of CS is set at the board level */ object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr), &error_abort); - if (!object_property_set_int(OBJECT(&s->fmc), "sdram-base", - sc->memmap[ASPEED_DEV_SDRAM], errp)) { - return; - } if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 1, - s->fmc.ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base); sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, aspeed_soc_get_irq(s, ASPEED_DEV_FMC)); @@ -323,7 +334,7 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, sc->memmap[ASPEED_DEV_SPI1 + i]); sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 1, - s->spi[i].ctrl->flash_window_base); + ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base); } /* EHCI */ @@ -425,10 +436,22 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 1 + aspeed_lpc_kcs_4, qdev_get_gpio_in(DEVICE(&s->lpc), aspeed_lpc_kcs_4)); + + /* HACE */ + object_property_set_link(OBJECT(&s->hace), "dram", OBJECT(s->dram_mr), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->hace), 0, sc->memmap[ASPEED_DEV_HACE]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0, + aspeed_soc_get_irq(s, ASPEED_DEV_HACE)); } static Property aspeed_soc_properties[] = { DEFINE_PROP_LINK("dram", AspeedSoCState, dram_mr, TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_UINT32("uart-default", AspeedSoCState, uart_default, + ASPEED_DEV_UART5), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index dcff13433e5..48538c9360c 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -126,6 +126,10 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->dwc2), "dma-mr", OBJECT(&s->gpu_bus_mr)); + + /* Power Management */ + object_initialize_child(obj, "powermgt", &s->powermgt, + TYPE_BCM2835_POWERMGT); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -364,9 +368,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_USB)); + /* Power Management */ + if (!sysbus_realize(SYS_BUS_DEVICE(&s->powermgt), errp)) { + return; + } + + memory_region_add_subregion(&s->peri_mr, PM_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->powermgt), 0)); + create_unimp(s, &s->txp, "bcm2835-txp", TXP_OFFSET, 0x1000); create_unimp(s, &s->armtmr, "bcm2835-sp804", ARMCTRL_TIMER0_1_OFFSET, 0x40); - create_unimp(s, &s->powermgt, "bcm2835-powermgt", PM_OFFSET, 0x114); create_unimp(s, &s->i2s, "bcm2835-i2s", I2S_OFFSET, 0x100); create_unimp(s, &s->smi, "bcm2835-smi", SMI_OFFSET, 0x100); create_unimp(s, &s->spi[0], "bcm2835-spi0", SPI0_OFFSET, 0x20); diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c index de7ade2878e..24354338cad 100644 --- a/hw/arm/bcm2836.c +++ b/hw/arm/bcm2836.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/arm/bcm2836.h" #include "hw/arm/raspi_platform.h" 
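The new "uart-default" SoC property defined above lets a board route the serial console to a UART other than the traditional UART5; the Fuji machine at the start of this section selects UART1 through amc->uart_default. The corresponding hook-up in hw/arm/aspeed.c is not shown in this excerpt; a minimal sketch of that board glue, assuming the usual AspeedMachineState 'bmc' with an embedded 'soc' field, could look like:

    /* Sketch only: forward the machine-class default UART to the SoC. */
    if (amc->uart_default != ASPEED_DEV_UART5) {
        qdev_prop_set_uint32(DEVICE(&bmc->soc), "uart-default",
                             amc->uart_default);
    }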
#include "hw/sysbus.h" diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 42e73248c0f..61ae93fa013 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -25,7 +25,6 @@ #include "sysemu/device_tree.h" #include "qemu/config-file.h" #include "qemu/option.h" -#include "exec/address-spaces.h" #include "qemu/units.h" /* Kernel boot protocol is specified in the kernel docs @@ -600,10 +599,23 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, } g_strfreev(node_path); + /* + * We drop all the memory nodes which correspond to empty NUMA nodes + * from the device tree, because the Linux NUMA binding document + * states they should not be generated. Linux will get the NUMA node + * IDs of the empty NUMA nodes from the distance map if they are needed. + * This means QEMU users may be obliged to provide command lines which + * configure distance maps when the empty NUMA node IDs are needed and + * Linux's default distance map isn't sufficient. + */ if (ms->numa_state != NULL && ms->numa_state->num_nodes > 0) { mem_base = binfo->loader_start; for (i = 0; i < ms->numa_state->num_nodes; i++) { mem_len = ms->numa_state->nodes[i].node_mem; + if (!mem_len) { + continue; + } + rc = fdt_add_memory_node(fdt, acells, mem_base, scells, mem_len, i); if (rc < 0) { @@ -1248,6 +1260,15 @@ static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info) bool try_decompressing_kernel; fw_cfg = fw_cfg_find(); + + if (!fw_cfg) { + error_report("This machine type does not support loading both " + "a guest firmware/BIOS image and a guest kernel at " + "the same time. You should change your QEMU command " + "line to specify one or the other, but not both."); + exit(1); + } + try_decompressing_kernel = arm_feature(&cpu->env, ARM_FEATURE_AARCH64); diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c index 9d0d728180b..294ba5de6ec 100644 --- a/hw/arm/cubieboard.c +++ b/hw/arm/cubieboard.c @@ -16,11 +16,7 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "qapi/error.h" -#include "cpu.h" -#include "sysemu/sysemu.h" -#include "hw/sysbus.h" #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/arm/allwinner-a10.h" diff --git a/hw/arm/digic_boards.c b/hw/arm/digic_boards.c index 6cdc1d83fca..b771a3d8b74 100644 --- a/hw/arm/digic_boards.c +++ b/hw/arm/digic_boards.c @@ -27,14 +27,11 @@ #include "qapi/error.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "hw/arm/digic.h" #include "hw/block/flash.h" #include "hw/loader.h" -#include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "qemu/units.h" #include "qemu/cutils.h" diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index ced2769b102..0299e81f853 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -23,7 +23,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qemu/log.h" #include "cpu.h" #include "hw/cpu/a9mpcore.h" #include "hw/irq.h" @@ -174,6 +173,9 @@ static DeviceState *pl330_create(uint32_t base, qemu_or_irq *orgate, int i; dev = qdev_new("pl330"); + object_property_set_link(OBJECT(dev), "memory", + OBJECT(get_system_memory()), + &error_fatal); qdev_prop_set_uint8(dev, "num_events", nevents); qdev_prop_set_uint8(dev, "num_chnls", 8); qdev_prop_set_uint8(dev, "num_periph_req", nreq); diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c index 56b729141b5..35dd9875da1 100644 --- a/hw/arm/exynos4_boards.c +++ b/hw/arm/exynos4_boards.c @@ -25,8 +25,6 @@ #include 
"qemu/units.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "cpu.h" -#include "sysemu/sysemu.h" #include "hw/sysbus.h" #include "net/net.h" #include "hw/arm/boot.h" diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index 08a98f828fc..24c43745903 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -24,10 +24,8 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/fsl-imx25.h" #include "sysemu/sysemu.h" -#include "exec/address-spaces.h" #include "hw/qdev-properties.h" #include "chardev/char.h" diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c index 0983998bb4b..def27bb9136 100644 --- a/hw/arm/fsl-imx31.c +++ b/hw/arm/fsl-imx31.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/fsl-imx31.h" #include "sysemu/sysemu.h" #include "exec/address-spaces.h" diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c index e0128d73161..1d1a708dd97 100644 --- a/hw/arm/fsl-imx6ul.c +++ b/hw/arm/fsl-imx6ul.c @@ -534,6 +534,13 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) */ create_unimplemented_device("sdma", FSL_IMX6UL_SDMA_ADDR, 0x4000); + /* + * SAI (Audio SSI (Synchronous Serial Interface)) + */ + create_unimplemented_device("sai1", FSL_IMX6UL_SAI1_ADDR, 0x4000); + create_unimplemented_device("sai2", FSL_IMX6UL_SAI2_ADDR, 0x4000); + create_unimplemented_device("sai3", FSL_IMX6UL_SAI3_ADDR, 0x4000); + /* * PWM */ @@ -542,6 +549,11 @@ static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) create_unimplemented_device("pwm3", FSL_IMX6UL_PWM3_ADDR, 0x4000); create_unimplemented_device("pwm4", FSL_IMX6UL_PWM4_ADDR, 0x4000); + /* + * Audio ASRC (asynchronous sample rate converter) + */ + create_unimplemented_device("asrc", FSL_IMX6UL_ASRC_ADDR, 0x4000); + /* * CAN */ diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c index 2ff2cab9246..149885f2b80 100644 --- a/hw/arm/fsl-imx7.c +++ b/hw/arm/fsl-imx7.c @@ -467,6 +467,13 @@ static void fsl_imx7_realize(DeviceState *dev, Error **errp) create_unimplemented_device("can1", FSL_IMX7_CAN1_ADDR, FSL_IMX7_CANn_SIZE); create_unimplemented_device("can2", FSL_IMX7_CAN2_ADDR, FSL_IMX7_CANn_SIZE); + /* + * SAI (Audio SSI (Synchronous Serial Interface)) + */ + create_unimplemented_device("sai1", FSL_IMX7_SAI1_ADDR, FSL_IMX7_SAIn_SIZE); + create_unimplemented_device("sai2", FSL_IMX7_SAI2_ADDR, FSL_IMX7_SAIn_SIZE); + create_unimplemented_device("sai2", FSL_IMX7_SAI3_ADDR, FSL_IMX7_SAIn_SIZE); + /* * OCOTP */ diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index bf886268c57..c3cb315dbc6 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -29,7 +29,6 @@ #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "hw/char/pl011.h" #include "hw/ide/ahci.h" @@ -170,7 +169,7 @@ struct HighbankRegsState { uint32_t regs[NUM_REGS]; }; -static VMStateDescription vmstate_highbank_regs = { +static const VMStateDescription vmstate_highbank_regs = { .name = "highbank-regs", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/arm/imx25_pdk.c b/hw/arm/imx25_pdk.c index 1c201d0d8ed..bd16acd4d9f 100644 --- a/hw/arm/imx25_pdk.c +++ b/hw/arm/imx25_pdk.c @@ -25,12 +25,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/qdev-properties.h" #include "hw/arm/fsl-imx25.h" #include "hw/boards.h" #include "qemu/error-report.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "hw/i2c/i2c.h" #include 
"qemu/cutils.h" @@ -67,7 +65,6 @@ static struct arm_boot_info imx25_pdk_binfo; static void imx25_pdk_init(MachineState *machine) { - MachineClass *mc = MACHINE_GET_CLASS(machine); IMX25PDK *s = g_new0(IMX25PDK, 1); unsigned int ram_size; unsigned int alias_offset; @@ -79,8 +76,8 @@ static void imx25_pdk_init(MachineState *machine) /* We need to initialize our memory */ if (machine->ram_size > (FSL_IMX25_SDRAM0_SIZE + FSL_IMX25_SDRAM1_SIZE)) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); + char *sz = size_to_str(FSL_IMX25_SDRAM0_SIZE + FSL_IMX25_SDRAM1_SIZE); + error_report("RAM size more than %s is not supported", sz); g_free(sz); exit(EXIT_FAILURE); } diff --git a/hw/arm/kzm.c b/hw/arm/kzm.c index e3f7d4ead23..39559c44c29 100644 --- a/hw/arm/kzm.c +++ b/hw/arm/kzm.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/fsl-imx31.h" #include "hw/boards.h" #include "qemu/error-report.h" diff --git a/hw/arm/mcimx6ul-evk.c b/hw/arm/mcimx6ul-evk.c index ed69a7b037a..77fae874b16 100644 --- a/hw/arm/mcimx6ul-evk.c +++ b/hw/arm/mcimx6ul-evk.c @@ -15,7 +15,6 @@ #include "hw/arm/fsl-imx6ul.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" @@ -68,7 +67,7 @@ static void mcimx6ul_evk_init(MachineState *machine) static void mcimx6ul_evk_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex A7)"; + mc->desc = "Freescale i.MX6UL Evaluation Kit (Cortex-A7)"; mc->init = mcimx6ul_evk_init; mc->max_cpus = FSL_IMX6UL_NUM_CPUS; mc->default_ram_id = "mcimx6ul-evk.ram"; diff --git a/hw/arm/mcimx7d-sabre.c b/hw/arm/mcimx7d-sabre.c index e57d52b3441..935d4b0f1cd 100644 --- a/hw/arm/mcimx7d-sabre.c +++ b/hw/arm/mcimx7d-sabre.c @@ -17,7 +17,6 @@ #include "hw/arm/fsl-imx7.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" @@ -68,7 +67,7 @@ static void mcimx7d_sabre_init(MachineState *machine) static void mcimx7d_sabre_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex A7)"; + mc->desc = "Freescale i.MX7 DUAL SABRE (Cortex-A7)"; mc->init = mcimx7d_sabre_init; mc->max_cpus = FSL_IMX7_NUM_CPUS; mc->default_ram_id = "mcimx7d-sabre.ram"; diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 79a9acd0be8..01ecc20324f 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -24,6 +24,7 @@ arm_ss.add(when: 'CONFIG_Z2', if_true: files('z2.c')) arm_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview.c')) arm_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa-ref.c')) arm_ss.add(when: 'CONFIG_STELLARIS', if_true: files('stellaris.c')) +arm_ss.add(when: 'CONFIG_STM32VLDISCOVERY', if_true: files('stm32vldiscovery.c')) arm_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c')) arm_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) @@ -44,6 +45,7 @@ arm_ss.add(when: 'CONFIG_STRONGARM', if_true: files('strongarm.c')) arm_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c')) arm_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c')) arm_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c', 'bcm2836.c', 'raspi.c')) +arm_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: files('stm32f100_soc.c')) arm_ss.add(when: 
'CONFIG_STM32F205_SOC', if_true: files('stm32f205_soc.c')) arm_ss.add(when: 'CONFIG_STM32F405_SOC', if_true: files('stm32f405_soc.c')) arm_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp.c', 'xlnx-zcu102.c')) diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index 25016e464d9..f40e854dec7 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -55,6 +55,7 @@ #include "hw/boards.h" #include "exec/address-spaces.h" #include "sysemu/sysemu.h" +#include "sysemu/reset.h" #include "hw/misc/unimp.h" #include "hw/char/cmsdk-apb-uart.h" #include "hw/timer/cmsdk-apb-timer.h" @@ -72,6 +73,7 @@ #include "hw/core/split-irq.h" #include "hw/qdev-clock.h" #include "qom/object.h" +#include "hw/irq.h" #define MPS2TZ_NUMIRQ_MAX 96 #define MPS2TZ_RAM_MAX 5 @@ -121,8 +123,10 @@ struct MPS2TZMachineClass { int numirq; /* Number of external interrupts */ int uart_overflow_irq; /* number of the combined UART overflow IRQ */ uint32_t init_svtor; /* init-svtor setting for SSE */ + uint32_t sram_addr_width; /* SRAM_ADDR_WIDTH setting for SSE */ const RAMInfo *raminfo; const char *armsse_type; + uint32_t boot_ram_size; /* size of ram at address 0; 0 == find in raminfo */ }; struct MPS2TZMachineState { @@ -153,6 +157,9 @@ struct MPS2TZMachineState { SplitIRQ cpu_irq_splitter[MPS2TZ_NUMIRQ_MAX]; Clock *sysclk; Clock *s32kclk; + + bool remap; + qemu_irq remap_irq; }; #define TYPE_MPS2TZ_MACHINE "mps2tz" @@ -228,25 +235,23 @@ static const RAMInfo an505_raminfo[] = { { }, }; +/* + * Note that the addresses and MPC numbering here should match up + * with those used in remap_memory(), which can swap the BRAM and QSPI. + */ static const RAMInfo an524_raminfo[] = { { .name = "bram", .base = 0x00000000, .size = 512 * KiB, .mpc = 0, .mrindex = 0, - }, { - .name = "sram", - .base = 0x20000000, - .size = 32 * 4 * KiB, - .mpc = -1, - .mrindex = 1, }, { /* We don't model QSPI flash yet; for now expose it as simple ROM */ .name = "QSPI", .base = 0x28000000, .size = 8 * MiB, .mpc = 1, - .mrindex = 2, + .mrindex = 1, .flags = IS_ROM, }, { .name = "DDR", @@ -260,23 +265,11 @@ static const RAMInfo an524_raminfo[] = { { }; static const RAMInfo an547_raminfo[] = { { - .name = "itcm", - .base = 0x00000000, - .size = 512 * KiB, - .mpc = -1, - .mrindex = 0, - }, { .name = "sram", .base = 0x01000000, .size = 2 * MiB, .mpc = 0, .mrindex = 1, - }, { - .name = "dtcm", - .base = 0x20000000, - .size = 4 * 128 * KiB, - .mpc = -1, - .mrindex = 2, }, { .name = "sram 2", .base = 0x21000000, @@ -380,6 +373,11 @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno) } } +/* Union describing the device-specific extra data we pass to the devfn. */ +typedef union PPCExtraData { + bool i2c_internal; +} PPCExtraData; + /* Most of the devices in the AN505 FPGA image sit behind * Peripheral Protection Controllers. These data structures * define the layout of which devices sit behind which PPCs. 
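The PPCExtraData union introduced just above is threaded through every one of the mps2-tz device-creation functions so that a port table entry can hand device-specific configuration to its devfn. A condensed sketch of how the two ends meet (it simply mirrors the make_i2c()/PPCPortInfo changes that appear further down in this patch; nothing here is new API):

    /*
     * Sketch: a devfn receiving the new extradata argument and honouring
     * the i2c_internal flag, plus the matching port-table entry.
     */
    static MemoryRegion *make_sketch_i2c(MPS2TZMachineState *mms, void *opaque,
                                         const char *name, hwaddr size,
                                         const int *irqs,
                                         const PPCExtraData *extradata)
    {
        ArmSbconI2CState *i2c = opaque;
        SysBusDevice *s;

        object_initialize_child(OBJECT(mms), name, i2c, TYPE_ARM_SBCON_I2C);
        s = SYS_BUS_DEVICE(i2c);
        sysbus_realize(s, &error_fatal);
        if (extradata->i2c_internal) {
            /* board-internal bus: keep user-created i2c devices off it */
            qbus_mark_full(qdev_get_child_bus(DEVICE(i2c), "i2c"));
        }
        return sysbus_mmio_get_region(s, 0);
    }

    /* In the PPCPortInfo table, the extradata initializer comes last: */
    /* { "i2c0", make_sketch_i2c, &mms->i2c[0], 0x40207000, 0x1000, {},
         { .i2c_internal = true } }, */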
@@ -389,7 +387,8 @@ static qemu_irq get_sse_irq_in(MPS2TZMachineState *mms, int irqno) */ typedef MemoryRegion *MakeDevFn(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs); + const int *irqs, + const PPCExtraData *extradata); typedef struct PPCPortInfo { const char *name; @@ -398,6 +397,7 @@ typedef struct PPCPortInfo { hwaddr addr; hwaddr size; int irqs[3]; /* currently no device needs more IRQ lines than this */ + PPCExtraData extradata; /* to pass device-specific info to the devfn */ } PPCPortInfo; typedef struct PPCInfo { @@ -408,7 +408,8 @@ typedef struct PPCInfo { static MemoryRegion *make_unimp_dev(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, + const PPCExtraData *extradata) { /* Initialize, configure and realize a TYPE_UNIMPLEMENTED_DEVICE, * and return a pointer to its MemoryRegion. @@ -424,7 +425,7 @@ static MemoryRegion *make_unimp_dev(MPS2TZMachineState *mms, static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { /* The irq[] array is tx, rx, combined, in that order */ MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); @@ -448,7 +449,7 @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { MPS2SCC *scc = opaque; DeviceState *sccdev; @@ -457,6 +458,7 @@ static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque, object_initialize_child(OBJECT(mms), "scc", scc, TYPE_MPS2_SCC); sccdev = DEVICE(scc); + qdev_prop_set_uint32(sccdev, "scc-cfg0", mms->remap ? 1 : 0); qdev_prop_set_uint32(sccdev, "scc-cfg4", 0x2); qdev_prop_set_uint32(sccdev, "scc-aid", 0x00200008); qdev_prop_set_uint32(sccdev, "scc-id", mmc->scc_id); @@ -471,7 +473,7 @@ static MemoryRegion *make_scc(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { MPS2FPGAIO *fpgaio = opaque; MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); @@ -486,7 +488,8 @@ static MemoryRegion *make_fpgaio(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, + const PPCExtraData *extradata) { SysBusDevice *s; NICInfo *nd = &nd_table[0]; @@ -506,7 +509,8 @@ static MemoryRegion *make_eth_dev(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_eth_usb(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, + const PPCExtraData *extradata) { /* * The AN524 makes the ethernet and USB share a PPC port. 
@@ -549,7 +553,7 @@ static MemoryRegion *make_eth_usb(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_mpc(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { TZMPC *mpc = opaque; int i = mpc - &mms->mpc[0]; @@ -573,9 +577,55 @@ static MemoryRegion *make_mpc(MPS2TZMachineState *mms, void *opaque, return sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 0); } +static hwaddr boot_mem_base(MPS2TZMachineState *mms) +{ + /* + * Return the canonical address of the block which will be mapped + * at address 0x0 (i.e. where the vector table is). + * This is usually 0, but if the AN524 alternate memory map is + * enabled it will be the base address of the QSPI block. + */ + return mms->remap ? 0x28000000 : 0; +} + +static void remap_memory(MPS2TZMachineState *mms, int map) +{ + /* + * Remap the memory for the AN524. 'map' is the value of + * SCC CFG_REG0 bit 0, i.e. 0 for the default map and 1 + * for the "option 1" mapping where QSPI is at address 0. + * + * Effectively we need to swap around the "upstream" ends of + * MPC 0 and MPC 1. + */ + MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); + int i; + + if (mmc->fpga_type != FPGA_AN524) { + return; + } + + memory_region_transaction_begin(); + for (i = 0; i < 2; i++) { + TZMPC *mpc = &mms->mpc[i]; + MemoryRegion *upstream = sysbus_mmio_get_region(SYS_BUS_DEVICE(mpc), 1); + hwaddr addr = (i ^ map) ? 0x28000000 : 0; + + memory_region_set_address(upstream, addr); + } + memory_region_transaction_commit(); +} + +static void remap_irq_fn(void *opaque, int n, int level) +{ + MPS2TZMachineState *mms = opaque; + + remap_memory(mms, level); +} + static MemoryRegion *make_dma(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { /* The irq[] array is DMACINTR, DMACINTERR, DMACINTTC, in that order */ PL080State *dma = opaque; @@ -632,7 +682,7 @@ static MemoryRegion *make_dma(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_spi(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { /* * The AN505 has five PL022 SPI controllers. @@ -654,7 +704,7 @@ static MemoryRegion *make_spi(MPS2TZMachineState *mms, void *opaque, static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { ArmSbconI2CState *i2c = opaque; SysBusDevice *s; @@ -662,12 +712,26 @@ static MemoryRegion *make_i2c(MPS2TZMachineState *mms, void *opaque, object_initialize_child(OBJECT(mms), name, i2c, TYPE_ARM_SBCON_I2C); s = SYS_BUS_DEVICE(i2c); sysbus_realize(s, &error_fatal); + + /* + * If this is an internal-use-only i2c bus, mark it full + * so that user-created i2c devices are not plugged into it. + * If we implement models of any on-board i2c devices that + * plug in to one of the internal-use-only buses, then we will + * need to create and plugging those in here before we mark the + * bus as full. 
+ */ + if (extradata->i2c_internal) { + BusState *qbus = qdev_get_child_bus(DEVICE(i2c), "i2c"); + qbus_mark_full(qbus); + } + return sysbus_mmio_get_region(s, 0); } static MemoryRegion *make_rtc(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, - const int *irqs) + const int *irqs, const PPCExtraData *extradata) { PL031State *pl031 = opaque; SysBusDevice *s; @@ -710,8 +774,16 @@ static uint32_t boot_ram_size(MPS2TZMachineState *mms) const RAMInfo *p; MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); + /* + * Use a per-board specification (for when the boot RAM is in + * the SSE and so doesn't have a RAMInfo list entry) + */ + if (mmc->boot_ram_size) { + return mmc->boot_ram_size; + } + for (p = mmc->raminfo; p->name; p++) { - if (p->base == 0) { + if (p->base == boot_mem_base(mms)) { return p->size; } } @@ -756,6 +828,7 @@ static void mps2tz_common_init(MachineState *machine) OBJECT(system_memory), &error_abort); qdev_prop_set_uint32(iotkitdev, "EXP_NUMIRQ", mmc->numirq); qdev_prop_set_uint32(iotkitdev, "init-svtor", mmc->init_svtor); + qdev_prop_set_uint32(iotkitdev, "SRAM_ADDR_WIDTH", mmc->sram_addr_width); qdev_connect_clock_in(iotkitdev, "MAINCLK", mms->sysclk); qdev_connect_clock_in(iotkitdev, "S32KCLK", mms->s32kclk); sysbus_realize(SYS_BUS_DEVICE(&mms->iotkit), &error_fatal); @@ -863,10 +936,14 @@ static void mps2tz_common_init(MachineState *machine) { "uart2", make_uart, &mms->uart[2], 0x40202000, 0x1000, { 36, 37, 44 } }, { "uart3", make_uart, &mms->uart[3], 0x40203000, 0x1000, { 38, 39, 45 } }, { "uart4", make_uart, &mms->uart[4], 0x40204000, 0x1000, { 40, 41, 46 } }, - { "i2c0", make_i2c, &mms->i2c[0], 0x40207000, 0x1000 }, - { "i2c1", make_i2c, &mms->i2c[1], 0x40208000, 0x1000 }, - { "i2c2", make_i2c, &mms->i2c[2], 0x4020c000, 0x1000 }, - { "i2c3", make_i2c, &mms->i2c[3], 0x4020d000, 0x1000 }, + { "i2c0", make_i2c, &mms->i2c[0], 0x40207000, 0x1000, {}, + { .i2c_internal = true /* touchscreen */ } }, + { "i2c1", make_i2c, &mms->i2c[1], 0x40208000, 0x1000, {}, + { .i2c_internal = true /* audio conf */ } }, + { "i2c2", make_i2c, &mms->i2c[2], 0x4020c000, 0x1000, {}, + { .i2c_internal = false /* shield 0 */ } }, + { "i2c3", make_i2c, &mms->i2c[3], 0x4020d000, 0x1000, {}, + { .i2c_internal = false /* shield 1 */ } }, }, }, { .name = "apb_ppcexp2", @@ -907,15 +984,20 @@ static void mps2tz_common_init(MachineState *machine) }, { .name = "apb_ppcexp1", .ports = { - { "i2c0", make_i2c, &mms->i2c[0], 0x41200000, 0x1000 }, - { "i2c1", make_i2c, &mms->i2c[1], 0x41201000, 0x1000 }, + { "i2c0", make_i2c, &mms->i2c[0], 0x41200000, 0x1000, {}, + { .i2c_internal = true /* touchscreen */ } }, + { "i2c1", make_i2c, &mms->i2c[1], 0x41201000, 0x1000, {}, + { .i2c_internal = true /* audio conf */ } }, { "spi0", make_spi, &mms->spi[0], 0x41202000, 0x1000, { 52 } }, { "spi1", make_spi, &mms->spi[1], 0x41203000, 0x1000, { 53 } }, { "spi2", make_spi, &mms->spi[2], 0x41204000, 0x1000, { 54 } }, - { "i2c2", make_i2c, &mms->i2c[2], 0x41205000, 0x1000 }, - { "i2c3", make_i2c, &mms->i2c[3], 0x41206000, 0x1000 }, + { "i2c2", make_i2c, &mms->i2c[2], 0x41205000, 0x1000, {}, + { .i2c_internal = false /* shield 0 */ } }, + { "i2c3", make_i2c, &mms->i2c[3], 0x41206000, 0x1000, {}, + { .i2c_internal = false /* shield 1 */ } }, { /* port 7 reserved */ }, - { "i2c4", make_i2c, &mms->i2c[4], 0x41208000, 0x1000 }, + { "i2c4", make_i2c, &mms->i2c[4], 0x41208000, 0x1000, {}, + { .i2c_internal = true /* DDR4 EEPROM */ } }, }, }, { .name = "apb_ppcexp2", @@ -957,15 +1039,20 @@ static void 
mps2tz_common_init(MachineState *machine) }, { .name = "apb_ppcexp1", .ports = { - { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000 }, - { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000 }, + { "i2c0", make_i2c, &mms->i2c[0], 0x49200000, 0x1000, {}, + { .i2c_internal = true /* touchscreen */ } }, + { "i2c1", make_i2c, &mms->i2c[1], 0x49201000, 0x1000, {}, + { .i2c_internal = true /* audio conf */ } }, { "spi0", make_spi, &mms->spi[0], 0x49202000, 0x1000, { 53 } }, { "spi1", make_spi, &mms->spi[1], 0x49203000, 0x1000, { 54 } }, { "spi2", make_spi, &mms->spi[2], 0x49204000, 0x1000, { 55 } }, - { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000 }, - { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000 }, + { "i2c2", make_i2c, &mms->i2c[2], 0x49205000, 0x1000, {}, + { .i2c_internal = false /* shield 0 */ } }, + { "i2c3", make_i2c, &mms->i2c[3], 0x49206000, 0x1000, {}, + { .i2c_internal = false /* shield 1 */ } }, { /* port 7 reserved */ }, - { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000 }, + { "i2c4", make_i2c, &mms->i2c[4], 0x49208000, 0x1000, {}, + { .i2c_internal = true /* DDR4 EEPROM */ } }, }, }, { .name = "apb_ppcexp2", @@ -1035,7 +1122,7 @@ static void mps2tz_common_init(MachineState *machine) } mr = pinfo->devfn(mms, pinfo->opaque, pinfo->name, pinfo->size, - pinfo->irqs); + pinfo->irqs, &pinfo->extradata); portname = g_strdup_printf("port[%d]", port); object_property_set_link(OBJECT(ppc), portname, OBJECT(mr), &error_fatal); @@ -1095,6 +1182,16 @@ static void mps2tz_common_init(MachineState *machine) create_non_mpc_ram(mms); + if (mmc->fpga_type == FPGA_AN524) { + /* + * Connect the line from the SCC so that we can remap when the + * guest updates that register. + */ + mms->remap_irq = qemu_allocate_irq(remap_irq_fn, mms, 0); + qdev_connect_gpio_out_named(DEVICE(&mms->scc), "remap", 0, + mms->remap_irq); + } + armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, boot_ram_size(mms)); } @@ -1117,12 +1214,47 @@ static void mps2_tz_idau_check(IDAUInterface *ii, uint32_t address, *iregion = region; } +static char *mps2_get_remap(Object *obj, Error **errp) +{ + MPS2TZMachineState *mms = MPS2TZ_MACHINE(obj); + const char *val = mms->remap ? "QSPI" : "BRAM"; + return g_strdup(val); +} + +static void mps2_set_remap(Object *obj, const char *value, Error **errp) +{ + MPS2TZMachineState *mms = MPS2TZ_MACHINE(obj); + + if (!strcmp(value, "BRAM")) { + mms->remap = false; + } else if (!strcmp(value, "QSPI")) { + mms->remap = true; + } else { + error_setg(errp, "Invalid remap value"); + error_append_hint(errp, "Valid values are BRAM and QSPI.\n"); + } +} + +static void mps2_machine_reset(MachineState *machine) +{ + MPS2TZMachineState *mms = MPS2TZ_MACHINE(machine); + + /* + * Set the initial memory mapping before triggering the reset of + * the rest of the system, so that the guest image loader and CPU + * reset see the correct mapping. 
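Spelling out the effect of remap_memory() above (derived from its "addr = (i ^ map) ? 0x28000000 : 0" swap, with MPC 0 fronting the BRAM and MPC 1 the QSPI flash per an524_raminfo):

    /*
     * map = 0 ("BRAM", default): MPC0/BRAM -> 0x00000000, MPC1/QSPI -> 0x28000000
     * map = 1 ("QSPI"):          MPC0/BRAM -> 0x28000000, MPC1/QSPI -> 0x00000000
     */

The same bit is what the guest writes to SCC CFG_REG0, wired through the SCC "remap" GPIO line and remap_irq_fn(), while the new machine-level "remap" property only selects which of the two layouts is applied at reset, before the guest image is loaded.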
+ */ + remap_memory(mms, mms->remap); + qemu_devices_reset(); +} + static void mps2tz_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); IDAUInterfaceClass *iic = IDAU_INTERFACE_CLASS(oc); mc->init = mps2tz_common_init; + mc->reset = mps2_machine_reset; iic->check = mps2_tz_idau_check; } @@ -1168,8 +1300,10 @@ static void mps2tz_an505_class_init(ObjectClass *oc, void *data) mmc->numirq = 92; mmc->uart_overflow_irq = 47; mmc->init_svtor = 0x10000000; + mmc->sram_addr_width = 15; mmc->raminfo = an505_raminfo; mmc->armsse_type = TYPE_IOTKIT; + mmc->boot_ram_size = 0; mps2tz_set_default_ram_info(mmc); } @@ -1195,8 +1329,10 @@ static void mps2tz_an521_class_init(ObjectClass *oc, void *data) mmc->numirq = 92; mmc->uart_overflow_irq = 47; mmc->init_svtor = 0x10000000; + mmc->sram_addr_width = 15; mmc->raminfo = an505_raminfo; /* AN521 is the same as AN505 here */ mmc->armsse_type = TYPE_SSE200; + mmc->boot_ram_size = 0; mps2tz_set_default_ram_info(mmc); } @@ -1222,9 +1358,16 @@ static void mps3tz_an524_class_init(ObjectClass *oc, void *data) mmc->numirq = 95; mmc->uart_overflow_irq = 47; mmc->init_svtor = 0x10000000; + mmc->sram_addr_width = 15; mmc->raminfo = an524_raminfo; mmc->armsse_type = TYPE_SSE200; + mmc->boot_ram_size = 0; mps2tz_set_default_ram_info(mmc); + + object_class_property_add_str(oc, "remap", mps2_get_remap, mps2_set_remap); + object_class_property_set_description(oc, "remap", + "Set memory mapping. Valid values " + "are BRAM (default) and QSPI."); } static void mps3tz_an547_class_init(ObjectClass *oc, void *data) @@ -1249,8 +1392,10 @@ static void mps3tz_an547_class_init(ObjectClass *oc, void *data) mmc->numirq = 96; mmc->uart_overflow_irq = 48; mmc->init_svtor = 0x00000000; + mmc->sram_addr_width = 21; mmc->raminfo = an547_raminfo; mmc->armsse_type = TYPE_SSE300; + mmc->boot_ram_size = 512 * KiB; mps2tz_set_default_ram_info(mmc); } diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c index 81413b7133e..bb76fa68890 100644 --- a/hw/arm/mps2.c +++ b/hw/arm/mps2.c @@ -86,6 +86,7 @@ struct MPS2MachineState { CMSDKAPBWatchdog watchdog; CMSDKAPBTimer timer[2]; Clock *sysclk; + Clock *refclk; }; #define TYPE_MPS2_MACHINE "mps2" @@ -99,6 +100,15 @@ OBJECT_DECLARE_TYPE(MPS2MachineState, MPS2MachineClass, MPS2_MACHINE) /* Main SYSCLK frequency in Hz */ #define SYSCLK_FRQ 25000000 +/* + * The Application Notes don't say anything about how the + * systick reference clock is configured. (Quite possibly + * they don't have one at all.) This 1MHz clock matches the + * pre-existing behaviour that used to be hardcoded in the + * armv7m_systick implementation. + */ +#define REFCLK_FRQ (1 * 1000 * 1000) + /* Initialize the auxiliary RAM region @mr and map it into * the memory map at @base. 
*/ @@ -146,6 +156,9 @@ static void mps2_common_init(MachineState *machine) mms->sysclk = clock_new(OBJECT(machine), "SYSCLK"); clock_set_hz(mms->sysclk, SYSCLK_FRQ); + mms->refclk = clock_new(OBJECT(machine), "REFCLK"); + clock_set_hz(mms->refclk, REFCLK_FRQ); + /* The FPGA images have an odd combination of different RAMs, * because in hardware they are different implementations and * connected to different buses, giving varying performance/size @@ -223,6 +236,8 @@ static void mps2_common_init(MachineState *machine) default: g_assert_not_reached(); } + qdev_connect_clock_in(armv7m, "cpuclk", mms->sysclk); + qdev_connect_clock_in(armv7m, "refclk", mms->refclk); qdev_prop_set_string(armv7m, "cpu-type", machine->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); object_property_set_link(OBJECT(&mms->armv7m), "memory", @@ -413,7 +428,17 @@ static void mps2_common_init(MachineState *machine) 0x40023000, /* Audio */ 0x40029000, /* Shield0 */ 0x4002a000}; /* Shield1 */ - sysbus_create_simple(TYPE_ARM_SBCON_I2C, i2cbase[i], NULL); + DeviceState *dev; + + dev = sysbus_create_simple(TYPE_ARM_SBCON_I2C, i2cbase[i], NULL); + if (i < 2) { + /* + * internal-only bus: mark it full to avoid user-created + * i2c devices being plugged into it. + */ + BusState *qbus = qdev_get_child_bus(dev, "i2c"); + qbus_mark_full(qbus); + } } create_unimplemented_device("i2s", 0x40024000, 0x400); @@ -424,8 +449,6 @@ static void mps2_common_init(MachineState *machine) qdev_get_gpio_in(armv7m, mmc->fpga_type == FPGA_AN511 ? 47 : 13)); - system_clock_scale = NANOSECONDS_PER_SECOND / SYSCLK_FRQ; - armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, 0x400000); } diff --git a/hw/arm/msf2-soc.c b/hw/arm/msf2-soc.c index d2c29e82d13..b5fe9f364d5 100644 --- a/hw/arm/msf2-soc.c +++ b/hw/arm/msf2-soc.c @@ -27,9 +27,9 @@ #include "qapi/error.h" #include "exec/address-spaces.h" #include "hw/char/serial.h" -#include "hw/irq.h" #include "hw/arm/msf2-soc.h" #include "hw/misc/unimp.h" +#include "hw/qdev-clock.h" #include "sysemu/sysemu.h" #define MSF2_TIMER_BASE 0x40004000 @@ -74,6 +74,9 @@ static void m2sxxx_soc_initfn(Object *obj) } object_initialize_child(obj, "emac", &s->emac, TYPE_MSS_EMAC); + + s->m3clk = qdev_init_clock_in(DEVICE(obj), "m3clk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk", NULL, NULL, 0); } static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) @@ -84,11 +87,34 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) int i; MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *nvm = g_new(MemoryRegion, 1); - MemoryRegion *nvm_alias = g_new(MemoryRegion, 1); - MemoryRegion *sram = g_new(MemoryRegion, 1); - memory_region_init_rom(nvm, OBJECT(dev_soc), "MSF2.eNVM", s->envm_size, + if (!clock_has_source(s->m3clk)) { + error_setg(errp, "m3clk must be wired up by the board code"); + return; + } + + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk must not be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC SYSTICK_CR register at 0xe0042038, + * which allows the guest to program the divisor between the m3clk and + * the systick refclk to either /4, /8, /16 or /32, as well as setting + * the value the guest can read in the STCALIB register. 
Currently we + * implement the divisor as a fixed /32, which matches the reset value + * of SYSTICK_CR. + */ + clock_set_mul_div(s->refclk, 32, 1); + clock_set_source(s->refclk, s->m3clk); + + memory_region_init_rom(&s->nvm, OBJECT(dev_soc), "MSF2.eNVM", s->envm_size, &error_fatal); /* * On power-on, the eNVM region 0x60000000 is automatically @@ -96,34 +122,28 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) * start address (0x0). We do not support remapping other eNVM, * eSRAM and DDR regions by guest(via Sysreg) currently. */ - memory_region_init_alias(nvm_alias, OBJECT(dev_soc), "MSF2.eNVM", nvm, 0, - s->envm_size); + memory_region_init_alias(&s->nvm_alias, OBJECT(dev_soc), "MSF2.eNVM", + &s->nvm, 0, s->envm_size); - memory_region_add_subregion(system_memory, ENVM_BASE_ADDRESS, nvm); - memory_region_add_subregion(system_memory, 0, nvm_alias); + memory_region_add_subregion(system_memory, ENVM_BASE_ADDRESS, &s->nvm); + memory_region_add_subregion(system_memory, 0, &s->nvm_alias); - memory_region_init_ram(sram, NULL, "MSF2.eSRAM", s->esram_size, + memory_region_init_ram(&s->sram, NULL, "MSF2.eSRAM", s->esram_size, &error_fatal); - memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); armv7m = DEVICE(&s->armv7m); qdev_prop_set_uint32(armv7m, "num-irq", 81); qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->m3clk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(get_system_memory()), &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { return; } - if (!s->m3clk) { - error_setg(errp, "Invalid m3clk value"); - error_append_hint(errp, "m3clk can not be zero\n"); - return; - } - - system_clock_scale = NANOSECONDS_PER_SECOND / s->m3clk; - for (i = 0; i < MSF2_NUM_UARTS; i++) { if (serial_hd(i)) { serial_mm_init(get_system_memory(), uart_addr[i], 2, @@ -133,8 +153,13 @@ static void m2sxxx_soc_realize(DeviceState *dev_soc, Error **errp) } dev = DEVICE(&s->timer); - /* APB0 clock is the timer input clock */ - qdev_prop_set_uint32(dev, "clock-frequency", s->m3clk / s->apb0div); + /* + * APB0 clock is the timer input clock. + * TODO: ideally the MSF2 timer device should use a Clock rather than a + * clock-frequency integer property. 
+ */ + qdev_prop_set_uint32(dev, "clock-frequency", + clock_get_hz(s->m3clk) / s->apb0div); if (!sysbus_realize(SYS_BUS_DEVICE(&s->timer), errp)) { return; } @@ -211,8 +236,6 @@ static Property m2sxxx_soc_properties[] = { DEFINE_PROP_UINT64("eNVM-size", MSF2State, envm_size, MSF2_ENVM_MAX_SIZE), DEFINE_PROP_UINT64("eSRAM-size", MSF2State, esram_size, MSF2_ESRAM_MAX_SIZE), - /* Libero GUI shows 100Mhz as default for clocks */ - DEFINE_PROP_UINT32("m3clk", MSF2State, m3clk, 100 * 1000000), /* default divisors in Libero GUI */ DEFINE_PROP_UINT8("apb0div", MSF2State, apb0div, 2), DEFINE_PROP_UINT8("apb1div", MSF2State, apb1div, 2), diff --git a/hw/arm/msf2-som.c b/hw/arm/msf2-som.c index f9b61c36ddb..396e8b99138 100644 --- a/hw/arm/msf2-som.c +++ b/hw/arm/msf2-som.c @@ -29,9 +29,9 @@ #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/arm/boot.h" +#include "hw/qdev-clock.h" #include "exec/address-spaces.h" #include "hw/arm/msf2-soc.h" -#include "cpu.h" #define DDR_BASE_ADDRESS 0xA0000000 #define DDR_SIZE (64 * MiB) @@ -50,6 +50,7 @@ static void emcraft_sf2_s2s010_init(MachineState *machine) BusState *spi_bus; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ddr = g_new(MemoryRegion, 1); + Clock *m3clk; if (strcmp(machine->cpu_type, mc->default_cpu_type) != 0) { error_report("This board can only be used with CPU %s", @@ -73,7 +74,10 @@ static void emcraft_sf2_s2s010_init(MachineState *machine) * in Libero. CPU clock is divided by APB0 and APB1 divisors for * peripherals. Emcraft's SoM kit comes with these settings by default. */ - qdev_prop_set_uint32(dev, "m3clk", 142 * 1000000); + /* This clock doesn't need migration because it is fixed-frequency */ + m3clk = clock_new(OBJECT(machine), "m3clk"); + clock_set_hz(m3clk, 142 * 1000000); + qdev_connect_clock_in(dev, "m3clk", m3clk); qdev_prop_set_uint32(dev, "apb0div", 2); qdev_prop_set_uint32(dev, "apb1div", 2); diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index 9cebece2de0..2d612cc0c9b 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -19,7 +19,6 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/char/serial.h" -#include "hw/hw.h" #include "qemu/timer.h" #include "hw/ptimer.h" #include "hw/qdev-properties.h" @@ -32,7 +31,6 @@ #include "sysemu/block-backend.h" #include "sysemu/runstate.h" #include "sysemu/dma.h" -#include "exec/address-spaces.h" #include "ui/pixel_ops.h" #include "qemu/cutils.h" #include "qom/object.h" diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index 1733b71507c..3365da11bf7 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -26,6 +26,7 @@ #include "qapi/error.h" #include "hw/boards.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" #include "hw/arm/boot.h" @@ -36,16 +37,15 @@ static void netduino2_init(MachineState *machine) { DeviceState *dev; + Clock *sysclk; - /* - * TODO: ideally we would model the SoC RCC and let it handle - * system_clock_scale, including its ability to define different - * possible SYSCLK sources. 
- */ - system_clock_scale = NANOSECONDS_PER_SECOND / SYSCLK_FRQ; + /* This clock doesn't need migration because it is fixed-frequency */ + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); dev = qdev_new(TYPE_STM32F205_SOC); qdev_prop_set_string(dev, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3")); + qdev_connect_clock_in(dev, "sysclk", sysclk); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, diff --git a/hw/arm/netduinoplus2.c b/hw/arm/netduinoplus2.c index d3ad7a2b675..76cea8e4891 100644 --- a/hw/arm/netduinoplus2.c +++ b/hw/arm/netduinoplus2.c @@ -26,6 +26,7 @@ #include "qapi/error.h" #include "hw/boards.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "qemu/error-report.h" #include "hw/arm/stm32f405_soc.h" #include "hw/arm/boot.h" @@ -36,16 +37,15 @@ static void netduinoplus2_init(MachineState *machine) { DeviceState *dev; + Clock *sysclk; - /* - * TODO: ideally we would model the SoC RCC and let it handle - * system_clock_scale, including its ability to define different - * possible SYSCLK sources. - */ - system_clock_scale = NANOSECONDS_PER_SECOND / SYSCLK_FRQ; + /* This clock doesn't need migration because it is fixed-frequency */ + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); dev = qdev_new(TYPE_STM32F405_SOC); qdev_prop_set_string(dev, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4")); + qdev_connect_clock_in(dev, "sysclk", sysclk); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); armv7m_load_kernel(ARM_CPU(first_cpu), diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c index 495b0f8e91d..878c2208e07 100644 --- a/hw/arm/npcm7xx.c +++ b/hw/arm/npcm7xx.c @@ -16,7 +16,6 @@ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "hw/arm/boot.h" #include "hw/arm/npcm7xx.h" #include "hw/char/serial.h" @@ -64,6 +63,8 @@ #define NPCM7XX_ROM_BA (0xffff0000) #define NPCM7XX_ROM_SZ (64 * KiB) +/* SDHCI Modules */ +#define NPCM7XX_MMC_BA (0xf0842000) /* Clock configuration values to be fixed up when bypassing bootloader */ @@ -84,6 +85,7 @@ enum NPCM7xxInterrupt { NPCM7XX_UART3_IRQ, NPCM7XX_EMC1RX_IRQ = 15, NPCM7XX_EMC1TX_IRQ, + NPCM7XX_MMC_IRQ = 26, NPCM7XX_TIMER0_IRQ = 32, /* Timer Module 0 */ NPCM7XX_TIMER1_IRQ, NPCM7XX_TIMER2_IRQ, @@ -444,6 +446,8 @@ static void npcm7xx_init(Object *obj) for (i = 0; i < ARRAY_SIZE(s->emc); i++) { object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC); } + + object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI); } static void npcm7xx_realize(DeviceState *dev, Error **errp) @@ -708,6 +712,12 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) &error_abort); memory_region_add_subregion(get_system_memory(), NPCM7XX_ROM_BA, &s->irom); + /* SDHCI */ + sysbus_realize(SYS_BUS_DEVICE(&s->mmc), &error_abort); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->mmc), 0, NPCM7XX_MMC_BA); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->mmc), 0, + npcm7xx_irq(s, NPCM7XX_MMC_IRQ)); + create_unimplemented_device("npcm7xx.shm", 0xc0001000, 4 * KiB); create_unimplemented_device("npcm7xx.vdmx", 0xe0800000, 4 * KiB); create_unimplemented_device("npcm7xx.pcierc", 0xe1000000, 64 * KiB); @@ -737,7 +747,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp) create_unimplemented_device("npcm7xx.usbd[8]", 0xf0838000, 4 * KiB); create_unimplemented_device("npcm7xx.usbd[9]", 0xf0839000, 4 * KiB); create_unimplemented_device("npcm7xx.sd", 0xf0840000, 8 * KiB); - 
create_unimplemented_device("npcm7xx.mmc", 0xf0842000, 8 * KiB); create_unimplemented_device("npcm7xx.pcimbx", 0xf0848000, 512 * KiB); create_unimplemented_device("npcm7xx.aes", 0xf0858000, 4 * KiB); create_unimplemented_device("npcm7xx.des", 0xf0859000, 4 * KiB); diff --git a/hw/arm/npcm7xx_boards.c b/hw/arm/npcm7xx_boards.c index e22fe4bf8f0..dec7d16ae51 100644 --- a/hw/arm/npcm7xx_boards.c +++ b/hw/arm/npcm7xx_boards.c @@ -16,9 +16,9 @@ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "hw/arm/npcm7xx.h" #include "hw/core/cpu.h" +#include "hw/i2c/i2c_mux_pca954x.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/loader.h" #include "hw/qdev-core.h" @@ -27,10 +27,14 @@ #include "qemu-common.h" #include "qemu/datadir.h" #include "qemu/units.h" +#include "sysemu/blockdev.h" #include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" #define NPCM750_EVB_POWER_ON_STRAPS 0x00001ff7 #define QUANTA_GSJ_POWER_ON_STRAPS 0x00001fff +#define QUANTA_GBS_POWER_ON_STRAPS 0x000017ff +#define KUDO_BMC_POWER_ON_STRAPS 0x00001fff static const char npcm7xx_default_bootrom[] = "npcm7xx_bootrom.bin"; @@ -80,6 +84,22 @@ static void npcm7xx_connect_dram(NPCM7xxState *soc, MemoryRegion *dram) &error_abort); } +static void sdhci_attach_drive(SDHCIState *sdhci) +{ + DriveInfo *di = drive_get_next(IF_SD); + BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL; + + BusState *bus = qdev_get_child_bus(DEVICE(sdhci), "sd-bus"); + if (bus == NULL) { + error_report("No SD bus found in SOC object"); + exit(1); + } + + DeviceState *carddev = qdev_new(TYPE_SD_CARD); + qdev_prop_set_drive_err(carddev, "drive", blk, &error_fatal); + qdev_realize_and_unref(carddev, bus, &error_fatal); +} + static NPCM7xxState *npcm7xx_create_soc(MachineState *machine, uint32_t hw_straps) { @@ -222,7 +242,18 @@ static void quanta_gsj_i2c_init(NPCM7xxState *soc) at24c_eeprom_init(soc, 9, 0x55, 8192); at24c_eeprom_init(soc, 10, 0x55, 8192); - /* TODO: Add additional i2c devices. 
*/ + /* + * i2c-11: + * - power-brick@36: delta,dps800 + * - hotswap@15: ti,lm5066i + */ + + /* + * i2c-12: + * - ucd90160@6b + */ + + i2c_slave_create_simple(npcm7xx_i2c_get_bus(soc, 15), "pca9548", 0x75); } static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) @@ -239,6 +270,65 @@ static void quanta_gsj_fan_init(NPCM7xxMachine *machine, NPCM7xxState *soc) npcm7xx_connect_pwm_fan(soc, &splitter[2], 0x05, 1); } +static void quanta_gbs_i2c_init(NPCM7xxState *soc) +{ + /* + * i2c-0: + * pca9546@71 + * + * i2c-1: + * pca9535@24 + * pca9535@20 + * pca9535@21 + * pca9535@22 + * pca9535@23 + * pca9535@25 + * pca9535@26 + * + * i2c-2: + * sbtsi@4c + * + * i2c-5: + * atmel,24c64@50 mb_fru + * pca9546@71 + * - channel 0: max31725@54 + * - channel 1: max31725@55 + * - channel 2: max31725@5d + * atmel,24c64@51 fan_fru + * - channel 3: atmel,24c64@52 hsbp_fru + * + * i2c-6: + * pca9545@73 + * + * i2c-7: + * pca9545@72 + * + * i2c-8: + * adi,adm1272@10 + * + * i2c-9: + * pca9546@71 + * - channel 0: isil,isl68137@60 + * - channel 1: isil,isl68137@61 + * - channel 2: isil,isl68137@63 + * - channel 3: isil,isl68137@45 + * + * i2c-10: + * pca9545@71 + * + * i2c-11: + * pca9545@76 + * + * i2c-12: + * maxim,max34451@4e + * isil,isl68137@5d + * isil,isl68137@5e + * + * i2c-14: + * pca9545@70 + */ +} + static void npcm750_evb_init(MachineState *machine) { NPCM7xxState *soc; @@ -270,6 +360,41 @@ static void quanta_gsj_init(MachineState *machine) npcm7xx_load_kernel(machine, soc); } +static void quanta_gbs_init(MachineState *machine) +{ + NPCM7xxState *soc; + + soc = npcm7xx_create_soc(machine, QUANTA_GBS_POWER_ON_STRAPS); + npcm7xx_connect_dram(soc, machine->ram); + qdev_realize(DEVICE(soc), NULL, &error_fatal); + + npcm7xx_load_bootrom(machine, soc); + + npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f", + drive_get(IF_MTD, 0, 0)); + + quanta_gbs_i2c_init(soc); + sdhci_attach_drive(&soc->mmc.sdhci); + npcm7xx_load_kernel(machine, soc); +} + +static void kudo_bmc_init(MachineState *machine) +{ + NPCM7xxState *soc; + + soc = npcm7xx_create_soc(machine, KUDO_BMC_POWER_ON_STRAPS); + npcm7xx_connect_dram(soc, machine->ram); + qdev_realize(DEVICE(soc), NULL, &error_fatal); + + npcm7xx_load_bootrom(machine, soc); + npcm7xx_connect_flash(&soc->fiu[0], 0, "mx66u51235f", + drive_get(IF_MTD, 0, 0)); + npcm7xx_connect_flash(&soc->fiu[1], 0, "mx66u51235f", + drive_get(IF_MTD, 3, 0)); + + npcm7xx_load_kernel(machine, soc); +} + static void npcm7xx_set_soc_type(NPCM7xxMachineClass *nmc, const char *type) { NPCM7xxClass *sc = NPCM7XX_CLASS(object_class_by_name(type)); @@ -301,7 +426,7 @@ static void npcm750_evb_machine_class_init(ObjectClass *oc, void *data) npcm7xx_set_soc_type(nmc, TYPE_NPCM750); - mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex A9)"; + mc->desc = "Nuvoton NPCM750 Evaluation Board (Cortex-A9)"; mc->init = npcm750_evb_init; mc->default_ram_size = 512 * MiB; }; @@ -313,11 +438,35 @@ static void gsj_machine_class_init(ObjectClass *oc, void *data) npcm7xx_set_soc_type(nmc, TYPE_NPCM730); - mc->desc = "Quanta GSJ (Cortex A9)"; + mc->desc = "Quanta GSJ (Cortex-A9)"; mc->init = quanta_gsj_init; mc->default_ram_size = 512 * MiB; }; +static void gbs_bmc_machine_class_init(ObjectClass *oc, void *data) +{ + NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc); + MachineClass *mc = MACHINE_CLASS(oc); + + npcm7xx_set_soc_type(nmc, TYPE_NPCM730); + + mc->desc = "Quanta GBS (Cortex-A9)"; + mc->init = quanta_gbs_init; + mc->default_ram_size = 1 * GiB; +} + +static void 
kudo_bmc_machine_class_init(ObjectClass *oc, void *data) +{ + NPCM7xxMachineClass *nmc = NPCM7XX_MACHINE_CLASS(oc); + MachineClass *mc = MACHINE_CLASS(oc); + + npcm7xx_set_soc_type(nmc, TYPE_NPCM730); + + mc->desc = "Kudo BMC (Cortex-A9)"; + mc->init = kudo_bmc_init; + mc->default_ram_size = 1 * GiB; +}; + static const TypeInfo npcm7xx_machine_types[] = { { .name = TYPE_NPCM7XX_MACHINE, @@ -334,6 +483,14 @@ static const TypeInfo npcm7xx_machine_types[] = { .name = MACHINE_TYPE_NAME("quanta-gsj"), .parent = TYPE_NPCM7XX_MACHINE, .class_init = gsj_machine_class_init, + }, { + .name = MACHINE_TYPE_NAME("quanta-gbs-bmc"), + .parent = TYPE_NPCM7XX_MACHINE, + .class_init = gbs_bmc_machine_class_init, + }, { + .name = MACHINE_TYPE_NAME("kudo-bmc"), + .parent = TYPE_NPCM7XX_MACHINE, + .class_init = kudo_bmc_machine_class_init, }, }; diff --git a/hw/arm/nrf51_soc.c b/hw/arm/nrf51_soc.c index e15981e019f..34da0d62f00 100644 --- a/hw/arm/nrf51_soc.c +++ b/hw/arm/nrf51_soc.c @@ -12,10 +12,9 @@ #include "qapi/error.h" #include "hw/arm/boot.h" #include "hw/sysbus.h" +#include "hw/qdev-clock.h" #include "hw/misc/unimp.h" -#include "exec/address-spaces.h" #include "qemu/log.h" -#include "cpu.h" #include "hw/arm/nrf51.h" #include "hw/arm/nrf51_soc.h" @@ -68,7 +67,22 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp) return; } - system_clock_scale = NANOSECONDS_PER_SECOND / HCLK_FRQ; + /* + * HCLK on this SoC is fixed, so we set up sysclk ourselves and + * the board shouldn't connect it. + */ + if (clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must not be wired up by the board code"); + return; + } + /* This clock doesn't need migration because it is fixed-frequency */ + clock_set_hz(s->sysclk, HCLK_FRQ); + qdev_connect_clock_in(DEVICE(&s->cpu), "cpuclk", s->sysclk); + /* + * This SoC has no systick device, so don't connect refclk. + * TODO: model the lack of systick (currently the armv7m object + * will always provide one). + */ object_property_set_link(OBJECT(&s->cpu), "memory", OBJECT(&s->container), &error_abort); @@ -193,6 +207,8 @@ static void nrf51_soc_init(Object *obj) TYPE_NRF51_TIMER); } + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); } static Property nrf51_soc_properties[] = { diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index 387eea4d44c..af3164c5519 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -34,16 +34,16 @@ #include "hw/boards.h" #include "hw/i2c/i2c.h" #include "hw/display/blizzard.h" +#include "hw/input/lm832x.h" #include "hw/input/tsc2xxx.h" #include "hw/misc/cbus.h" -#include "hw/misc/tmp105.h" +#include "hw/sensor/tmp105.h" #include "hw/qdev-properties.h" #include "hw/block/flash.h" #include "hw/hw.h" #include "hw/loader.h" #include "hw/sysbus.h" #include "qemu/log.h" -#include "exec/address-spaces.h" /* Nokia N8x0 support */ struct n800_s { @@ -417,7 +417,7 @@ static void n810_kbd_setup(struct n800_s *s) /* Attach the LM8322 keyboard to the I2C bus, * should happen in n8x0_i2c_setup and s->kbd be initialised here. 
*/ s->kbd = DEVICE(i2c_slave_create_simple(omap_i2c_bus(s->mpu->i2c[0]), - "lm8323", N810_LM8323_ADDR)); + TYPE_LM8323, N810_LM8323_ADDR)); qdev_connect_gpio_out(s->kbd, 0, kbd_irq); } @@ -692,7 +692,7 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len) default: bad_cmd: qemu_log_mask(LOG_GUEST_ERROR, - "%s: unknown command %02x\n", __func__, s->cmd); + "%s: unknown command 0x%02x\n", __func__, s->cmd); break; } diff --git a/hw/arm/omap1.c b/hw/arm/omap1.c index 02c0f66431b..180d3788f89 100644 --- a/hw/arm/omap1.c +++ b/hw/arm/omap1.c @@ -24,7 +24,6 @@ #include "qemu-common.h" #include "cpu.h" #include "exec/address-spaces.h" -#include "hw/boards.h" #include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index 16d388fc79d..02b1aa8c974 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -27,7 +27,6 @@ #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "hw/boards.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/arm/boot.h" diff --git a/hw/arm/orangepi.c b/hw/arm/orangepi.c index 40cdb5c6d2c..0cf9895ce79 100644 --- a/hw/arm/orangepi.c +++ b/hw/arm/orangepi.c @@ -21,12 +21,9 @@ #include "qemu/units.h" #include "exec/address-spaces.h" #include "qapi/error.h" -#include "cpu.h" -#include "hw/sysbus.h" #include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/arm/allwinner-h3.h" -#include "sysemu/sysemu.h" static struct arm_boot_info orangepi_binfo = { .nb_cpus = AW_H3_NUM_CPUS, diff --git a/hw/arm/palm.c b/hw/arm/palm.c index 4e3dc5fbbf2..68e11dd1ecc 100644 --- a/hw/arm/palm.c +++ b/hw/arm/palm.c @@ -29,7 +29,6 @@ #include "hw/input/tsc2xxx.h" #include "hw/irq.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "cpu.h" #include "qemu/cutils.h" #include "qom/object.h" diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index fdc4955e95b..15a247efae2 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -1437,7 +1437,7 @@ static void pxa2xx_i2c_write(void *opaque, hwaddr addr, break; case ISAR: - i2c_set_slave_address(I2C_SLAVE(s->slave), value & 0x7f); + i2c_slave_set_address(I2C_SLAVE(s->slave), value & 0x7f); break; case IDBR: diff --git a/hw/arm/pxa2xx_pic.c b/hw/arm/pxa2xx_pic.c index cf6cb2a373a..ed032fed548 100644 --- a/hw/arm/pxa2xx_pic.c +++ b/hw/arm/pxa2xx_pic.c @@ -301,7 +301,7 @@ DeviceState *pxa2xx_pic_init(hwaddr base, ARMCPU *cpu) return dev; } -static VMStateDescription vmstate_pxa2xx_pic_regs = { +static const VMStateDescription vmstate_pxa2xx_pic_regs = { .name = "pxa2xx_pic", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c index 990509d3852..146d35382bf 100644 --- a/hw/arm/raspi.c +++ b/hw/arm/raspi.c @@ -16,14 +16,12 @@ #include "qemu/units.h" #include "qemu/cutils.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/bcm2836.h" #include "hw/registerfields.h" #include "qemu/error-report.h" #include "hw/boards.h" #include "hw/loader.h" #include "hw/arm/boot.h" -#include "sysemu/sysemu.h" #include "qom/object.h" #define SMPBOOT_ADDR 0x300 /* this should leave enough space for ATAGS */ @@ -283,7 +281,7 @@ static void raspi_machine_init(MachineState *machine) object_property_add_const_link(OBJECT(&s->soc), "ram", OBJECT(machine->ram)); object_property_set_int(OBJECT(&s->soc), "board-rev", board_rev, &error_abort); - qdev_realize(DEVICE(&s->soc), NULL, &error_abort); + qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); /* Create and plug in the SD cards */ di = drive_get_next(IF_SD); 
@@ -342,7 +340,6 @@ static void raspi2b_machine_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); RaspiMachineClass *rmc = RASPI_MACHINE_CLASS(oc); - mc->alias = "raspi2"; rmc->board_rev = 0xa21041; raspi_machine_class_common_init(mc, rmc->board_rev); }; @@ -362,7 +359,6 @@ static void raspi3b_machine_class_init(ObjectClass *oc, void *data) MachineClass *mc = MACHINE_CLASS(oc); RaspiMachineClass *rmc = RASPI_MACHINE_CLASS(oc); - mc->alias = "raspi3"; rmc->board_rev = 0xa02082; raspi_machine_class_common_init(mc, rmc->board_rev); }; diff --git a/hw/arm/realview.c b/hw/arm/realview.c index 0831159d158..1c54316ba38 100644 --- a/hw/arm/realview.c +++ b/hw/arm/realview.c @@ -20,7 +20,6 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/i2c/i2c.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "hw/char/pl011.h" #include "hw/cpu/a9mpcore.h" diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c index a3dbf85e0ed..553608e5835 100644 --- a/hw/arm/sabrelite.c +++ b/hw/arm/sabrelite.c @@ -15,7 +15,6 @@ #include "hw/arm/fsl-imx6.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" @@ -88,7 +87,7 @@ static void sabrelite_init(MachineState *machine) qdev_realize_and_unref(flash_dev, BUS(spi_bus), &error_fatal); cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0); - sysbus_connect_irq(SYS_BUS_DEVICE(spi_dev), 1, cs_line); + qdev_connect_gpio_out(DEVICE(&s->gpio[2]), 19, cs_line); } } } @@ -106,7 +105,7 @@ static void sabrelite_init(MachineState *machine) static void sabrelite_machine_init(MachineClass *mc) { - mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex A9)"; + mc->desc = "Freescale i.MX6 Quad SABRE Lite Board (Cortex-A9)"; mc->init = sabrelite_init; mc->max_cpus = FSL_IMX6_NUM_CPUS; mc->ignore_memory_transaction_failures = true; diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 88dfb2284c1..358714bd3e8 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -27,7 +27,6 @@ #include "sysemu/numa.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" -#include "exec/address-spaces.h" #include "exec/hwaddr.h" #include "kvm_arm.h" #include "hw/arm/boot.h" @@ -66,7 +65,7 @@ enum { SBSA_GIC_DIST, SBSA_GIC_REDIST, SBSA_SECURE_EC, - SBSA_GWDT, + SBSA_GWDT_WS0, SBSA_GWDT_REFRESH, SBSA_GWDT_CONTROL, SBSA_SMMU, @@ -141,7 +140,7 @@ static const int sbsa_ref_irqmap[] = { [SBSA_AHCI] = 10, [SBSA_EHCI] = 11, [SBSA_SMMU] = 12, /* ... 
to 15 */ - [SBSA_GWDT] = 16, + [SBSA_GWDT_WS0] = 16, }; static const char * const valid_cpus[] = { @@ -482,7 +481,7 @@ static void create_wdt(const SBSAMachineState *sms) hwaddr cbase = sbsa_ref_memmap[SBSA_GWDT_CONTROL].base; DeviceState *dev = qdev_new(TYPE_WDT_SBSA); SysBusDevice *s = SYS_BUS_DEVICE(dev); - int irq = sbsa_ref_irqmap[SBSA_GWDT]; + int irq = sbsa_ref_irqmap[SBSA_GWDT_WS0]; sysbus_realize_and_unref(s, &error_fatal); sysbus_mmio_map(s, 0, rbase); @@ -671,7 +670,7 @@ static void sbsa_ref_init(MachineState *machine) int n, sbsa_max_cpus; if (!cpu_type_valid(machine->cpu_type)) { - error_report("mach-virt: CPU type %s not supported", machine->cpu_type); + error_report("sbsa-ref: CPU type %s not supported", machine->cpu_type); exit(1); } @@ -692,13 +691,6 @@ static void sbsa_ref_init(MachineState *machine) firmware_loaded = sbsa_firmware_init(sms, sysmem, secure_sysmem); - if (machine->kernel_filename && firmware_loaded) { - error_report("sbsa-ref: No fw_cfg device on this machine, " - "so -kernel option is not supported when firmware loaded, " - "please load OS from hard disk instead"); - exit(1); - } - /* * This machine has EL3 enabled, external firmware should supply PSCI * implementation, so the QEMU's internal PSCI is disabled. diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 84d2c62c26f..0459850a93d 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -17,7 +17,6 @@ */ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "trace.h" #include "exec/target_page.h" #include "hw/core/cpu.h" diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index 3dac5766ca3..d1885ae3f25 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -570,7 +570,7 @@ static inline int pa_range(STE *ste) /* CD fields */ -#define CD_VALID(x) extract32((x)->word[0], 30, 1) +#define CD_VALID(x) extract32((x)->word[0], 31, 1) #define CD_ASID(x) extract32((x)->word[1], 16, 16) #define CD_TTB(x, sel) \ ({ \ diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 87056125357..01b60bee495 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -23,7 +23,6 @@ #include "migration/vmstate.h" #include "hw/qdev-core.h" #include "hw/pci/pci.h" -#include "exec/address-spaces.h" #include "cpu.h" #include "trace.h" #include "qemu/log.h" @@ -259,8 +258,9 @@ static void smmuv3_init_regs(SMMUv3State *s) s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1); - /* 4K and 64K granule support */ + /* 4K, 16K and 64K granule support */ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ @@ -503,7 +503,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) tg = CD_TG(cd, i); tt->granule_sz = tg2granule(tg, i); - if ((tt->granule_sz != 12 && tt->granule_sz != 16) || CD_ENDI(cd)) { + if ((tt->granule_sz != 12 && tt->granule_sz != 14 && + tt->granule_sz != 16) || CD_ENDI(cd)) { goto bad_cd; } @@ -856,43 +857,45 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) { - uint8_t scale = 0, num = 0, ttl = 0; - dma_addr_t addr = CMD_ADDR(cmd); + dma_addr_t end, addr = CMD_ADDR(cmd); uint8_t type = CMD_TYPE(cmd); uint16_t vmid = CMD_VMID(cmd); + uint8_t scale = CMD_SCALE(cmd); + uint8_t num = CMD_NUM(cmd); + uint8_t ttl = CMD_TTL(cmd); bool leaf = 
CMD_LEAF(cmd); uint8_t tg = CMD_TG(cmd); - uint64_t first_page = 0, last_page; - uint64_t num_pages = 1; + uint64_t num_pages; + uint8_t granule; int asid = -1; - if (tg) { - scale = CMD_SCALE(cmd); - num = CMD_NUM(cmd); - ttl = CMD_TTL(cmd); - num_pages = (num + 1) * BIT_ULL(scale); - } - if (type == SMMU_CMD_TLBI_NH_VA) { asid = CMD_ASID(cmd); } - /* Split invalidations into ^2 range invalidations */ - last_page = num_pages - 1; - while (num_pages) { - uint8_t granule = tg * 2 + 10; - uint64_t mask, count; + if (!tg) { + trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf); + smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1); + smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl); + return; + } + + /* RIL in use */ - mask = dma_aligned_pow2_mask(first_page, last_page, 64 - granule); - count = mask + 1; + num_pages = (num + 1) * BIT_ULL(scale); + granule = tg * 2 + 10; + + /* Split invalidations into ^2 range invalidations */ + end = addr + (num_pages << granule) - 1; - trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, count, ttl, leaf); - smmuv3_inv_notifiers_iova(s, asid, addr, tg, count); - smmu_iotlb_inv_iova(s, asid, addr, tg, count, ttl); + while (addr != end + 1) { + uint64_t mask = dma_aligned_pow2_mask(addr, end, 64); - num_pages -= count; - first_page += count; - addr += count * BIT_ULL(granule); + num_pages = (mask + 1) >> granule; + trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); + smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); + smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); + addr += mask + 1; } } diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 6b3bf9828bc..5aab0b85657 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -30,7 +30,7 @@ #include "audio/audio.h" #include "hw/boards.h" #include "hw/sysbus.h" -#include "hw/misc/max111x.h" +#include "hw/adc/max111x.h" #include "migration/vmstate.h" #include "exec/address-spaces.h" #include "cpu.h" @@ -769,9 +769,9 @@ static void spitz_wm8750_addr(void *opaque, int line, int level) { I2CSlave *wm = (I2CSlave *) opaque; if (level) - i2c_set_slave_address(wm, SPITZ_WM_ADDRH); + i2c_slave_set_address(wm, SPITZ_WM_ADDRH); else - i2c_set_slave_address(wm, SPITZ_WM_ADDRL); + i2c_slave_set_address(wm, SPITZ_WM_ADDRL); } static void spitz_i2c_setup(PXA2xxState *cpu) @@ -1134,7 +1134,7 @@ static bool is_version_0(void *opaque, int version_id) return version_id == 0; } -static VMStateDescription vmstate_sl_nand_info = { +static const VMStateDescription vmstate_sl_nand_info = { .name = "sl-nand", .version_id = 0, .minimum_version_id = 0, @@ -1170,7 +1170,7 @@ static const TypeInfo sl_nand_info = { .class_init = sl_nand_class_init, }; -static VMStateDescription vmstate_spitz_kbd = { +static const VMStateDescription vmstate_spitz_kbd = { .name = "spitz-keyboard", .version_id = 1, .minimum_version_id = 0, diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index 27292ec4113..78827ace6b8 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -26,8 +26,8 @@ #include "hw/watchdog/cmsdk-apb-watchdog.h" #include "migration/vmstate.h" #include "hw/misc/unimp.h" +#include "hw/timer/stellaris-gptm.h" #include "hw/qdev-clock.h" -#include "cpu.h" #include "qom/object.h" #define GPIO_A 0 @@ -56,306 +56,6 @@ typedef const struct { uint32_t peripherals; } stellaris_board_info; -/* General purpose timer module. 
*/ - -#define TYPE_STELLARIS_GPTM "stellaris-gptm" -OBJECT_DECLARE_SIMPLE_TYPE(gptm_state, STELLARIS_GPTM) - -struct gptm_state { - SysBusDevice parent_obj; - - MemoryRegion iomem; - uint32_t config; - uint32_t mode[2]; - uint32_t control; - uint32_t state; - uint32_t mask; - uint32_t load[2]; - uint32_t match[2]; - uint32_t prescale[2]; - uint32_t match_prescale[2]; - uint32_t rtc; - int64_t tick[2]; - struct gptm_state *opaque[2]; - QEMUTimer *timer[2]; - /* The timers have an alternate output used to trigger the ADC. */ - qemu_irq trigger; - qemu_irq irq; -}; - -static void gptm_update_irq(gptm_state *s) -{ - int level; - level = (s->state & s->mask) != 0; - qemu_set_irq(s->irq, level); -} - -static void gptm_stop(gptm_state *s, int n) -{ - timer_del(s->timer[n]); -} - -static void gptm_reload(gptm_state *s, int n, int reset) -{ - int64_t tick; - if (reset) - tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - else - tick = s->tick[n]; - - if (s->config == 0) { - /* 32-bit CountDown. */ - uint32_t count; - count = s->load[0] | (s->load[1] << 16); - tick += (int64_t)count * system_clock_scale; - } else if (s->config == 1) { - /* 32-bit RTC. 1Hz tick. */ - tick += NANOSECONDS_PER_SECOND; - } else if (s->mode[n] == 0xa) { - /* PWM mode. Not implemented. */ - } else { - qemu_log_mask(LOG_UNIMP, - "GPTM: 16-bit timer mode unimplemented: 0x%x\n", - s->mode[n]); - return; - } - s->tick[n] = tick; - timer_mod(s->timer[n], tick); -} - -static void gptm_tick(void *opaque) -{ - gptm_state **p = (gptm_state **)opaque; - gptm_state *s; - int n; - - s = *p; - n = p - s->opaque; - if (s->config == 0) { - s->state |= 1; - if ((s->control & 0x20)) { - /* Output trigger. */ - qemu_irq_pulse(s->trigger); - } - if (s->mode[0] & 1) { - /* One-shot. */ - s->control &= ~1; - } else { - /* Periodic. */ - gptm_reload(s, 0, 0); - } - } else if (s->config == 1) { - /* RTC. */ - uint32_t match; - s->rtc++; - match = s->match[0] | (s->match[1] << 16); - if (s->rtc > match) - s->rtc = 0; - if (s->rtc == 0) { - s->state |= 8; - } - gptm_reload(s, 0, 0); - } else if (s->mode[n] == 0xa) { - /* PWM mode. Not implemented. */ - } else { - qemu_log_mask(LOG_UNIMP, - "GPTM: 16-bit timer mode unimplemented: 0x%x\n", - s->mode[n]); - } - gptm_update_irq(s); -} - -static uint64_t gptm_read(void *opaque, hwaddr offset, - unsigned size) -{ - gptm_state *s = (gptm_state *)opaque; - - switch (offset) { - case 0x00: /* CFG */ - return s->config; - case 0x04: /* TAMR */ - return s->mode[0]; - case 0x08: /* TBMR */ - return s->mode[1]; - case 0x0c: /* CTL */ - return s->control; - case 0x18: /* IMR */ - return s->mask; - case 0x1c: /* RIS */ - return s->state; - case 0x20: /* MIS */ - return s->state & s->mask; - case 0x24: /* CR */ - return 0; - case 0x28: /* TAILR */ - return s->load[0] | ((s->config < 4) ? (s->load[1] << 16) : 0); - case 0x2c: /* TBILR */ - return s->load[1]; - case 0x30: /* TAMARCHR */ - return s->match[0] | ((s->config < 4) ? 
(s->match[1] << 16) : 0); - case 0x34: /* TBMATCHR */ - return s->match[1]; - case 0x38: /* TAPR */ - return s->prescale[0]; - case 0x3c: /* TBPR */ - return s->prescale[1]; - case 0x40: /* TAPMR */ - return s->match_prescale[0]; - case 0x44: /* TBPMR */ - return s->match_prescale[1]; - case 0x48: /* TAR */ - if (s->config == 1) { - return s->rtc; - } - qemu_log_mask(LOG_UNIMP, - "GPTM: read of TAR but timer read not supported\n"); - return 0; - case 0x4c: /* TBR */ - qemu_log_mask(LOG_UNIMP, - "GPTM: read of TBR but timer read not supported\n"); - return 0; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "GPTM: read at bad offset 0x02%" HWADDR_PRIx "\n", - offset); - return 0; - } -} - -static void gptm_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - gptm_state *s = (gptm_state *)opaque; - uint32_t oldval; - - /* The timers should be disabled before changing the configuration. - We take advantage of this and defer everything until the timer - is enabled. */ - switch (offset) { - case 0x00: /* CFG */ - s->config = value; - break; - case 0x04: /* TAMR */ - s->mode[0] = value; - break; - case 0x08: /* TBMR */ - s->mode[1] = value; - break; - case 0x0c: /* CTL */ - oldval = s->control; - s->control = value; - /* TODO: Implement pause. */ - if ((oldval ^ value) & 1) { - if (value & 1) { - gptm_reload(s, 0, 1); - } else { - gptm_stop(s, 0); - } - } - if (((oldval ^ value) & 0x100) && s->config >= 4) { - if (value & 0x100) { - gptm_reload(s, 1, 1); - } else { - gptm_stop(s, 1); - } - } - break; - case 0x18: /* IMR */ - s->mask = value & 0x77; - gptm_update_irq(s); - break; - case 0x24: /* CR */ - s->state &= ~value; - break; - case 0x28: /* TAILR */ - s->load[0] = value & 0xffff; - if (s->config < 4) { - s->load[1] = value >> 16; - } - break; - case 0x2c: /* TBILR */ - s->load[1] = value & 0xffff; - break; - case 0x30: /* TAMARCHR */ - s->match[0] = value & 0xffff; - if (s->config < 4) { - s->match[1] = value >> 16; - } - break; - case 0x34: /* TBMATCHR */ - s->match[1] = value >> 16; - break; - case 0x38: /* TAPR */ - s->prescale[0] = value; - break; - case 0x3c: /* TBPR */ - s->prescale[1] = value; - break; - case 0x40: /* TAPMR */ - s->match_prescale[0] = value; - break; - case 0x44: /* TBPMR */ - s->match_prescale[0] = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "GPTM: write at bad offset 0x02%" HWADDR_PRIx "\n", - offset); - } - gptm_update_irq(s); -} - -static const MemoryRegionOps gptm_ops = { - .read = gptm_read, - .write = gptm_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static const VMStateDescription vmstate_stellaris_gptm = { - .name = "stellaris_gptm", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(config, gptm_state), - VMSTATE_UINT32_ARRAY(mode, gptm_state, 2), - VMSTATE_UINT32(control, gptm_state), - VMSTATE_UINT32(state, gptm_state), - VMSTATE_UINT32(mask, gptm_state), - VMSTATE_UNUSED(8), - VMSTATE_UINT32_ARRAY(load, gptm_state, 2), - VMSTATE_UINT32_ARRAY(match, gptm_state, 2), - VMSTATE_UINT32_ARRAY(prescale, gptm_state, 2), - VMSTATE_UINT32_ARRAY(match_prescale, gptm_state, 2), - VMSTATE_UINT32(rtc, gptm_state), - VMSTATE_INT64_ARRAY(tick, gptm_state, 2), - VMSTATE_TIMER_PTR_ARRAY(timer, gptm_state, 2), - VMSTATE_END_OF_LIST() - } -}; - -static void stellaris_gptm_init(Object *obj) -{ - DeviceState *dev = DEVICE(obj); - gptm_state *s = STELLARIS_GPTM(obj); - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(sbd, &s->irq); - qdev_init_gpio_out(dev, &s->trigger, 1); - - 
memory_region_init_io(&s->iomem, obj, &gptm_ops, s, - "gptm", 0x1000); - sysbus_init_mmio(sbd, &s->iomem); - - s->opaque[0] = s->opaque[1] = s; -} - -static void stellaris_gptm_realize(DeviceState *dev, Error **errp) -{ - gptm_state *s = STELLARIS_GPTM(dev); - s->timer[0] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[0]); - s->timer[1] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[1]); -} - /* System controller. */ #define TYPE_STELLARIS_SYS "stellaris-sys" @@ -563,17 +263,18 @@ static bool ssys_use_rcc2(ssys_state *s) */ static void ssys_calculate_system_clock(ssys_state *s, bool propagate_clock) { + int period_ns; /* * SYSDIV field specifies divisor: 0 == /1, 1 == /2, etc. Input * clock is 200MHz, which is a period of 5 ns. Dividing the clock * frequency by X is the same as multiplying the period by X. */ if (ssys_use_rcc2(s)) { - system_clock_scale = 5 * (((s->rcc2 >> 23) & 0x3f) + 1); + period_ns = 5 * (((s->rcc2 >> 23) & 0x3f) + 1); } else { - system_clock_scale = 5 * (((s->rcc >> 23) & 0xf) + 1); + period_ns = 5 * (((s->rcc >> 23) & 0xf) + 1); } - clock_set_ns(s->sysclk, system_clock_scale); + clock_set_ns(s->sysclk, period_ns); if (propagate_clock) { clock_propagate(s->sysclk); } @@ -756,33 +457,6 @@ static void stellaris_sys_instance_init(Object *obj) s->sysclk = qdev_init_clock_out(DEVICE(s), "SYSCLK"); } -static DeviceState *stellaris_sys_init(uint32_t base, qemu_irq irq, - stellaris_board_info *board, - uint8_t *macaddr) -{ - DeviceState *dev = qdev_new(TYPE_STELLARIS_SYS); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - /* Most devices come preprogrammed with a MAC address in the user data. */ - qdev_prop_set_uint32(dev, "user0", - macaddr[0] | (macaddr[1] << 8) | (macaddr[2] << 16)); - qdev_prop_set_uint32(dev, "user1", - macaddr[3] | (macaddr[4] << 8) | (macaddr[5] << 16)); - qdev_prop_set_uint32(dev, "did0", board->did0); - qdev_prop_set_uint32(dev, "did1", board->did1); - qdev_prop_set_uint32(dev, "dc0", board->dc0); - qdev_prop_set_uint32(dev, "dc1", board->dc1); - qdev_prop_set_uint32(dev, "dc2", board->dc2); - qdev_prop_set_uint32(dev, "dc3", board->dc3); - qdev_prop_set_uint32(dev, "dc4", board->dc4); - - sysbus_realize_and_unref(sbd, &error_fatal); - sysbus_mmio_map(sbd, 0, base); - sysbus_connect_irq(sbd, 0, irq); - - return dev; -} - /* I2C controller. */ #define TYPE_STELLARIS_I2C "stellaris-i2c" @@ -1350,6 +1024,7 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) DeviceState *ssys_dev; int i; int j; + const uint8_t *macaddr; MemoryRegion *sram = g_new(MemoryRegion, 1); MemoryRegion *flash = g_new(MemoryRegion, 1); @@ -1367,15 +1042,42 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) &error_fatal); memory_region_add_subregion(system_memory, 0x20000000, sram); + /* + * Create the system-registers object early, because we will + * need its sysclk output. + */ + ssys_dev = qdev_new(TYPE_STELLARIS_SYS); + /* Most devices come preprogrammed with a MAC address in the user data. 
*/ + macaddr = nd_table[0].macaddr.a; + qdev_prop_set_uint32(ssys_dev, "user0", + macaddr[0] | (macaddr[1] << 8) | (macaddr[2] << 16)); + qdev_prop_set_uint32(ssys_dev, "user1", + macaddr[3] | (macaddr[4] << 8) | (macaddr[5] << 16)); + qdev_prop_set_uint32(ssys_dev, "did0", board->did0); + qdev_prop_set_uint32(ssys_dev, "did1", board->did1); + qdev_prop_set_uint32(ssys_dev, "dc0", board->dc0); + qdev_prop_set_uint32(ssys_dev, "dc1", board->dc1); + qdev_prop_set_uint32(ssys_dev, "dc2", board->dc2); + qdev_prop_set_uint32(ssys_dev, "dc3", board->dc3); + qdev_prop_set_uint32(ssys_dev, "dc4", board->dc4); + sysbus_realize_and_unref(SYS_BUS_DEVICE(ssys_dev), &error_fatal); + nvic = qdev_new(TYPE_ARMV7M); qdev_prop_set_uint32(nvic, "num-irq", NUM_IRQ_LINES); qdev_prop_set_string(nvic, "cpu-type", ms->cpu_type); qdev_prop_set_bit(nvic, "enable-bitband", true); + qdev_connect_clock_in(nvic, "cpuclk", + qdev_get_clock_out(ssys_dev, "SYSCLK")); + /* This SoC does not connect the systick reference clock */ object_property_set_link(OBJECT(nvic), "memory", OBJECT(get_system_memory()), &error_abort); /* This will exit with an error if the user passed us a bad cpu_type */ sysbus_realize_and_unref(SYS_BUS_DEVICE(nvic), &error_fatal); + /* Now we can wire up the IRQ and MMIO of the system registers */ + sysbus_mmio_map(SYS_BUS_DEVICE(ssys_dev), 0, 0x400fe000); + sysbus_connect_irq(SYS_BUS_DEVICE(ssys_dev), 0, qdev_get_gpio_in(nvic, 28)); + if (board->dc1 & (1 << 16)) { dev = sysbus_create_varargs(TYPE_STELLARIS_ADC, 0x40038000, qdev_get_gpio_in(nvic, 14), @@ -1389,19 +1091,21 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) } for (i = 0; i < 4; i++) { if (board->dc2 & (0x10000 << i)) { - dev = sysbus_create_simple(TYPE_STELLARIS_GPTM, - 0x40030000 + i * 0x1000, - qdev_get_gpio_in(nvic, timer_irq[i])); + SysBusDevice *sbd; + + dev = qdev_new(TYPE_STELLARIS_GPTM); + sbd = SYS_BUS_DEVICE(dev); + qdev_connect_clock_in(dev, "clk", + qdev_get_clock_out(ssys_dev, "SYSCLK")); + sysbus_realize_and_unref(sbd, &error_fatal); + sysbus_mmio_map(sbd, 0, 0x40030000 + i * 0x1000); + sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(nvic, timer_irq[i])); /* TODO: This is incorrect, but we get away with it because the ADC output is only ever pulsed. */ qdev_connect_gpio_out(dev, 0, adc); } } - ssys_dev = stellaris_sys_init(0x400fe000, qdev_get_gpio_in(nvic, 28), - board, nd_table[0].macaddr.a); - - if (board->dc1 & (1 << 3)) { /* watchdog present */ dev = qdev_new(TYPE_LUMINARY_WATCHDOG); @@ -1454,13 +1158,67 @@ static void stellaris_init(MachineState *ms, stellaris_board_info *board) DeviceState *sddev; DeviceState *ssddev; - /* Some boards have both an OLED controller and SD card connected to + /* + * Some boards have both an OLED controller and SD card connected to * the same SSI port, with the SD card chip select connected to a * GPIO pin. Technically the OLED chip select is connected to the * SSI Fss pin. We do not bother emulating that as both devices * should never be selected simultaneously, and our OLED controller * ignores stray 0xff commands that occur when deselecting the SD * card. + * + * The h/w wiring is: + * - GPIO pin D0 is wired to the active-low SD card chip select + * - GPIO pin A3 is wired to the active-low OLED chip select + * - The SoC wiring of the PL061 "auxiliary function" for A3 is + * SSI0Fss ("frame signal"), which is an output from the SoC's + * SSI controller. The SSI controller takes SSI0Fss low when it + * transmits a frame, so it can work as a chip-select signal. 
+ * - GPIO A4 is aux-function SSI0Rx, and wired to the SD card Tx + * (the OLED never sends data to the CPU, so no wiring needed) + * - GPIO A5 is aux-function SSI0Tx, and wired to the SD card Rx + * and the OLED display-data-in + * - GPIO A2 is aux-function SSI0Clk, wired to SD card and OLED + * serial-clock input + * So a guest that wants to use the OLED can configure the PL061 + * to make pins A2, A3, A5 aux-function, so they are connected + * directly to the SSI controller. When the SSI controller sends + * data it asserts SSI0Fss which selects the OLED. + * A guest that wants to use the SD card configures A2, A4 and A5 + * as aux-function, but leaves A3 as a software-controlled GPIO + * line. It asserts the SD card chip-select by using the PL061 + * to control pin D0, and lets the SSI controller handle Clk, Tx + * and Rx. (The SSI controller asserts Fss during tx cycles as + * usual, but because A3 is not set to aux-function this is not + * forwarded to the OLED, and so the OLED stays unselected.) + * + * The QEMU implementation instead is: + * - GPIO pin D0 is wired to the active-low SD card chip select, + * and also to the OLED chip-select which is implemented + * as *active-high* + * - SSI controller signals go to the devices regardless of + * whether the guest programs A2, A4, A5 as aux-function or not + * + * The problem with this implementation is if the guest doesn't + * care about the SD card and only uses the OLED. In that case it + * may choose never to do anything with D0 (leaving it in its + * default floating state, which reliably leaves the card disabled + * because an SD card has a pullup on CS within the card itself), + * and only set up A2, A3, A5. This for us would mean the OLED + * never gets the chip-select assert it needs. We work around + * this with a manual raise of D0 here (despite board creation + * code being the wrong place to raise IRQ lines) to put the OLED + * into an initially selected state. 
+ * + * In theory the right way to model this would be: + * - Implement aux-function support in the PL061, with an + * extra set of AFIN and AFOUT GPIO lines (set up so that + * if a GPIO line is in auxfn mode the main GPIO in and out + * track the AFIN and AFOUT lines) + * - Wire the AFOUT for D0 up to either a line from the + * SSI controller that's pulled low around every transmit, + * or at least to an always-0 line here on the board + * - Make the ssd0323 OLED controller chipselect active-low */ bus = qdev_get_child_bus(dev, "ssi"); @@ -1589,22 +1347,6 @@ static const TypeInfo stellaris_i2c_info = { .class_init = stellaris_i2c_class_init, }; -static void stellaris_gptm_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->vmsd = &vmstate_stellaris_gptm; - dc->realize = stellaris_gptm_realize; -} - -static const TypeInfo stellaris_gptm_info = { - .name = TYPE_STELLARIS_GPTM, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(gptm_state), - .instance_init = stellaris_gptm_init, - .class_init = stellaris_gptm_class_init, -}; - static void stellaris_adc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -1643,7 +1385,6 @@ static const TypeInfo stellaris_sys_info = { static void stellaris_register_types(void) { type_register_static(&stellaris_i2c_info); - type_register_static(&stellaris_gptm_info); type_register_static(&stellaris_adc_info); type_register_static(&stellaris_sys_info); } diff --git a/hw/arm/stm32f100_soc.c b/hw/arm/stm32f100_soc.c new file mode 100644 index 00000000000..f7b344ba9fb --- /dev/null +++ b/hw/arm/stm32f100_soc.c @@ -0,0 +1,209 @@ +/* + * STM32F100 SoC + * + * Copyright (c) 2021 Alexandre Iooss + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/arm/boot.h" +#include "exec/address-spaces.h" +#include "hw/arm/stm32f100_soc.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" +#include "hw/misc/unimp.h" +#include "sysemu/sysemu.h" + +/* stm32f100_soc implementation is derived from stm32f205_soc */ + +static const uint32_t usart_addr[STM_NUM_USARTS] = { 0x40013800, 0x40004400, + 0x40004800 }; +static const uint32_t spi_addr[STM_NUM_SPIS] = { 0x40013000, 0x40003800 }; + +static const int usart_irq[STM_NUM_USARTS] = {37, 38, 39}; +static const int spi_irq[STM_NUM_SPIS] = {35, 36}; + +static void stm32f100_soc_initfn(Object *obj) +{ + STM32F100State *s = STM32F100_SOC(obj); + int i; + + object_initialize_child(obj, "armv7m", &s->armv7m, TYPE_ARMV7M); + + for (i = 0; i < STM_NUM_USARTS; i++) { + object_initialize_child(obj, "usart[*]", &s->usart[i], + TYPE_STM32F2XX_USART); + } + + for (i = 0; i < STM_NUM_SPIS; i++) { + object_initialize_child(obj, "spi[*]", &s->spi[i], TYPE_STM32F2XX_SPI); + } + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0); +} + +static void stm32f100_soc_realize(DeviceState *dev_soc, Error **errp) +{ + STM32F100State *s = STM32F100_SOC(dev_soc); + DeviceState *dev, *armv7m; + SysBusDevice *busdev; + int i; + + MemoryRegion *system_memory = get_system_memory(); + + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk clock must not be wired up by the board code"); + return; + } + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC RCC and its ability to + * change the sysclk frequency and define different sysclk sources. 
+ */ + + /* The refclk always runs at frequency HCLK / 8 */ + clock_set_mul_div(s->refclk, 8, 1); + clock_set_source(s->refclk, s->sysclk); + + /* + * Init flash region + * Flash starts at 0x08000000 and then is aliased to boot memory at 0x0 + */ + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "STM32F100.flash", + FLASH_SIZE, &error_fatal); + memory_region_init_alias(&s->flash_alias, OBJECT(dev_soc), + "STM32F100.flash.alias", &s->flash, 0, FLASH_SIZE); + memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash); + memory_region_add_subregion(system_memory, 0, &s->flash_alias); + + /* Init SRAM region */ + memory_region_init_ram(&s->sram, NULL, "STM32F100.sram", SRAM_SIZE, + &error_fatal); + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); + + /* Init ARMv7m */ + armv7m = DEVICE(&s->armv7m); + qdev_prop_set_uint32(armv7m, "num-irq", 61); + qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); + qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); + object_property_set_link(OBJECT(&s->armv7m), "memory", + OBJECT(get_system_memory()), &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { + return; + } + + /* Attach UART (uses USART registers) and USART controllers */ + for (i = 0; i < STM_NUM_USARTS; i++) { + dev = DEVICE(&(s->usart[i])); + qdev_prop_set_chr(dev, "chardev", serial_hd(i)); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->usart[i]), errp)) { + return; + } + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, usart_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i])); + } + + /* SPI 1 and 2 */ + for (i = 0; i < STM_NUM_SPIS; i++) { + dev = DEVICE(&(s->spi[i])); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) { + return; + } + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, spi_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i])); + } + + create_unimplemented_device("timer[2]", 0x40000000, 0x400); + create_unimplemented_device("timer[3]", 0x40000400, 0x400); + create_unimplemented_device("timer[4]", 0x40000800, 0x400); + create_unimplemented_device("timer[6]", 0x40001000, 0x400); + create_unimplemented_device("timer[7]", 0x40001400, 0x400); + create_unimplemented_device("RTC", 0x40002800, 0x400); + create_unimplemented_device("WWDG", 0x40002C00, 0x400); + create_unimplemented_device("IWDG", 0x40003000, 0x400); + create_unimplemented_device("I2C1", 0x40005400, 0x400); + create_unimplemented_device("I2C2", 0x40005800, 0x400); + create_unimplemented_device("BKP", 0x40006C00, 0x400); + create_unimplemented_device("PWR", 0x40007000, 0x400); + create_unimplemented_device("DAC", 0x40007400, 0x400); + create_unimplemented_device("CEC", 0x40007800, 0x400); + create_unimplemented_device("AFIO", 0x40010000, 0x400); + create_unimplemented_device("EXTI", 0x40010400, 0x400); + create_unimplemented_device("GPIOA", 0x40010800, 0x400); + create_unimplemented_device("GPIOB", 0x40010C00, 0x400); + create_unimplemented_device("GPIOC", 0x40011000, 0x400); + create_unimplemented_device("GPIOD", 0x40011400, 0x400); + create_unimplemented_device("GPIOE", 0x40011800, 0x400); + create_unimplemented_device("ADC1", 0x40012400, 0x400); + create_unimplemented_device("timer[1]", 0x40012C00, 0x400); + create_unimplemented_device("timer[15]", 0x40014000, 0x400); + create_unimplemented_device("timer[16]", 0x40014400, 0x400); + create_unimplemented_device("timer[17]", 
0x40014800, 0x400); + create_unimplemented_device("DMA", 0x40020000, 0x400); + create_unimplemented_device("RCC", 0x40021000, 0x400); + create_unimplemented_device("Flash Int", 0x40022000, 0x400); + create_unimplemented_device("CRC", 0x40023000, 0x400); +} + +static Property stm32f100_soc_properties[] = { + DEFINE_PROP_STRING("cpu-type", STM32F100State, cpu_type), + DEFINE_PROP_END_OF_LIST(), +}; + +static void stm32f100_soc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = stm32f100_soc_realize; + device_class_set_props(dc, stm32f100_soc_properties); +} + +static const TypeInfo stm32f100_soc_info = { + .name = TYPE_STM32F100_SOC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(STM32F100State), + .instance_init = stm32f100_soc_initfn, + .class_init = stm32f100_soc_class_init, +}; + +static void stm32f100_soc_types(void) +{ + type_register_static(&stm32f100_soc_info); +} + +type_init(stm32f100_soc_types) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 9cd41bf56da..c6b75a381d9 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -29,6 +29,7 @@ #include "exec/address-spaces.h" #include "hw/arm/stm32f205_soc.h" #include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" #include "sysemu/sysemu.h" /* At the moment only Timer 2 to 5 are modelled */ @@ -74,6 +75,9 @@ static void stm32f205_soc_initfn(Object *obj) for (i = 0; i < STM_NUM_SPIS; i++) { object_initialize_child(obj, "spi[*]", &s->spi[i], TYPE_STM32F2XX_SPI); } + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0); } static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) @@ -84,26 +88,49 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) int i; MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *sram = g_new(MemoryRegion, 1); - MemoryRegion *flash = g_new(MemoryRegion, 1); - MemoryRegion *flash_alias = g_new(MemoryRegion, 1); - memory_region_init_rom(flash, OBJECT(dev_soc), "STM32F205.flash", + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk clock must not be wired up by the board code"); + return; + } + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC RCC and its ability to + * change the sysclk frequency and define different sysclk sources. 
+ */ + + /* The refclk always runs at frequency HCLK / 8 */ + clock_set_mul_div(s->refclk, 8, 1); + clock_set_source(s->refclk, s->sysclk); + + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "STM32F205.flash", FLASH_SIZE, &error_fatal); - memory_region_init_alias(flash_alias, OBJECT(dev_soc), - "STM32F205.flash.alias", flash, 0, FLASH_SIZE); + memory_region_init_alias(&s->flash_alias, OBJECT(dev_soc), + "STM32F205.flash.alias", &s->flash, 0, FLASH_SIZE); - memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, flash); - memory_region_add_subregion(system_memory, 0, flash_alias); + memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash); + memory_region_add_subregion(system_memory, 0, &s->flash_alias); - memory_region_init_ram(sram, NULL, "STM32F205.sram", SRAM_SIZE, + memory_region_init_ram(&s->sram, NULL, "STM32F205.sram", SRAM_SIZE, &error_fatal); - memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); + memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram); armv7m = DEVICE(&s->armv7m); qdev_prop_set_uint32(armv7m, "num-irq", 96); qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(get_system_memory()), &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { diff --git a/hw/arm/stm32f405_soc.c b/hw/arm/stm32f405_soc.c index cb04c111987..0019b7f4785 100644 --- a/hw/arm/stm32f405_soc.c +++ b/hw/arm/stm32f405_soc.c @@ -28,6 +28,7 @@ #include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "hw/arm/stm32f405_soc.h" +#include "hw/qdev-clock.h" #include "hw/misc/unimp.h" #define SYSCFG_ADD 0x40013800 @@ -80,6 +81,9 @@ static void stm32f405_soc_initfn(Object *obj) } object_initialize_child(obj, "exti", &s->exti, TYPE_STM32F4XX_EXTI); + + s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0); + s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0); } static void stm32f405_soc_realize(DeviceState *dev_soc, Error **errp) @@ -91,6 +95,30 @@ static void stm32f405_soc_realize(DeviceState *dev_soc, Error **errp) Error *err = NULL; int i; + /* + * We use s->refclk internally and only define it with qdev_init_clock_in() + * so it is correctly parented and not leaked on an init/deinit; it is not + * intended as an externally exposed clock. + */ + if (clock_has_source(s->refclk)) { + error_setg(errp, "refclk clock must not be wired up by the board code"); + return; + } + + if (!clock_has_source(s->sysclk)) { + error_setg(errp, "sysclk clock must be wired up by the board code"); + return; + } + + /* + * TODO: ideally we should model the SoC RCC and its ability to + * change the sysclk frequency and define different sysclk sources. 
+ */ + + /* The refclk always runs at frequency HCLK / 8 */ + clock_set_mul_div(s->refclk, 8, 1); + clock_set_source(s->refclk, s->sysclk); + memory_region_init_rom(&s->flash, OBJECT(dev_soc), "STM32F405.flash", FLASH_SIZE, &err); if (err != NULL) { @@ -116,6 +144,8 @@ static void stm32f405_soc_realize(DeviceState *dev_soc, Error **errp) qdev_prop_set_uint32(armv7m, "num-irq", 96); qdev_prop_set_string(armv7m, "cpu-type", s->cpu_type); qdev_prop_set_bit(armv7m, "enable-bitband", true); + qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk); + qdev_connect_clock_in(armv7m, "refclk", s->refclk); object_property_set_link(OBJECT(&s->armv7m), "memory", OBJECT(system_memory), &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) { diff --git a/hw/arm/stm32vldiscovery.c b/hw/arm/stm32vldiscovery.c new file mode 100644 index 00000000000..04036da3ee0 --- /dev/null +++ b/hw/arm/stm32vldiscovery.c @@ -0,0 +1,65 @@ +/* + * ST STM32VLDISCOVERY machine + * + * Copyright (c) 2021 Alexandre Iooss + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-clock.h" +#include "qemu/error-report.h" +#include "hw/arm/stm32f100_soc.h" +#include "hw/arm/boot.h" + +/* stm32vldiscovery implementation is derived from netduinoplus2 */ + +/* Main SYSCLK frequency in Hz (24MHz) */ +#define SYSCLK_FRQ 24000000ULL + +static void stm32vldiscovery_init(MachineState *machine) +{ + DeviceState *dev; + Clock *sysclk; + + /* This clock doesn't need migration because it is fixed-frequency */ + sysclk = clock_new(OBJECT(machine), "SYSCLK"); + clock_set_hz(sysclk, SYSCLK_FRQ); + + dev = qdev_new(TYPE_STM32F100_SOC); + qdev_prop_set_string(dev, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m3")); + qdev_connect_clock_in(dev, "sysclk", sysclk); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), + machine->kernel_filename, + FLASH_SIZE); +} + +static void stm32vldiscovery_machine_init(MachineClass *mc) +{ + mc->desc = "ST STM32VLDISCOVERY (Cortex-M3)"; + mc->init = stm32vldiscovery_init; +} + +DEFINE_MACHINE("stm32vldiscovery", stm32vldiscovery_machine_init) diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index c7ca54bceaa..939a57dda55 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -30,7 +30,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "cpu.h" -#include "hw/boards.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" @@ -207,7 +206,7 @@ static int strongarm_pic_post_load(void *opaque, int version_id) return 0; } -static VMStateDescription vmstate_strongarm_pic_regs = { +static const VMStateDescription vmstate_strongarm_pic_regs = { .name = "strongarm_pic", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index 6b6906f4cfc..48c5fe9bf18 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -437,6 +437,7 @@ static bool vfio_platform_match(SysBusDevice *sbdev, #endif /* CONFIG_LINUX */ +#ifdef CONFIG_TPM /* * add_tpm_tis_fdt_node: Create a DT node for TPM TIS * @@ -467,6 +468,7 @@ static int add_tpm_tis_fdt_node(SysBusDevice *sbdev, void *opaque) g_free(nodename); return 0; } +#endif static int no_fdt_node(SysBusDevice *sbdev, void *opaque) { @@ -488,7 +490,9 @@ static const BindingEntry bindings[] = { TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), #endif +#ifdef CONFIG_TPM TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node), +#endif TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node), TYPE_BINDING("", NULL), /* last element */ }; diff --git a/hw/arm/trace-events b/hw/arm/trace-events index b79a91af5fb..2dee296c8fb 100644 --- a/hw/arm/trace-events +++ b/hw/arm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # virt-acpi-build.c virt_acpi_setup(void) "No fw cfg or ACPI disabled. Bailing out." 
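The clock conversions in the stm32f100/f205/f405 SoC hunks above and the stm32vldiscovery board all follow one pattern: the SoC declares sysclk/refclk as clock inputs at init time, refuses to realize unless the board wired sysclk (and did not wire refclk), derives refclk as HCLK/8, and forwards both to the armv7m container; the board merely creates a fixed-frequency clock and connects it. The following is a minimal sketch of that pattern, not part of the patch series; it assumes only the qdev clock API ("hw/qdev-clock.h") calls already used in the hunks above, and the MySoC/MYSOC/TYPE_MYSOC and myboard names plus the struct boilerplate are hypothetical placeholders for illustration.

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/qdev-clock.h"
#include "hw/arm/armv7m.h"
#include "hw/boards.h"

#define TYPE_MYSOC "mysoc"                     /* hypothetical example device */
OBJECT_DECLARE_SIMPLE_TYPE(MySoCState, MYSOC)

struct MySoCState {
    SysBusDevice parent_obj;
    ARMv7MState armv7m;
    Clock *sysclk;
    Clock *refclk;
};

static void mysoc_init(Object *obj)
{
    MySoCState *s = MYSOC(obj);

    /* Clock inputs with no callback: the SoC only consumes them */
    s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0);
    s->refclk = qdev_init_clock_in(DEVICE(s), "refclk", NULL, NULL, 0);
}

static void mysoc_realize(DeviceState *dev_soc, Error **errp)
{
    MySoCState *s = MYSOC(dev_soc);

    /* refclk is internal only; the board must not wire it up */
    if (clock_has_source(s->refclk)) {
        error_setg(errp, "refclk clock must not be wired up by the board code");
        return;
    }
    /* sysclk must be provided by the board */
    if (!clock_has_source(s->sysclk)) {
        error_setg(errp, "sysclk clock must be wired up by the board code");
        return;
    }

    /* refclk tracks whatever the board feeds in, divided by 8 (HCLK / 8) */
    clock_set_mul_div(s->refclk, 8, 1);
    clock_set_source(s->refclk, s->sysclk);

    /* Hand both clocks to the armv7m container */
    qdev_connect_clock_in(DEVICE(&s->armv7m), "cpuclk", s->sysclk);
    qdev_connect_clock_in(DEVICE(&s->armv7m), "refclk", s->refclk);
    /* ... realize armv7m and the rest of the SoC here ... */
}

/* Board side: a fixed-frequency source, as stm32vldiscovery does above */
static void myboard_init(MachineState *machine)
{
    Clock *sysclk = clock_new(OBJECT(machine), "SYSCLK");
    DeviceState *soc;

    /* Fixed-frequency clock, so it does not need migration */
    clock_set_hz(sysclk, 24000000);
    soc = qdev_new(TYPE_MYSOC);
    qdev_connect_clock_in(soc, "sysclk", sysclk);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(soc), &error_fatal);
}

The same split explains the error checks: fixed-clock SoCs such as nrf51 reject a board-wired sysclk and set the frequency themselves, whereas these STM32 SoCs require the board to supply it because different boards run the core at different speeds.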
diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c index 1ea55346264..575399c4fc9 100644 --- a/hw/arm/versatilepb.c +++ b/hw/arm/versatilepb.c @@ -21,7 +21,6 @@ #include "hw/i2c/arm_sbcon_i2c.h" #include "hw/irq.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "hw/block/flash.h" #include "qemu/error-report.h" #include "hw/char/pl011.h" diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index 326a1a6db52..58481c07629 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -35,7 +35,6 @@ #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "hw/block/flash.h" #include "sysemu/device_tree.h" #include "qemu/error-report.h" diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 60fe2e65a76..674f9026525 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -44,6 +44,7 @@ #include "hw/acpi/tpm.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/pci-host/gpex.h" #include "hw/arm/virt.h" #include "hw/mem/nvdimm.h" @@ -205,6 +206,7 @@ static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap, aml_append(scope, dev); } +#ifdef CONFIG_TPM static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) { PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev); @@ -236,180 +238,317 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); } +#endif +#define ID_MAPPING_ENTRY_SIZE 20 +#define SMMU_V3_ENTRY_SIZE 68 +#define ROOT_COMPLEX_ENTRY_SIZE 36 +#define IORT_NODE_OFFSET 48 + +static void build_iort_id_mapping(GArray *table_data, uint32_t input_base, + uint32_t id_count, uint32_t out_ref) +{ + /* Table 4 ID mapping format */ + build_append_int_noprefix(table_data, input_base, 4); /* Input base */ + build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */ + build_append_int_noprefix(table_data, input_base, 4); /* Output base */ + build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */ + /* Flags */ + build_append_int_noprefix(table_data, 0 /* Single mapping (disabled) */, 4); +} + +struct AcpiIortIdMapping { + uint32_t input_base; + uint32_t id_count; +}; +typedef struct AcpiIortIdMapping AcpiIortIdMapping; + +/* Build the iort ID mapping to SMMUv3 for a given PCI host bridge */ +static int +iort_host_bridges(Object *obj, void *opaque) +{ + GArray *idmap_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + AcpiIortIdMapping idmap = { + .input_base = min_bus << 8, + .id_count = (max_bus - min_bus + 1) << 8, + }; + g_array_append_val(idmap_blob, idmap); + } + } + + return 0; +} + +static int iort_idmap_compare(gconstpointer a, gconstpointer b) +{ + AcpiIortIdMapping *idmap_a = (AcpiIortIdMapping *)a; + AcpiIortIdMapping *idmap_b = (AcpiIortIdMapping *)b; + + return idmap_a->input_base - idmap_b->input_base; +} + +/* + * Input Output Remapping Table (IORT) + * Conforms to "IO Remapping Table System Software on ARM Platforms", + * Document number: ARM DEN 0049E.b, Feb 2021 + */ static void build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - int nb_nodes, iort_start = table_data->len; + int i, nb_nodes, rc_mapping_count; + const uint32_t iort_node_offset = IORT_NODE_OFFSET; + size_t node_size, 
smmu_offset = 0; AcpiIortIdMapping *idmap; - AcpiIortItsGroup *its; - AcpiIortTable *iort; - AcpiIortSmmu3 *smmu; - size_t node_size, iort_node_offset, iort_length, smmu_offset = 0; - AcpiIortRC *rc; + uint32_t id = 0; + GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); - iort = acpi_data_push(table_data, sizeof(*iort)); + AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + /* Table 2 The IORT */ + acpi_table_begin(&table, table_data); if (vms->iommu == VIRT_IOMMU_SMMUV3) { + AcpiIortIdMapping next_range = {0}; + + object_child_foreach_recursive(object_get_root(), + iort_host_bridges, smmu_idmaps); + + /* Sort the smmu idmap by input_base */ + g_array_sort(smmu_idmaps, iort_idmap_compare); + + /* + * Split the whole RIDs by mapping from RC to SMMU, + * build the ID mapping from RC to ITS directly. + */ + for (i = 0; i < smmu_idmaps->len; i++) { + idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + + if (next_range.input_base < idmap->input_base) { + next_range.id_count = idmap->input_base - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + + next_range.input_base = idmap->input_base + idmap->id_count; + } + + /* Append the last RC -> ITS ID mapping */ + if (next_range.input_base < 0xFFFF) { + next_range.id_count = 0xFFFF - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + nb_nodes = 3; /* RC, ITS, SMMUv3 */ + rc_mapping_count = smmu_idmaps->len + its_idmaps->len; } else { nb_nodes = 2; /* RC, ITS */ + rc_mapping_count = 1; } - - iort_length = sizeof(*iort); - iort->node_count = cpu_to_le32(nb_nodes); - /* - * Use a copy in case table_data->data moves during acpi_data_push - * operations. 
- */ - iort_node_offset = sizeof(*iort); - iort->node_offset = cpu_to_le32(iort_node_offset); - - /* ITS group node */ - node_size = sizeof(*its) + sizeof(uint32_t); - iort_length += node_size; - its = acpi_data_push(table_data, node_size); - - its->type = ACPI_IORT_NODE_ITS_GROUP; - its->length = cpu_to_le16(node_size); - its->its_count = cpu_to_le32(1); - its->identifiers[0] = 0; /* MADT translation_id */ + /* Number of IORT Nodes */ + build_append_int_noprefix(table_data, nb_nodes, 4); + + /* Offset to Array of IORT Nodes */ + build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + + /* Table 12 ITS Group Format */ + build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */ + node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 1, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */ + build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */ + build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */ + /* GIC ITS Identifier Array */ + build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4); if (vms->iommu == VIRT_IOMMU_SMMUV3) { int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; - /* SMMUv3 node */ - smmu_offset = iort_node_offset + node_size; - node_size = sizeof(*smmu) + sizeof(*idmap); - iort_length += node_size; - smmu = acpi_data_push(table_data, node_size); - - smmu->type = ACPI_IORT_NODE_SMMU_V3; - smmu->length = cpu_to_le16(node_size); - smmu->mapping_count = cpu_to_le32(1); - smmu->mapping_offset = cpu_to_le32(sizeof(*smmu)); - smmu->base_address = cpu_to_le64(vms->memmap[VIRT_SMMU].base); - smmu->flags = cpu_to_le32(ACPI_IORT_SMMU_V3_COHACC_OVERRIDE); - smmu->event_gsiv = cpu_to_le32(irq); - smmu->pri_gsiv = cpu_to_le32(irq + 1); - smmu->sync_gsiv = cpu_to_le32(irq + 2); - smmu->gerr_gsiv = cpu_to_le32(irq + 3); - - /* Identity RID mapping covering the whole input RID range */ - idmap = &smmu->id_mapping_array[0]; - idmap->input_base = 0; - idmap->id_count = cpu_to_le32(0xFFFF); - idmap->output_base = 0; + smmu_offset = table_data->len - table.table_offset; + /* Table 9 SMMUv3 Format */ + build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */ + node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 4, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + build_append_int_noprefix(table_data, 1, 4); /* Number of ID mappings */ + /* Reference to ID Array */ + build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4); + /* Base address */ + build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8); + /* Flags */ + build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); /* VATOS address */ + /* Model */ + build_append_int_noprefix(table_data, 0 /* Generic SMMU-v3 */, 4); + build_append_int_noprefix(table_data, irq, 4); /* Event */ + build_append_int_noprefix(table_data, irq + 1, 4); /* PRI */ + build_append_int_noprefix(table_data, irq + 3, 4); /* GERR */ + build_append_int_noprefix(table_data, irq + 2, 4); /* Sync */ + 
build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */ + /* DeviceID mapping index (ignored since interrupts are GSIV based) */ + build_append_int_noprefix(table_data, 0, 4); + /* output IORT node is the ITS group node (the first node) */ - idmap->output_reference = cpu_to_le32(iort_node_offset); + build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); } - /* Root Complex Node */ - node_size = sizeof(*rc) + sizeof(*idmap); - iort_length += node_size; - rc = acpi_data_push(table_data, node_size); - - rc->type = ACPI_IORT_NODE_PCI_ROOT_COMPLEX; - rc->length = cpu_to_le16(node_size); - rc->mapping_count = cpu_to_le32(1); - rc->mapping_offset = cpu_to_le32(sizeof(*rc)); - - /* fully coherent device */ - rc->memory_properties.cache_coherency = cpu_to_le32(1); - rc->memory_properties.memory_flags = 0x3; /* CCA = CPM = DCAS = 1 */ - rc->pci_segment_number = 0; /* MCFG pci_segment */ - - /* Identity RID mapping covering the whole input RID range */ - idmap = &rc->id_mapping_array[0]; - idmap->input_base = 0; - idmap->id_count = cpu_to_le32(0xFFFF); - idmap->output_base = 0; - + /* Table 17 Root Complex Node */ + build_append_int_noprefix(table_data, 2 /* Root complex */, 1); /* Type */ + node_size = ROOT_COMPLEX_ENTRY_SIZE + + ID_MAPPING_ENTRY_SIZE * rc_mapping_count; + build_append_int_noprefix(table_data, node_size, 2); /* Length */ + build_append_int_noprefix(table_data, 3, 1); /* Revision */ + build_append_int_noprefix(table_data, id++, 4); /* Identifier */ + /* Number of ID mappings */ + build_append_int_noprefix(table_data, rc_mapping_count, 4); + /* Reference to ID Array */ + build_append_int_noprefix(table_data, ROOT_COMPLEX_ENTRY_SIZE, 4); + + /* Table 14 Memory access properties */ + /* CCA: Cache Coherent Attribute */ + build_append_int_noprefix(table_data, 1 /* fully coherent */, 4); + build_append_int_noprefix(table_data, 0, 1); /* AH: Note Allocation Hints */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Table 15 Memory Access Flags */ + build_append_int_noprefix(table_data, 0x3 /* CCA = CPM = DACS = 1 */, 1); + + build_append_int_noprefix(table_data, 0, 4); /* ATS Attribute */ + /* MCFG pci_segment */ + build_append_int_noprefix(table_data, 0, 4); /* PCI Segment number */ + + /* Memory address size limit */ + build_append_int_noprefix(table_data, 64, 1); + + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + + /* Output Reference */ if (vms->iommu == VIRT_IOMMU_SMMUV3) { - /* output IORT node is the smmuv3 node */ - idmap->output_reference = cpu_to_le32(smmu_offset); + AcpiIortIdMapping *range; + + /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */ + for (i = 0; i < smmu_idmaps->len; i++) { + range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the smmuv3 node */ + build_iort_id_mapping(table_data, range->input_base, + range->id_count, smmu_offset); + } + + /* bypassed RIDs connect to ITS group node directly: RC -> ITS */ + for (i = 0; i < its_idmaps->len; i++) { + range = &g_array_index(its_idmaps, AcpiIortIdMapping, i); + /* output IORT node is the ITS group node (the first node) */ + build_iort_id_mapping(table_data, range->input_base, + range->id_count, iort_node_offset); + } } else { /* output IORT node is the ITS group node (the first node) */ - idmap->output_reference = cpu_to_le32(iort_node_offset); + build_iort_id_mapping(table_data, 0, 0xFFFF, IORT_NODE_OFFSET); } - /* - * Update the pointer address in case table_data->data moves during above - * acpi_data_push operations. 
- */ - iort = (AcpiIortTable *)(table_data->data + iort_start); - iort->length = cpu_to_le32(iort_length); - - build_header(linker, table_data, (void *)(table_data->data + iort_start), - "IORT", table_data->len - iort_start, 0, vms->oem_id, - vms->oem_table_id); + acpi_table_end(linker, &table); + g_array_free(smmu_idmaps, true); + g_array_free(its_idmaps, true); } +/* + * Serial Port Console Redirection Table (SPCR) + * Rev: 1.07 + */ static void build_spcr(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - AcpiSerialPortConsoleRedirection *spcr; - const MemMapEntry *uart_memmap = &vms->memmap[VIRT_UART]; - int irq = vms->irqmap[VIRT_UART] + ARM_SPI_BASE; - int spcr_start = table_data->len; - - spcr = acpi_data_push(table_data, sizeof(*spcr)); - - spcr->interface_type = 0x3; /* ARM PL011 UART */ - - spcr->base_address.space_id = AML_SYSTEM_MEMORY; - spcr->base_address.bit_width = 8; - spcr->base_address.bit_offset = 0; - spcr->base_address.access_width = 1; - spcr->base_address.address = cpu_to_le64(uart_memmap->base); - - spcr->interrupt_types = (1 << 3); /* Bit[3] ARMH GIC interrupt */ - spcr->gsi = cpu_to_le32(irq); /* Global System Interrupt */ - - spcr->baud = 3; /* Baud Rate: 3 = 9600 */ - spcr->parity = 0; /* No Parity */ - spcr->stopbits = 1; /* 1 Stop bit */ - spcr->flowctrl = (1 << 1); /* Bit[1] = RTS/CTS hardware flow control */ - spcr->term_type = 0; /* Terminal Type: 0 = VT100 */ - - spcr->pci_device_id = 0xffff; /* PCI Device ID: not a PCI device */ - spcr->pci_vendor_id = 0xffff; /* PCI Vendor ID: not a PCI device */ - - build_header(linker, table_data, (void *)(table_data->data + spcr_start), - "SPCR", table_data->len - spcr_start, 2, vms->oem_id, - vms->oem_table_id); + AcpiTable table = { .sig = "SPCR", .rev = 2, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + + acpi_table_begin(&table, table_data); + + /* Interface Type */ + build_append_int_noprefix(table_data, 3, 1); /* ARM PL011 UART */ + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + /* Base Address */ + build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 8, 0, 1, + vms->memmap[VIRT_UART].base); + /* Interrupt Type */ + build_append_int_noprefix(table_data, + (1 << 3) /* Bit[3] ARMH GIC interrupt */, 1); + build_append_int_noprefix(table_data, 0, 1); /* IRQ */ + /* Global System Interrupt */ + build_append_int_noprefix(table_data, + vms->irqmap[VIRT_UART] + ARM_SPI_BASE, 4); + build_append_int_noprefix(table_data, 3 /* 9600 */, 1); /* Baud Rate */ + build_append_int_noprefix(table_data, 0 /* No Parity */, 1); /* Parity */ + /* Stop Bits */ + build_append_int_noprefix(table_data, 1 /* 1 Stop bit */, 1); + /* Flow Control */ + build_append_int_noprefix(table_data, + (1 << 1) /* RTS/CTS hardware flow control */, 1); + /* Terminal Type */ + build_append_int_noprefix(table_data, 0 /* VT100 */, 1); + build_append_int_noprefix(table_data, 0, 1); /* Language */ + /* PCI Device ID */ + build_append_int_noprefix(table_data, 0xffff /* not a PCI device*/, 2); + /* PCI Vendor ID */ + build_append_int_noprefix(table_data, 0xffff /* not a PCI device*/, 2); + build_append_int_noprefix(table_data, 0, 1); /* PCI Bus Number */ + build_append_int_noprefix(table_data, 0, 1); /* PCI Device Number */ + build_append_int_noprefix(table_data, 0, 1); /* PCI Function Number */ + build_append_int_noprefix(table_data, 0, 4); /* PCI Flags */ + build_append_int_noprefix(table_data, 0, 1); /* PCI Segment */ + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + + acpi_table_end(linker, &table); 
} +/* + * ACPI spec, Revision 5.1 + * 5.2.16 System Resource Affinity Table (SRAT) + */ static void build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - AcpiSystemResourceAffinityTable *srat; - AcpiSratProcessorGiccAffinity *core; - AcpiSratMemoryAffinity *numamem; - int i, srat_start; + int i; uint64_t mem_base; MachineClass *mc = MACHINE_GET_CLASS(vms); MachineState *ms = MACHINE(vms); const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(ms); + AcpiTable table = { .sig = "SRAT", .rev = 3, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; - srat_start = table_data->len; - srat = acpi_data_push(table_data, sizeof(*srat)); - srat->reserved1 = cpu_to_le32(1); + acpi_table_begin(&table, table_data); + build_append_int_noprefix(table_data, 1, 4); /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); /* Reserved */ for (i = 0; i < cpu_list->len; ++i) { - core = acpi_data_push(table_data, sizeof(*core)); - core->type = ACPI_SRAT_PROCESSOR_GICC; - core->length = sizeof(*core); - core->proximity = cpu_to_le32(cpu_list->cpus[i].props.node_id); - core->acpi_processor_uid = cpu_to_le32(i); - core->flags = cpu_to_le32(1); + uint32_t nodeid = cpu_list->cpus[i].props.node_id; + /* + * 5.2.16.4 GICC Affinity Structure + */ + build_append_int_noprefix(table_data, 3, 1); /* Type */ + build_append_int_noprefix(table_data, 18, 1); /* Length */ + build_append_int_noprefix(table_data, nodeid, 4); /* Proximity Domain */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags, Table 5-76 */ + build_append_int_noprefix(table_data, 1 /* Enabled */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Clock Domain */ } mem_base = vms->memmap[VIRT_MEM].base; for (i = 0; i < ms->numa_state->num_nodes; ++i) { if (ms->numa_state->nodes[i].node_mem > 0) { - numamem = acpi_data_push(table_data, sizeof(*numamem)); - build_srat_memory(numamem, mem_base, + build_srat_memory(table_data, mem_base, ms->numa_state->nodes[i].node_mem, i, MEM_AFFINITY_ENABLED); mem_base += ms->numa_state->nodes[i].node_mem; @@ -421,142 +560,251 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } if (ms->device_memory) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, ms->device_memory->base, + build_srat_memory(table_data, ms->device_memory->base, memory_region_size(&ms->device_memory->mr), ms->numa_state->num_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } - build_header(linker, table_data, (void *)(table_data->data + srat_start), - "SRAT", table_data->len - srat_start, 3, vms->oem_id, - vms->oem_table_id); + acpi_table_end(linker, &table); } -/* GTDT */ +/* + * ACPI spec, Revision 5.1 + * 5.2.24 Generic Timer Description Table (GTDT) + */ static void build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); - int gtdt_start = table_data->len; - AcpiGenericTimerTable *gtdt; - uint32_t irqflags; - - if (vmc->claim_edge_triggered_timers) { - irqflags = ACPI_GTDT_INTERRUPT_MODE_EDGE; - } else { - irqflags = ACPI_GTDT_INTERRUPT_MODE_LEVEL; - } - - gtdt = acpi_data_push(table_data, sizeof *gtdt); - /* The interrupt values are the same with the device tree when adding 16 */ - gtdt->secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_S_EL1_IRQ + 16); - gtdt->secure_el1_flags = cpu_to_le32(irqflags); - - gtdt->non_secure_el1_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL1_IRQ + 16); - gtdt->non_secure_el1_flags = cpu_to_le32(irqflags | - 
ACPI_GTDT_CAP_ALWAYS_ON); - - gtdt->virtual_timer_interrupt = cpu_to_le32(ARCH_TIMER_VIRT_IRQ + 16); - gtdt->virtual_timer_flags = cpu_to_le32(irqflags); + /* + * Table 5-117 Flag Definitions + * set only "Timer interrupt Mode" and assume "Timer Interrupt + * polarity" bit as '0: Interrupt is Active high' + */ + uint32_t irqflags = vmc->claim_edge_triggered_timers ? + 1 : /* Interrupt is Edge triggered */ + 0; /* Interrupt is Level triggered */ + AcpiTable table = { .sig = "GTDT", .rev = 2, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + + acpi_table_begin(&table, table_data); + + /* CntControlBase Physical Address */ + /* FIXME: invalid value, should be 0xFFFFFFFFFFFFFFFF if not impl. ? */ + build_append_int_noprefix(table_data, 0, 8); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ + /* + * FIXME: clarify comment: + * The interrupt values are the same with the device tree when adding 16 + */ + /* Secure EL1 timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_S_EL1_IRQ + 16, 4); + /* Secure EL1 timer Flags */ + build_append_int_noprefix(table_data, irqflags, 4); + /* Non-Secure EL1 timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL1_IRQ + 16, 4); + /* Non-Secure EL1 timer Flags */ + build_append_int_noprefix(table_data, irqflags | + 1UL << 2, /* Always-on Capability */ + 4); + /* Virtual timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_VIRT_IRQ + 16, 4); + /* Virtual Timer Flags */ + build_append_int_noprefix(table_data, irqflags, 4); + /* Non-Secure EL2 timer GSIV */ + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_IRQ + 16, 4); + /* Non-Secure EL2 timer Flags */ + build_append_int_noprefix(table_data, irqflags, 4); + /* CntReadBase Physical address */ + build_append_int_noprefix(table_data, 0, 8); + /* Platform Timer Count */ + build_append_int_noprefix(table_data, 0, 4); + /* Platform Timer Offset */ + build_append_int_noprefix(table_data, 0, 4); + + acpi_table_end(linker, &table); +} - gtdt->non_secure_el2_interrupt = cpu_to_le32(ARCH_TIMER_NS_EL2_IRQ + 16); - gtdt->non_secure_el2_flags = cpu_to_le32(irqflags); +/* Debug Port Table 2 (DBG2) */ +static void +build_dbg2(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +{ + AcpiTable table = { .sig = "DBG2", .rev = 0, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + int dbg2devicelength; + const char name[] = "COM0"; + const int namespace_length = sizeof(name); + + acpi_table_begin(&table, table_data); + + dbg2devicelength = 22 + /* BaseAddressRegister[] offset */ + 12 + /* BaseAddressRegister[] */ + 4 + /* AddressSize[] */ + namespace_length /* NamespaceString[] */; + + /* OffsetDbgDeviceInfo */ + build_append_int_noprefix(table_data, 44, 4); + /* NumberDbgDeviceInfo */ + build_append_int_noprefix(table_data, 1, 4); + + /* Table 2. 
Debug Device Information structure format */ + build_append_int_noprefix(table_data, 0, 1); /* Revision */ + build_append_int_noprefix(table_data, dbg2devicelength, 2); /* Length */ + /* NumberofGenericAddressRegisters */ + build_append_int_noprefix(table_data, 1, 1); + /* NameSpaceStringLength */ + build_append_int_noprefix(table_data, namespace_length, 2); + build_append_int_noprefix(table_data, 38, 2); /* NameSpaceStringOffset */ + build_append_int_noprefix(table_data, 0, 2); /* OemDataLength */ + /* OemDataOffset (0 means no OEM data) */ + build_append_int_noprefix(table_data, 0, 2); + + /* Port Type */ + build_append_int_noprefix(table_data, 0x8000 /* Serial */, 2); + /* Port Subtype */ + build_append_int_noprefix(table_data, 0x3 /* ARM PL011 UART */, 2); + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* BaseAddressRegisterOffset */ + build_append_int_noprefix(table_data, 22, 2); + /* AddressSizeOffset */ + build_append_int_noprefix(table_data, 34, 2); + + /* BaseAddressRegister[] */ + build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 8, 0, 1, + vms->memmap[VIRT_UART].base); + + /* AddressSize[] */ + build_append_int_noprefix(table_data, + vms->memmap[VIRT_UART].size, 4); + + /* NamespaceString[] */ + g_array_append_vals(table_data, name, namespace_length); + + acpi_table_end(linker, &table); +}; - build_header(linker, table_data, - (void *)(table_data->data + gtdt_start), "GTDT", - table_data->len - gtdt_start, 2, vms->oem_id, - vms->oem_table_id); +/* + * ACPI spec, Revision 5.1 Errata A + * 5.2.12 Multiple APIC Description Table (MADT) + */ +static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) +{ + build_append_int_noprefix(table_data, 0xE, 1); /* Type */ + build_append_int_noprefix(table_data, 16, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Discovery Range Base Addres */ + build_append_int_noprefix(table_data, base, 8); + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ } -/* MADT */ static void build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { + int i; VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); - int madt_start = table_data->len; const MemMapEntry *memmap = vms->memmap; - const int *irqmap = vms->irqmap; - AcpiMadtGenericDistributor *gicd; - AcpiMadtGenericMsiFrame *gic_msi; - int i; - - acpi_data_push(table_data, sizeof(AcpiMultipleApicTable)); - - gicd = acpi_data_push(table_data, sizeof *gicd); - gicd->type = ACPI_APIC_GENERIC_DISTRIBUTOR; - gicd->length = sizeof(*gicd); - gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base); - gicd->version = vms->gic_version; + AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ + build_append_int_noprefix(table_data, 0, 4); + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + + /* 5.2.12.15 GIC Distributor Structure */ + build_append_int_noprefix(table_data, 0xC, 1); /* Type */ + build_append_int_noprefix(table_data, 24, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* GIC ID */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, memmap[VIRT_GIC_DIST].base, 8); + build_append_int_noprefix(table_data, 0, 4); /* System Vector Base */ + /* GIC version */ + build_append_int_noprefix(table_data, vms->gic_version, 1); + 
build_append_int_noprefix(table_data, 0, 3); /* Reserved */ for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { - AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data, - sizeof(*gicc)); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? PPI(ARCH_GIC_MAINT_IRQ) : 0; + uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? + PPI(VIRTUAL_PMU_IRQ) : 0; - gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; - gicc->length = sizeof(*gicc); if (vms->gic_version == 2) { - gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base); - gicc->gich_base_address = cpu_to_le64(memmap[VIRT_GIC_HYP].base); - gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); + physical_base_address = memmap[VIRT_GIC_CPU].base; + gicv = memmap[VIRT_GIC_VCPU].base; + gich = memmap[VIRT_GIC_HYP].base; } - gicc->cpu_interface_number = cpu_to_le32(i); - gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); - gicc->uid = cpu_to_le32(i); - gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); - if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { - gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); - } - if (vms->virt) { - gicc->vgic_interrupt = cpu_to_le32(PPI(ARCH_GIC_MAINT_IRQ)); - } + /* 5.2.12.14 GIC Structure */ + build_append_int_noprefix(table_data, 0xB, 1); /* Type */ + build_append_int_noprefix(table_data, 76, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, i, 4); /* GIC ID */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags */ + build_append_int_noprefix(table_data, 1, 4); /* Enabled */ + /* Parking Protocol Version */ + build_append_int_noprefix(table_data, 0, 4); + /* Performance Interrupt GSIV */ + build_append_int_noprefix(table_data, pmu_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* Parked Address */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, physical_base_address, 8); + build_append_int_noprefix(table_data, gicv, 8); /* GICV */ + build_append_int_noprefix(table_data, gich, 8); /* GICH */ + /* VGIC Maintenance interrupt */ + build_append_int_noprefix(table_data, vgic_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ + /* MPIDR */ + build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); } if (vms->gic_version == 3) { - AcpiMadtGenericTranslator *gic_its; - int nb_redist_regions = virt_gicv3_redist_region_count(vms); - AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data, - sizeof *gicr); - - gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR; - gicr->length = sizeof(*gicr); - gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base); - gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size); - - if (nb_redist_regions == 2) { - gicr = acpi_data_push(table_data, sizeof(*gicr)); - gicr->type = ACPI_APIC_GENERIC_REDISTRIBUTOR; - gicr->length = sizeof(*gicr); - gicr->base_address = - cpu_to_le64(memmap[VIRT_HIGH_GIC_REDIST2].base); - gicr->range_length = - cpu_to_le32(memmap[VIRT_HIGH_GIC_REDIST2].size); + build_append_gicr(table_data, memmap[VIRT_GIC_REDIST].base, + memmap[VIRT_GIC_REDIST].size); + if (virt_gicv3_redist_region_count(vms) == 2) { + build_append_gicr(table_data, memmap[VIRT_HIGH_GIC_REDIST2].base, + memmap[VIRT_HIGH_GIC_REDIST2].size); } if (its_class_name() && !vmc->no_its) { - gic_its = acpi_data_push(table_data, sizeof *gic_its); - gic_its->type = 
ACPI_APIC_GENERIC_TRANSLATOR; - gic_its->length = sizeof(*gic_its); - gic_its->translation_id = 0; - gic_its->base_address = cpu_to_le64(memmap[VIRT_GIC_ITS].base); + /* + * FIXME: Structure is from Revision 6.0 where 'GIC Structure' + * has additional fields on top of implemented 5.1 Errata A, + * to make it consistent with v6.0 we need to bump everything + * to v6.0 + */ + /* + * ACPI spec, Revision 6.0 Errata A + * (original 6.0 definition has invalid Length) + * 5.2.12.18 GIC ITS Structure + */ + build_append_int_noprefix(table_data, 0xF, 1); /* Type */ + build_append_int_noprefix(table_data, 20, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* GIC ITS ID */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, memmap[VIRT_GIC_ITS].base, 8); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ } } else { - gic_msi = acpi_data_push(table_data, sizeof *gic_msi); - gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME; - gic_msi->length = sizeof(*gic_msi); - gic_msi->gic_msi_frame_id = 0; - gic_msi->base_address = cpu_to_le64(memmap[VIRT_GIC_V2M].base); - gic_msi->flags = cpu_to_le32(1); - gic_msi->spi_count = cpu_to_le16(NUM_GICV2M_SPIS); - gic_msi->spi_base = cpu_to_le16(irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE); + const uint16_t spi_base = vms->irqmap[VIRT_GIC_V2M] + ARM_SPI_BASE; + + /* 5.2.12.16 GIC MSI Frame Structure */ + build_append_int_noprefix(table_data, 0xD, 1); /* Type */ + build_append_int_noprefix(table_data, 24, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + build_append_int_noprefix(table_data, 0, 4); /* GIC MSI Frame ID */ + /* Physical Base Address */ + build_append_int_noprefix(table_data, memmap[VIRT_GIC_V2M].base, 8); + build_append_int_noprefix(table_data, 1, 4); /* Flags */ + /* SPI Count */ + build_append_int_noprefix(table_data, NUM_GICV2M_SPIS, 2); + build_append_int_noprefix(table_data, spi_base, 2); /* SPI Base */ } - - build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 3, vms->oem_id, - vms->oem_table_id); + acpi_table_end(linker, &table); } /* FADT */ @@ -598,10 +846,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) MachineState *ms = MACHINE(vms); const MemMapEntry *memmap = vms->memmap; const int *irqmap = vms->irqmap; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); /* When booting the VM with UEFI, UEFI takes ownership of the RTC hardware. 
* While UEFI can use libfdt to disable the RTC device node in the DTB that @@ -642,16 +891,16 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } acpi_dsdt_add_power_button(scope); +#ifdef CONFIG_TPM acpi_dsdt_add_tpm(scope, vms); +#endif aml_append(dsdt, scope); - /* copy AML table into ACPI tables blob and patch header there */ + /* copy AML table into ACPI tables blob */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 2, vms->oem_id, - vms->oem_table_id); + + acpi_table_end(linker, &table); free_aml_allocator(); } @@ -694,13 +943,19 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) dsdt = tables_blob->len; build_dsdt(tables_blob, tables->linker, vms); - /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */ + /* FADT MADT PPTT GTDT MCFG SPCR DBG2 pointed to by RSDT */ acpi_add_table(table_offsets, tables_blob); build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, vms); + if (!vmc->no_cpu_topology) { + acpi_add_table(table_offsets, tables_blob); + build_pptt(tables_blob, tables->linker, ms, + vms->oem_id, vms->oem_table_id); + } + acpi_add_table(table_offsets, tables_blob); build_gtdt(tables_blob, tables->linker, vms); @@ -717,6 +972,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables_blob); build_spcr(tables_blob, tables->linker, vms); + acpi_add_table(table_offsets, tables_blob); + build_dbg2(tables_blob, tables->linker, vms); + if (vms->ras) { build_ghes_error_table(tables->hardware_errors, tables->linker); acpi_add_table(table_offsets, tables_blob); @@ -745,11 +1003,13 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_iort(tables_blob, tables->linker, vms); } +#ifdef CONFIG_TPM if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) { acpi_add_table(table_offsets, tables_blob); build_tpm2(tables_blob, tables->linker, tables->tcpalog, vms->oem_id, vms->oem_table_id); } +#endif /* XSDT is pointed to by RSDP */ xsdt = tables_blob->len; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a30a254600c..a94e906b2eb 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -36,7 +36,6 @@ #include "monitor/qdev.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "hw/arm/boot.h" #include "hw/arm/primecell.h" #include "hw/arm/virt.h" @@ -48,11 +47,11 @@ #include "sysemu/device_tree.h" #include "sysemu/numa.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "sysemu/tpm.h" #include "sysemu/kvm.h" +#include "sysemu/hvf.h" #include "hw/loader.h" -#include "exec/address-spaces.h" +#include "qapi/error.h" #include "qemu/bitops.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -207,6 +206,7 @@ static const char *valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a57"), ARM_CPU_TYPE_NAME("cortex-a72"), ARM_CPU_TYPE_NAME("morello"), + ARM_CPU_TYPE_NAME("a64fx"), ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; @@ -357,20 +357,21 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) int cpu; int addr_cells = 1; const MachineState *ms = MACHINE(vms); + const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); int smp_cpus = ms->smp.cpus; /* - * From Documentation/devicetree/bindings/arm/cpus.txt - * On ARM v8 64-bit systems value should be set to 2, - * that corresponds to the MPIDR_EL1 register 
size. - * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs - * in the system, #address-cells can be set to 1, since - * MPIDR_EL1[63:32] bits are not used for CPUs - * identification. + * See Linux Documentation/devicetree/bindings/arm/cpus.yaml + * On ARM v8 64-bit systems value should be set to 2, + * that corresponds to the MPIDR_EL1 register size. + * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs + * in the system, #address-cells can be set to 1, since + * MPIDR_EL1[63:32] bits are not used for CPUs + * identification. * - * Here we actually don't know whether our system is 32- or 64-bit one. - * The simplest way to go is to examine affinity IDs of all our CPUs. If - * at least one of them has Aff3 populated, we set #address-cells to 2. + * Here we actually don't know whether our system is 32- or 64-bit one. + * The simplest way to go is to examine affinity IDs of all our CPUs. If + * at least one of them has Aff3 populated, we set #address-cells to 2. */ for (cpu = 0; cpu < smp_cpus; cpu++) { ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); @@ -413,8 +414,57 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) ms->possible_cpus->cpus[cs->cpu_index].props.node_id); } + if (!vmc->no_cpu_topology) { + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); + } + g_free(nodename); } + + if (!vmc->no_cpu_topology) { + /* + * Add vCPU topology description through fdt node cpu-map. + * + * See Linux Documentation/devicetree/bindings/cpu/cpu-topology.txt + * In a SMP system, the hierarchy of CPUs can be defined through + * four entities that are used to describe the layout of CPUs in + * the system: socket/cluster/core/thread. + * + * A socket node represents the boundary of system physical package + * and its child nodes must be one or more cluster nodes. A system + * can contain several layers of clustering within a single physical + * package and cluster nodes can be contained in parent cluster nodes. + * + * Given that cluster is not yet supported in the vCPU topology, + * we currently generate one cluster node within each socket node + * by default. 
+ */ + qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map"); + + for (cpu = smp_cpus - 1; cpu >= 0; cpu--) { + char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu); + char *map_path; + + if (ms->smp.threads > 1) { + map_path = g_strdup_printf( + "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d", + cpu / (ms->smp.cores * ms->smp.threads), + (cpu / ms->smp.threads) % ms->smp.cores, + cpu % ms->smp.threads); + } else { + map_path = g_strdup_printf( + "/cpus/cpu-map/socket%d/cluster0/core%d", + cpu / ms->smp.cores, + cpu % ms->smp.cores); + } + qemu_fdt_add_path(ms->fdt, map_path); + qemu_fdt_setprop_phandle(ms->fdt, map_path, "cpu", cpu_path); + + g_free(map_path); + g_free(cpu_path); + } + } } static void fdt_add_its_gic_node(VirtMachineState *vms) @@ -590,6 +640,12 @@ static void create_its(VirtMachineState *vms) const char *itsclass = its_class_name(); DeviceState *dev; + if (!strcmp(itsclass, "arm-gicv3-its")) { + if (!vms->tcg_its) { + itsclass = NULL; + } + } + if (!itsclass) { /* Do nothing if not supported */ return; @@ -627,7 +683,7 @@ static void create_v2m(VirtMachineState *vms) vms->msi_controller = VIRT_MSI_CTRL_GICV2M; } -static void create_gic(VirtMachineState *vms) +static void create_gic(VirtMachineState *vms, MemoryRegion *mem) { MachineState *ms = MACHINE(vms); /* We create a standalone GIC */ @@ -661,6 +717,14 @@ static void create_gic(VirtMachineState *vms) nb_redist_regions); qdev_prop_set_uint32(vms->gic, "redist-region-count[0]", redist0_count); + if (!kvm_irqchip_in_kernel()) { + if (vms->tcg_its) { + object_property_set_link(OBJECT(vms->gic), "sysmem", + OBJECT(mem), &error_fatal); + qdev_prop_set_bit(vms->gic, "has-lpi", true); + } + } + if (nb_redist_regions == 2) { uint32_t redist1_capacity = vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE; @@ -902,6 +966,9 @@ static void create_gpio_devices(const VirtMachineState *vms, int gpio, MachineState *ms = MACHINE(vms); pl061_dev = qdev_new("pl061"); + /* Pull lines down to 0 if not driven by the PL061 */ + qdev_prop_set_uint32(pl061_dev, "pullups", 0); + qdev_prop_set_uint32(pl061_dev, "pulldowns", 0xff); s = SYS_BUS_DEVICE(pl061_dev); sysbus_realize_and_unref(s, &error_fatal); memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0)); @@ -1371,6 +1438,7 @@ static void create_pcie(VirtMachineState *vms) } pci = PCI_HOST_BRIDGE(dev); + pci->bypass_iommu = vms->default_bus_bypass_iommu; vms->bus = pci->bus; if (vms->bus) { for (i = 0; i < nb_nics; i++) { @@ -1447,7 +1515,7 @@ static void create_platform_bus(VirtMachineState *vms) MemoryRegion *sysmem = get_system_memory(); dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); - dev->id = TYPE_PLATFORM_BUS_DEVICE; + dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); qdev_prop_set_uint32(dev, "num_irqs", PLATFORM_BUS_NUM_IRQS); qdev_prop_set_uint32(dev, "mmio_size", vms->memmap[VIRT_PLATFORM_BUS].size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -1534,8 +1602,10 @@ static void virt_build_smbios(VirtMachineState *vms) vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, true, SMBIOS_ENTRY_POINT_30); - smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, + &error_fatal); if (smbios_anchor) { fw_cfg_add_file(vms->fw_cfg, "etc/smbios/smbios-tables", @@ -1858,11 +1928,6 @@ static void machvirt_init(MachineState *machine) } if (vms->secure) { - if (kvm_enabled()) { - error_report("mach-virt: KVM does not support Security extensions"); - exit(1); - } - /* * The Secure view of the world is the same as the NonSecure, * but with a few extra devices. Create it as a container region @@ -1915,15 +1980,17 @@ static void machvirt_init(MachineState *machine) exit(1); } - if (vms->virt && kvm_enabled()) { - error_report("mach-virt: KVM does not support providing " - "Virtualization extensions to the guest CPU"); + if (vms->virt && (kvm_enabled() || hvf_enabled())) { + error_report("mach-virt: %s does not support providing " + "Virtualization extensions to the guest CPU", + kvm_enabled() ? "KVM" : "HVF"); exit(1); } - if (vms->mte && kvm_enabled()) { - error_report("mach-virt: KVM does not support providing " - "MTE to the guest CPU"); + if (vms->mte && (kvm_enabled() || hvf_enabled())) { + error_report("mach-virt: %s does not support providing " + "MTE to the guest CPU", + kvm_enabled() ? "KVM" : "HVF"); exit(1); } @@ -2050,7 +2117,7 @@ static void machvirt_init(MachineState *machine) virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); - create_gic(vms); + create_gic(vms, sysmem); virt_cpu_post_init(vms, sysmem); @@ -2330,6 +2397,21 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) } } +static bool virt_get_default_bus_bypass_iommu(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->default_bus_bypass_iommu; +} + +static void virt_set_default_bus_bypass_iommu(Object *obj, bool value, + Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->default_bus_bypass_iommu = value; +} + static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { @@ -2610,7 +2692,9 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); +#ifdef CONFIG_TPM machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); +#endif mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; mc->pci_allow_0_address = true; @@ -2671,6 +2755,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) "Set the IOMMU type. 
" "Valid values are none and smmuv3"); + object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); + object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + object_class_property_add_bool(oc, "ras", virt_get_ras, virt_set_ras); object_class_property_set_description(oc, "ras", @@ -2733,11 +2824,20 @@ static void virt_instance_init(Object *obj) } else { /* Default allows ITS instantiation */ vms->its = true; + + if (vmc->no_tcg_its) { + vms->tcg_its = false; + } else { + vms->tcg_its = true; + } } /* Default disallows iommu instantiation */ vms->iommu = VIRT_IOMMU_NONE; + /* The default root bus is attached to iommu by default */ + vms->default_bus_bypass_iommu = false; + /* Default disallows RAS instantiation */ vms->ras = false; @@ -2772,10 +2872,31 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_6_2_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(6, 2) + +static void virt_machine_6_1_options(MachineClass *mc) +{ + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + + virt_machine_6_2_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + mc->smp_props.prefer_sockets = true; + vmc->no_cpu_topology = true; + + /* qemu ITS was introduced with 6.2 */ + vmc->no_tcg_its = true; +} +DEFINE_VIRT_MACHINE(6, 1) + static void virt_machine_6_0_options(MachineClass *mc) { + virt_machine_6_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_VIRT_MACHINE_AS_LATEST(6, 0) +DEFINE_VIRT_MACHINE(6, 0) static void virt_machine_5_2_options(MachineClass *mc) { diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c index 8db6cfd47f5..69c333e91b1 100644 --- a/hw/arm/xilinx_zynq.c +++ b/hw/arm/xilinx_zynq.c @@ -22,12 +22,11 @@ #include "hw/sysbus.h" #include "hw/arm/boot.h" #include "net/net.h" -#include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/block/flash.h" #include "hw/loader.h" -#include "hw/misc/zynq-xadc.h" +#include "hw/adc/zynq-xadc.h" #include "hw/ssi/ssi.h" #include "hw/usb/chipidea.h" #include "qemu/error-report.h" @@ -119,7 +118,7 @@ static void gem_init(NICInfo *nd, uint32_t base, qemu_irq irq) qemu_check_nic_model(nd, TYPE_CADENCE_GEM); qdev_set_nic_properties(dev, nd); } - object_property_set_int(OBJECT(dev), "phy-addr", 23, &error_abort); + object_property_set_int(OBJECT(dev), "phy-addr", 7, &error_abort); s = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(s, &error_fatal); sysbus_mmio_map(s, 0, base); @@ -313,6 +312,9 @@ static void zynq_init(MachineState *machine) sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[39-IRQ_OFFSET]); dev = qdev_new("pl330"); + object_property_set_link(OBJECT(dev), "memory", + OBJECT(address_space_mem), + &error_fatal); qdev_prop_set_uint8(dev, "num_chnls", 8); qdev_prop_set_uint8(dev, "num_periph_req", 4); qdev_prop_set_uint8(dev, "num_events", 16); diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c index 8482cd61960..d2f55e29b64 100644 --- a/hw/arm/xlnx-versal-virt.c +++ b/hw/arm/xlnx-versal-virt.c @@ -10,11 +10,9 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "sysemu/device_tree.h" -#include "exec/address-spaces.h" #include "hw/boards.h" #include "hw/sysbus.h" #include "hw/arm/sysbus-fdt.h" @@ -358,6 +356,61 @@ 
static void fdt_add_rtc_node(VersalVirt *s) g_free(name); } +static void fdt_add_bbram_node(VersalVirt *s) +{ + const char compat[] = TYPE_XLNX_BBRAM; + const char interrupt_names[] = "bbram-error"; + char *name = g_strdup_printf("/bbram@%x", MM_PMC_BBRAM_CTRL); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_cells(s->fdt, name, "interrupts", + GIC_FDT_IRQ_TYPE_SPI, VERSAL_BBRAM_APB_IRQ_0, + GIC_FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop(s->fdt, name, "interrupt-names", + interrupt_names, sizeof(interrupt_names)); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_BBRAM_CTRL, + 2, MM_PMC_BBRAM_CTRL_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + +static void fdt_add_efuse_ctrl_node(VersalVirt *s) +{ + const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CTRL; + const char interrupt_names[] = "pmc_efuse"; + char *name = g_strdup_printf("/pmc_efuse@%x", MM_PMC_EFUSE_CTRL); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_cells(s->fdt, name, "interrupts", + GIC_FDT_IRQ_TYPE_SPI, VERSAL_EFUSE_IRQ, + GIC_FDT_IRQ_FLAGS_LEVEL_HI); + qemu_fdt_setprop(s->fdt, name, "interrupt-names", + interrupt_names, sizeof(interrupt_names)); + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_EFUSE_CTRL, + 2, MM_PMC_EFUSE_CTRL_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + +static void fdt_add_efuse_cache_node(VersalVirt *s) +{ + const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CACHE; + char *name = g_strdup_printf("/xlnx_pmc_efuse_cache@%x", + MM_PMC_EFUSE_CACHE); + + qemu_fdt_add_subnode(s->fdt, name); + + qemu_fdt_setprop_sized_cells(s->fdt, name, "reg", + 2, MM_PMC_EFUSE_CACHE, + 2, MM_PMC_EFUSE_CACHE_SIZE); + qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat)); + g_free(name); +} + static void fdt_nop_memory_nodes(void *fdt, Error **errp) { Error *err = NULL; @@ -512,6 +565,30 @@ static void create_virtio_regions(VersalVirt *s) } } +static void bbram_attach_drive(XlnxBBRam *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 0); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + +static void efuse_attach_drive(XlnxEFuse *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 1); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + static void sd_plugin_card(SDHCIState *sd, DriveInfo *di) { BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL; @@ -572,6 +649,9 @@ static void versal_virt_init(MachineState *machine) fdt_add_usb_xhci_nodes(s); fdt_add_sd_nodes(s); fdt_add_rtc_node(s); + fdt_add_bbram_node(s); + fdt_add_efuse_ctrl_node(s); + fdt_add_efuse_cache_node(s); fdt_add_cpu_nodes(s, psci_conduit); fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz); fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz); @@ -581,6 +661,12 @@ static void versal_virt_init(MachineState *machine) memory_region_add_subregion_overlap(get_system_memory(), 0, &s->soc.fpd.apu.mr, 0); + /* Attach bbram backend, if given */ + bbram_attach_drive(&s->soc.pmc.bbram); + + /* Attach efuse backend, if given */ + efuse_attach_drive(&s->soc.pmc.efuse); + /* Plugin SD cards. 
*/ for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) { sd_plugin_card(&s->soc.pmc.iou.sd[i], drive_get_next(IF_SD)); diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c index 79609692e4e..b2705b6925e 100644 --- a/hw/arm/xlnx-versal.c +++ b/hw/arm/xlnx-versal.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" -#include "qemu/log.h" #include "qemu/module.h" #include "hw/sysbus.h" #include "net/net.h" @@ -219,6 +218,8 @@ static void versal_create_admas(Versal *s, qemu_irq *pic) TYPE_XLNX_ZDMA); dev = DEVICE(&s->lpd.iou.adma[i]); object_property_set_int(OBJECT(dev), "bus-width", 128, &error_abort); + object_property_set_link(OBJECT(dev), "dma", + OBJECT(get_system_memory()), &error_fatal); sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal); mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); @@ -313,6 +314,61 @@ static void versal_create_xrams(Versal *s, qemu_irq *pic) } } +static void versal_create_bbram(Versal *s, qemu_irq *pic) +{ + SysBusDevice *sbd; + + object_initialize_child_with_props(OBJECT(s), "bbram", &s->pmc.bbram, + sizeof(s->pmc.bbram), TYPE_XLNX_BBRAM, + &error_fatal, + "crc-zpads", "0", + NULL); + sbd = SYS_BUS_DEVICE(&s->pmc.bbram); + + sysbus_realize(sbd, &error_fatal); + memory_region_add_subregion(&s->mr_ps, MM_PMC_BBRAM_CTRL, + sysbus_mmio_get_region(sbd, 0)); + sysbus_connect_irq(sbd, 0, pic[VERSAL_BBRAM_APB_IRQ_0]); +} + +static void versal_realize_efuse_part(Versal *s, Object *dev, hwaddr base) +{ + SysBusDevice *part = SYS_BUS_DEVICE(dev); + + object_property_set_link(OBJECT(part), "efuse", + OBJECT(&s->pmc.efuse), &error_abort); + + sysbus_realize(part, &error_abort); + memory_region_add_subregion(&s->mr_ps, base, + sysbus_mmio_get_region(part, 0)); +} + +static void versal_create_efuse(Versal *s, qemu_irq *pic) +{ + Object *bits = OBJECT(&s->pmc.efuse); + Object *ctrl = OBJECT(&s->pmc.efuse_ctrl); + Object *cache = OBJECT(&s->pmc.efuse_cache); + + object_initialize_child(OBJECT(s), "efuse-ctrl", &s->pmc.efuse_ctrl, + TYPE_XLNX_VERSAL_EFUSE_CTRL); + + object_initialize_child(OBJECT(s), "efuse-cache", &s->pmc.efuse_cache, + TYPE_XLNX_VERSAL_EFUSE_CACHE); + + object_initialize_child_with_props(ctrl, "xlnx-efuse@0", bits, + sizeof(s->pmc.efuse), + TYPE_XLNX_EFUSE, &error_abort, + "efuse-nr", "3", + "efuse-size", "8192", + NULL); + + qdev_realize(DEVICE(bits), NULL, &error_abort); + versal_realize_efuse_part(s, ctrl, MM_PMC_EFUSE_CTRL); + versal_realize_efuse_part(s, cache, MM_PMC_EFUSE_CACHE); + + sysbus_connect_irq(SYS_BUS_DEVICE(ctrl), 0, pic[VERSAL_EFUSE_IRQ]); +} + /* This takes the board allocated linear DDR memory and creates aliases * for each split DDR range/aperture on the Versal address map. 
*/ @@ -377,6 +433,8 @@ static void versal_unimp(Versal *s) MM_CRL, MM_CRL_SIZE); versal_unimp_area(s, "crf", &s->mr_ps, MM_FPD_CRF, MM_FPD_CRF_SIZE); + versal_unimp_area(s, "apu", &s->mr_ps, + MM_FPD_FPD_APU, MM_FPD_FPD_APU_SIZE); versal_unimp_area(s, "crp", &s->mr_ps, MM_PMC_CRP, MM_PMC_CRP_SIZE); versal_unimp_area(s, "iou-scntr", &s->mr_ps, @@ -399,6 +457,8 @@ static void versal_realize(DeviceState *dev, Error **errp) versal_create_sds(s, pic); versal_create_rtc(s, pic); versal_create_xrams(s, pic); + versal_create_bbram(s, pic); + versal_create_efuse(s, pic); versal_map_ddr(s); versal_unimp(s); diff --git a/hw/arm/xlnx-zcu102.c b/hw/arm/xlnx-zcu102.c index a9db25eb99f..3dc2b5e8ca4 100644 --- a/hw/arm/xlnx-zcu102.c +++ b/hw/arm/xlnx-zcu102.c @@ -17,7 +17,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/arm/xlnx-zynqmp.h" #include "hw/boards.h" #include "qemu/error-report.h" @@ -99,6 +98,30 @@ static void zcu102_modify_dtb(const struct arm_boot_info *binfo, void *fdt) } } +static void bbram_attach_drive(XlnxBBRam *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 2); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + +static void efuse_attach_drive(XlnxEFuse *dev) +{ + DriveInfo *dinfo; + BlockBackend *blk; + + dinfo = drive_get_by_index(IF_PFLASH, 3); + blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL; + if (blk) { + qdev_prop_set_drive(DEVICE(dev), "drive", blk); + } +} + static void xlnx_zcu102_init(MachineState *machine) { XlnxZCU102 *s = ZCU102_MACHINE(machine); @@ -137,6 +160,12 @@ static void xlnx_zcu102_init(MachineState *machine) qdev_realize(DEVICE(&s->soc), NULL, &error_fatal); + /* Attach bbram backend, if given */ + bbram_attach_drive(&s->soc.bbram); + + /* Attach efuse backend, if given */ + efuse_attach_drive(&s->soc.efuse); + /* Create and plug in the SD cards */ for (i = 0; i < XLNX_ZYNQMP_NUM_SDHCI; i++) { BusState *bus; diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index 7f01284a5c0..1c52a575aad 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -18,11 +18,10 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/arm/xlnx-zynqmp.h" #include "hw/intc/arm_gic_common.h" +#include "hw/misc/unimp.h" #include "hw/boards.h" -#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "sysemu/sysemu.h" #include "kvm_arm.h" @@ -58,12 +57,21 @@ #define DPDMA_ADDR 0xfd4c0000 #define DPDMA_IRQ 116 +#define APU_ADDR 0xfd5c0000 +#define APU_SIZE 0x100 + #define IPI_ADDR 0xFF300000 #define IPI_IRQ 64 #define RTC_ADDR 0xffa60000 #define RTC_IRQ 26 +#define BBRAM_ADDR 0xffcd0000 +#define BBRAM_IRQ 11 + +#define EFUSE_ADDR 0xffcc0000 +#define EFUSE_IRQ 87 + #define SDHCI_CAPABILITIES 0x280737ec6481 /* Datasheet: UG1085 (v1.7) */ static const uint64_t gem_addr[XLNX_ZYNQMP_NUM_GEMS] = { @@ -224,6 +232,73 @@ static void xlnx_zynqmp_create_rpu(MachineState *ms, XlnxZynqMPState *s, qdev_realize(DEVICE(&s->rpu_cluster), NULL, &error_fatal); } +static void xlnx_zynqmp_create_bbram(XlnxZynqMPState *s, qemu_irq *gic) +{ + SysBusDevice *sbd; + + object_initialize_child_with_props(OBJECT(s), "bbram", &s->bbram, + sizeof(s->bbram), TYPE_XLNX_BBRAM, + &error_fatal, + "crc-zpads", "1", + NULL); + sbd = SYS_BUS_DEVICE(&s->bbram); + + sysbus_realize(sbd, &error_fatal); + sysbus_mmio_map(sbd, 0, BBRAM_ADDR); + sysbus_connect_irq(sbd, 0, gic[BBRAM_IRQ]); +} + +static void 
xlnx_zynqmp_create_efuse(XlnxZynqMPState *s, qemu_irq *gic) +{ + Object *bits = OBJECT(&s->efuse); + Object *ctrl = OBJECT(&s->efuse_ctrl); + SysBusDevice *sbd; + + object_initialize_child(OBJECT(s), "efuse-ctrl", &s->efuse_ctrl, + TYPE_XLNX_ZYNQMP_EFUSE); + + object_initialize_child_with_props(ctrl, "xlnx-efuse@0", bits, + sizeof(s->efuse), + TYPE_XLNX_EFUSE, &error_abort, + "efuse-nr", "3", + "efuse-size", "2048", + NULL); + + qdev_realize(DEVICE(bits), NULL, &error_abort); + object_property_set_link(ctrl, "efuse", bits, &error_abort); + + sbd = SYS_BUS_DEVICE(ctrl); + sysbus_realize(sbd, &error_abort); + sysbus_mmio_map(sbd, 0, EFUSE_ADDR); + sysbus_connect_irq(sbd, 0, gic[EFUSE_IRQ]); +} + +static void xlnx_zynqmp_create_unimp_mmio(XlnxZynqMPState *s) +{ + static const struct UnimpInfo { + const char *name; + hwaddr base; + hwaddr size; + } unimp_areas[ARRAY_SIZE(s->mr_unimp)] = { + { .name = "apu", APU_ADDR, APU_SIZE }, + }; + unsigned int nr; + + for (nr = 0; nr < ARRAY_SIZE(unimp_areas); nr++) { + const struct UnimpInfo *info = &unimp_areas[nr]; + DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + assert(info->name && info->base && info->size > 0); + qdev_prop_set_string(dev, "name", info->name); + qdev_prop_set_uint64(dev, "size", info->size); + object_property_add_child(OBJECT(s), info->name, OBJECT(dev)); + + sysbus_realize_and_unref(sbd, &error_fatal); + sysbus_mmio_map(sbd, 0, info->base); + } +} + static void xlnx_zynqmp_init(Object *obj) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -572,26 +647,6 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) g_free(bus_name); } - if (!sysbus_realize(SYS_BUS_DEVICE(&s->qspi), errp)) { - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 0, QSPI_ADDR); - sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 1, LQSPI_ADDR); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi), 0, gic_spi[QSPI_IRQ]); - - for (i = 0; i < XLNX_ZYNQMP_NUM_QSPI_BUS; i++) { - gchar *bus_name; - gchar *target_bus; - - /* Alias controller SPI bus to the SoC itself */ - bus_name = g_strdup_printf("qspi%d", i); - target_bus = g_strdup_printf("spi%d", i); - object_property_add_alias(OBJECT(s), bus_name, - OBJECT(&s->qspi), target_bus); - g_free(bus_name); - g_free(target_bus); - } - if (!sysbus_realize(SYS_BUS_DEVICE(&s->dp), errp)) { return; } @@ -618,11 +673,19 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->rtc), 0, RTC_ADDR); sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]); + xlnx_zynqmp_create_bbram(s, gic_spi); + xlnx_zynqmp_create_efuse(s, gic_spi); + xlnx_zynqmp_create_unimp_mmio(s); + for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) { if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128, errp)) { return; } + if (!object_property_set_link(OBJECT(&s->gdma[i]), "dma", + OBJECT(system_memory), errp)) { + return; + } if (!sysbus_realize(SYS_BUS_DEVICE(&s->gdma[i]), errp)) { return; } @@ -633,6 +696,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) } for (i = 0; i < XLNX_ZYNQMP_NUM_ADMA_CH; i++) { + if (!object_property_set_link(OBJECT(&s->adma[i]), "dma", + OBJECT(system_memory), errp)) { + return; + } if (!sysbus_realize(SYS_BUS_DEVICE(&s->adma[i]), errp)) { return; } @@ -642,14 +709,36 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) gic_spi[adma_ch_intr[i]]); } + if (!object_property_set_link(OBJECT(&s->qspi_dma), "dma", + OBJECT(system_memory), errp)) { + return; + } if 
(!sysbus_realize(SYS_BUS_DEVICE(&s->qspi_dma), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi_dma), 0, QSPI_DMA_ADDR); sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi_dma), 0, gic_spi[QSPI_IRQ]); - object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma", - OBJECT(&s->qspi_dma), errp); + + if (!object_property_set_link(OBJECT(&s->qspi), "stream-connected-dma", + OBJECT(&s->qspi_dma), errp)) { + return; + } + if (!sysbus_realize(SYS_BUS_DEVICE(&s->qspi), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 0, QSPI_ADDR); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->qspi), 1, LQSPI_ADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->qspi), 0, gic_spi[QSPI_IRQ]); + + for (i = 0; i < XLNX_ZYNQMP_NUM_QSPI_BUS; i++) { + g_autofree gchar *bus_name = g_strdup_printf("qspi%d", i); + g_autofree gchar *target_bus = g_strdup_printf("spi%d", i); + + /* Alias controller SPI bus to the SoC itself */ + object_property_add_alias(OBJECT(s), bus_name, + OBJECT(&s->qspi), target_bus); + } } static Property xlnx_zynqmp_props[] = { diff --git a/hw/arm/z2.c b/hw/arm/z2.c index 5099bd83802..9c1e876207b 100644 --- a/hw/arm/z2.c +++ b/hw/arm/z2.c @@ -162,7 +162,7 @@ static void zipit_lcd_realize(SSIPeripheral *dev, Error **errp) z->pos = 0; } -static VMStateDescription vmstate_zipit_lcd_state = { +static const VMStateDescription vmstate_zipit_lcd_state = { .name = "zipit-lcd", .version_id = 2, .minimum_version_id = 2, @@ -268,7 +268,7 @@ static uint8_t aer915_recv(I2CSlave *slave) return retval; } -static VMStateDescription vmstate_aer915_state = { +static const VMStateDescription vmstate_aer915_state = { .name = "aer915", .version_id = 1, .minimum_version_id = 1, diff --git a/hw/audio/adlib.c b/hw/audio/adlib.c index 42d50d2fdc4..5f979b1487d 100644 --- a/hw/audio/adlib.c +++ b/hw/audio/adlib.c @@ -186,7 +186,7 @@ static int write_audio (AdlibState *s, int samples) static void adlib_callback (void *opaque, int free) { AdlibState *s = opaque; - int samples, net = 0, to_play, written; + int samples, to_play, written; samples = free >> SHIFT; if (!(s->active && s->enabled) || !samples) { @@ -219,7 +219,6 @@ static void adlib_callback (void *opaque, int free) written = write_audio (s, samples); if (written) { - net += written; samples -= written; s->pos = (s->pos + written) % s->samples; } diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c index 4330213fff1..8ce9df64e3e 100644 --- a/hw/audio/intel-hda.c +++ b/hw/audio/intel-hda.c @@ -52,7 +52,7 @@ void hda_codec_bus_init(DeviceState *dev, HDACodecBus *bus, size_t bus_size, hda_codec_response_func response, hda_codec_xfer_func xfer) { - qbus_create_inplace(bus, bus_size, TYPE_HDA_BUS, dev, NULL); + qbus_init(bus, bus_size, TYPE_HDA_BUS, dev, NULL); bus->response = response; bus->xfer = xfer; } diff --git a/hw/audio/meson.build b/hw/audio/meson.build index 32c42bdebe4..e48a9fc73d5 100644 --- a/hw/audio/meson.build +++ b/hw/audio/meson.build @@ -7,7 +7,6 @@ softmmu_ss.add(when: 'CONFIG_ES1370', if_true: files('es1370.c')) softmmu_ss.add(when: 'CONFIG_GUS', if_true: files('gus.c', 'gusemu_hal.c', 'gusemu_mixer.c')) softmmu_ss.add(when: 'CONFIG_HDA', if_true: files('intel-hda.c', 'hda-codec.c')) softmmu_ss.add(when: 'CONFIG_MARVELL_88W8618', if_true: files('marvell_88w8618.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-ac97.c')) softmmu_ss.add(when: 'CONFIG_PCSPK', if_true: files('pcspk.c')) softmmu_ss.add(when: 'CONFIG_PL041', if_true: files('pl041.c', 'lm4549.c')) softmmu_ss.add(when: 'CONFIG_SB16', if_true: 
files('sb16.c')) diff --git a/hw/audio/milkymist-ac97.c b/hw/audio/milkymist-ac97.c deleted file mode 100644 index 7d2e0570380..00000000000 --- a/hw/audio/milkymist-ac97.c +++ /dev/null @@ -1,360 +0,0 @@ -/* - * QEMU model of the Milkymist System Controller. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/ac97.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "audio/audio.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_AC97_CTRL = 0, - R_AC97_ADDR, - R_AC97_DATAOUT, - R_AC97_DATAIN, - R_D_CTRL, - R_D_ADDR, - R_D_REMAINING, - R_RESERVED, - R_U_CTRL, - R_U_ADDR, - R_U_REMAINING, - R_MAX -}; - -enum { - AC97_CTRL_RQEN = (1<<0), - AC97_CTRL_WRITE = (1<<1), -}; - -enum { - CTRL_EN = (1<<0), -}; - -#define TYPE_MILKYMIST_AC97 "milkymist-ac97" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistAC97State, MILKYMIST_AC97) - -struct MilkymistAC97State { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - - QEMUSoundCard card; - SWVoiceIn *voice_in; - SWVoiceOut *voice_out; - - uint32_t regs[R_MAX]; - - qemu_irq crrequest_irq; - qemu_irq crreply_irq; - qemu_irq dmar_irq; - qemu_irq dmaw_irq; -}; - -static void update_voices(MilkymistAC97State *s) -{ - if (s->regs[R_D_CTRL] & CTRL_EN) { - AUD_set_active_out(s->voice_out, 1); - } else { - AUD_set_active_out(s->voice_out, 0); - } - - if (s->regs[R_U_CTRL] & CTRL_EN) { - AUD_set_active_in(s->voice_in, 1); - } else { - AUD_set_active_in(s->voice_in, 0); - } -} - -static uint64_t ac97_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistAC97State *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_AC97_CTRL: - case R_AC97_ADDR: - case R_AC97_DATAOUT: - case R_AC97_DATAIN: - case R_D_CTRL: - case R_D_ADDR: - case R_D_REMAINING: - case R_U_CTRL: - case R_U_ADDR: - case R_U_REMAINING: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_ac97: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_ac97_memory_read(addr << 2, r); - - return r; -} - -static void ac97_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistAC97State *s = opaque; - - trace_milkymist_ac97_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_AC97_CTRL: - /* always raise an IRQ according to the direction */ - if (value & AC97_CTRL_RQEN) { - if (value & AC97_CTRL_WRITE) { - trace_milkymist_ac97_pulse_irq_crrequest(); - qemu_irq_pulse(s->crrequest_irq); - } else { - trace_milkymist_ac97_pulse_irq_crreply(); - qemu_irq_pulse(s->crreply_irq); - } - } - - /* RQEN is self clearing */ - s->regs[addr] = value & ~AC97_CTRL_RQEN; - break; - case R_D_CTRL: - case R_U_CTRL: - s->regs[addr] = value; - update_voices(s); - 
break; - case R_AC97_ADDR: - case R_AC97_DATAOUT: - case R_AC97_DATAIN: - case R_D_ADDR: - case R_D_REMAINING: - case R_U_ADDR: - case R_U_REMAINING: - s->regs[addr] = value; - break; - - default: - error_report("milkymist_ac97: write access to unknown register 0x" - TARGET_FMT_plx, addr); - break; - } - -} - -static const MemoryRegionOps ac97_mmio_ops = { - .read = ac97_read, - .write = ac97_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void ac97_in_cb(void *opaque, int avail_b) -{ - MilkymistAC97State *s = opaque; - uint8_t buf[4096]; - uint32_t remaining = s->regs[R_U_REMAINING]; - int temp = MIN(remaining, avail_b); - uint32_t addr = s->regs[R_U_ADDR]; - int transferred = 0; - - trace_milkymist_ac97_in_cb(avail_b, remaining); - - /* prevent from raising an IRQ */ - if (temp == 0) { - return; - } - - while (temp) { - int acquired, to_copy; - - to_copy = MIN(temp, sizeof(buf)); - acquired = AUD_read(s->voice_in, buf, to_copy); - if (!acquired) { - break; - } - - cpu_physical_memory_write(addr, buf, acquired); - - temp -= acquired; - addr += acquired; - transferred += acquired; - } - - trace_milkymist_ac97_in_cb_transferred(transferred); - - s->regs[R_U_ADDR] = addr; - s->regs[R_U_REMAINING] -= transferred; - - if ((s->regs[R_U_CTRL] & CTRL_EN) && (s->regs[R_U_REMAINING] == 0)) { - trace_milkymist_ac97_pulse_irq_dmaw(); - qemu_irq_pulse(s->dmaw_irq); - } -} - -static void ac97_out_cb(void *opaque, int free_b) -{ - MilkymistAC97State *s = opaque; - uint8_t buf[4096]; - uint32_t remaining = s->regs[R_D_REMAINING]; - int temp = MIN(remaining, free_b); - uint32_t addr = s->regs[R_D_ADDR]; - int transferred = 0; - - trace_milkymist_ac97_out_cb(free_b, remaining); - - /* prevent from raising an IRQ */ - if (temp == 0) { - return; - } - - while (temp) { - int copied, to_copy; - - to_copy = MIN(temp, sizeof(buf)); - cpu_physical_memory_read(addr, buf, to_copy); - copied = AUD_write(s->voice_out, buf, to_copy); - if (!copied) { - break; - } - temp -= copied; - addr += copied; - transferred += copied; - } - - trace_milkymist_ac97_out_cb_transferred(transferred); - - s->regs[R_D_ADDR] = addr; - s->regs[R_D_REMAINING] -= transferred; - - if ((s->regs[R_D_CTRL] & CTRL_EN) && (s->regs[R_D_REMAINING] == 0)) { - trace_milkymist_ac97_pulse_irq_dmar(); - qemu_irq_pulse(s->dmar_irq); - } -} - -static void milkymist_ac97_reset(DeviceState *d) -{ - MilkymistAC97State *s = MILKYMIST_AC97(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - AUD_set_active_in(s->voice_in, 0); - AUD_set_active_out(s->voice_out, 0); -} - -static int ac97_post_load(void *opaque, int version_id) -{ - MilkymistAC97State *s = opaque; - - update_voices(s); - - return 0; -} - -static void milkymist_ac97_init(Object *obj) -{ - MilkymistAC97State *s = MILKYMIST_AC97(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->crrequest_irq); - sysbus_init_irq(dev, &s->crreply_irq); - sysbus_init_irq(dev, &s->dmar_irq); - sysbus_init_irq(dev, &s->dmaw_irq); - - memory_region_init_io(&s->regs_region, obj, &ac97_mmio_ops, s, - "milkymist-ac97", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void milkymist_ac97_realize(DeviceState *dev, Error **errp) -{ - MilkymistAC97State *s = MILKYMIST_AC97(dev); - struct audsettings as; - - AUD_register_card("Milkymist AC'97", &s->card); - - as.freq = 48000; - as.nchannels = 2; - as.fmt = AUDIO_FORMAT_S16; - as.endianness = 1; - - s->voice_in = AUD_open_in(&s->card, 
s->voice_in, - "mm_ac97.in", s, ac97_in_cb, &as); - s->voice_out = AUD_open_out(&s->card, s->voice_out, - "mm_ac97.out", s, ac97_out_cb, &as); -} - -static const VMStateDescription vmstate_milkymist_ac97 = { - .name = "milkymist-ac97", - .version_id = 1, - .minimum_version_id = 1, - .post_load = ac97_post_load, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistAC97State, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_ac97_properties[] = { - DEFINE_AUDIO_PROPERTIES(MilkymistAC97State, card), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_ac97_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_ac97_realize; - dc->reset = milkymist_ac97_reset; - dc->vmsd = &vmstate_milkymist_ac97; - device_class_set_props(dc, milkymist_ac97_properties); -} - -static const TypeInfo milkymist_ac97_info = { - .name = TYPE_MILKYMIST_AC97, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistAC97State), - .instance_init = milkymist_ac97_init, - .class_init = milkymist_ac97_class_init, -}; - -static void milkymist_ac97_register_types(void) -{ - type_register_static(&milkymist_ac97_info); -} - -type_init(milkymist_ac97_register_types) diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c index 8b207004102..60f1f75e3ac 100644 --- a/hw/audio/sb16.c +++ b/hw/audio/sb16.c @@ -115,6 +115,9 @@ struct SB16State { PortioList portio_list; }; +#define SAMPLE_RATE_MIN 5000 +#define SAMPLE_RATE_MAX 45000 + static void SB_audio_callback (void *opaque, int free); static int magic_of_irq (int irq) @@ -226,6 +229,23 @@ static void continue_dma8 (SB16State *s) control (s, 1); } +static inline int restrict_sampling_rate(int freq) +{ + if (freq < SAMPLE_RATE_MIN) { + qemu_log_mask(LOG_GUEST_ERROR, + "sampling range too low: %d, increasing to %u\n", + freq, SAMPLE_RATE_MIN); + return SAMPLE_RATE_MIN; + } else if (freq > SAMPLE_RATE_MAX) { + qemu_log_mask(LOG_GUEST_ERROR, + "sampling range too high: %d, decreasing to %u\n", + freq, SAMPLE_RATE_MAX); + return SAMPLE_RATE_MAX; + } else { + return freq; + } +} + static void dma_cmd8 (SB16State *s, int mask, int dma_len) { s->fmt = AUDIO_FORMAT_U8; @@ -241,6 +261,7 @@ static void dma_cmd8 (SB16State *s, int mask, int dma_len) int tmp = (256 - s->time_const); s->freq = (1000000 + (tmp / 2)) / tmp; } + s->freq = restrict_sampling_rate(s->freq); if (dma_len != -1) { s->block_size = dma_len << s->fmt_stereo; @@ -754,7 +775,7 @@ static void complete (SB16State *s) * and FT2 sets output freq with this (go figure). Compare: * http://homepages.cae.wisc.edu/~brodskye/sb16doc/sb16doc.html#SamplingRate */ - s->freq = dsp_get_hilo (s); + s->freq = restrict_sampling_rate(dsp_get_hilo(s)); ldebug ("set freq %d\n", s->freq); break; diff --git a/hw/audio/trace-events b/hw/audio/trace-events index 60556b4a979..e0e71cd9b16 100644 --- a/hw/audio/trace-events +++ b/hw/audio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# cs4231.c cs4231_mem_readl_dreg(uint32_t reg, uint32_t ret) "read dreg %d: 0x%02x" @@ -6,18 +6,6 @@ cs4231_mem_readl_reg(uint32_t reg, uint32_t ret) "read reg %d: 0x%08x" cs4231_mem_writel_reg(uint32_t reg, uint32_t old, uint32_t val) "write reg %d: 0x%08x -> 0x%08x" cs4231_mem_writel_dreg(uint32_t reg, uint32_t old, uint32_t val) "write dreg %d: 0x%02x -> 0x%02x" -# milkymist-ac97.c -milkymist_ac97_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_ac97_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_ac97_pulse_irq_crrequest(void) "Pulse IRQ CR request" -milkymist_ac97_pulse_irq_crreply(void) "Pulse IRQ CR reply" -milkymist_ac97_pulse_irq_dmaw(void) "Pulse IRQ DMA write" -milkymist_ac97_pulse_irq_dmar(void) "Pulse IRQ DMA read" -milkymist_ac97_in_cb(int avail, uint32_t remaining) "avail %d remaining %u" -milkymist_ac97_in_cb_transferred(int transferred) "transferred %d" -milkymist_ac97_out_cb(int free, uint32_t remaining) "free %d remaining %u" -milkymist_ac97_out_cb_transferred(int transferred) "transferred %d" - # hda-codec.c hda_audio_running(const char *stream, int nr, bool running) "st %s, nr %d, run %d" hda_audio_format(const char *stream, int chan, const char *fmt, int freq) "st %s, %d x %s @ %d Hz" diff --git a/hw/avr/arduino.c b/hw/avr/arduino.c index 3ff31492fa6..48ef478346e 100644 --- a/hw/avr/arduino.c +++ b/hw/avr/arduino.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "hw/boards.h" #include "atmega.h" #include "boot.h" #include "qom/object.h" diff --git a/hw/avr/atmega.c b/hw/avr/atmega.c index 44c6afebbb6..0608e2d475e 100644 --- a/hw/avr/atmega.c +++ b/hw/avr/atmega.c @@ -18,7 +18,6 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qom/object.h" -#include "hw/boards.h" /* FIXME memory_region_allocate_system_memory for sram */ #include "hw/misc/unimp.h" #include "atmega.h" @@ -402,7 +401,7 @@ static void atmega1280_class_init(ObjectClass *oc, void *data) { AtmegaMcuClass *amc = ATMEGA_MCU_CLASS(oc); - amc->cpu_type = AVR_CPU_TYPE_NAME("avr6"); + amc->cpu_type = AVR_CPU_TYPE_NAME("avr51"); amc->flash_size = 128 * KiB; amc->eeprom_size = 4 * KiB; amc->sram_size = 8 * KiB; diff --git a/hw/block/Kconfig b/hw/block/Kconfig index 4fcd1521668..9e8f28f9824 100644 --- a/hw/block/Kconfig +++ b/hw/block/Kconfig @@ -1,8 +1,14 @@ config FDC bool - # FIXME: there is no separate file for the MMIO floppy disk controller, so - # select ISA_BUS here instead of polluting each board that requires one - select ISA_BUS + +config FDC_ISA + bool + depends on ISA_BUS + select FDC + +config FDC_SYSBUS + bool + select FDC config SSI_M25P80 bool @@ -25,11 +31,6 @@ config ONENAND config TC58128 bool -config NVME_PCI - bool - default y if PCI_DEVICES - depends on PCI - config VIRTIO_BLK bool default y diff --git a/hw/block/block.c b/hw/block/block.c index 1e34573da71..d47ebf005ad 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -65,24 +65,58 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) { BlockBackend *blk = conf->blk; BlockSizes blocksizes; - int backend_ret; + BlockDriverState *bs; + bool use_blocksizes; + bool use_bs; + + switch (conf->backend_defaults) { + case ON_OFF_AUTO_AUTO: + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); + use_bs = false; + break; + + case ON_OFF_AUTO_ON: + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); + bs = blk_bs(blk); + use_bs = bs; + break; + + case ON_OFF_AUTO_OFF: + use_blocksizes = false; + use_bs = false; + break; + 
+ default: + abort(); + } - backend_ret = blk_probe_blocksizes(blk, &blocksizes); /* fill in detected values if they are not defined via qemu command line */ if (!conf->physical_block_size) { - if (!backend_ret) { + if (use_blocksizes) { conf->physical_block_size = blocksizes.phys; } else { conf->physical_block_size = BDRV_SECTOR_SIZE; } } if (!conf->logical_block_size) { - if (!backend_ret) { + if (use_blocksizes) { conf->logical_block_size = blocksizes.log; } else { conf->logical_block_size = BDRV_SECTOR_SIZE; } } + if (use_bs) { + if (!conf->opt_io_size) { + conf->opt_io_size = bs->bl.opt_transfer; + } + if (conf->discard_granularity == -1) { + if (bs->bl.pdiscard_alignment) { + conf->discard_granularity = bs->bl.pdiscard_alignment; + } else if (bs->bl.request_alignment != 1) { + conf->discard_granularity = bs->bl.request_alignment; + } + } + } if (conf->logical_block_size > conf->physical_block_size) { error_setg(errp, diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events index 843cc4e7b16..38fc3e75071 100644 --- a/hw/block/dataplane/trace-events +++ b/hw/block/dataplane/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # virtio-blk.c virtio_blk_data_plane_start(void *s) "dataplane %p" diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index e9050c8987e..ee5a5352dc8 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -198,19 +198,38 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) goto fail_guest_notifiers; } + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + /* Set up virtqueue notify */ for (i = 0; i < nvqs; i++) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); if (r != 0) { + int j = i; + fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); while (i--) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } - goto fail_guest_notifiers; + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. 
+ */ + memory_region_transaction_commit(); + + while (j--) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); + } + goto fail_host_notifiers; } } + memory_region_transaction_commit(); + s->starting = false; vblk->dataplane_started = true; trace_virtio_blk_data_plane_start(s); @@ -221,7 +240,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) aio_context_release(old_context); if (r < 0) { error_report_err(local_err); - goto fail_guest_notifiers; + goto fail_aio_context; } /* Process queued requests before the ones in vring */ @@ -245,6 +264,20 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) aio_context_release(s->ctx); return 0; + fail_aio_context: + memory_region_transaction_begin(); + + for (i = 0; i < nvqs; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); fail_guest_notifiers: /* * If we failed to set up the guest notifiers queued requests will be @@ -305,8 +338,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) aio_context_release(s->ctx); + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + for (i = 0; i < nvqs; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < nvqs; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } diff --git a/hw/block/ecc.c b/hw/block/ecc.c index 1a182367ee6..6e0d63842c1 100644 --- a/hw/block/ecc.c +++ b/hw/block/ecc.c @@ -78,7 +78,7 @@ void ecc_reset(ECCState *s) } /* Save/restore */ -VMStateDescription vmstate_ecc_state = { +const VMStateDescription vmstate_ecc_state = { .name = "ecc-state", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/block/fdc-internal.h b/hw/block/fdc-internal.h new file mode 100644 index 00000000000..036392e9fc1 --- /dev/null +++ b/hw/block/fdc-internal.h @@ -0,0 +1,158 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef HW_BLOCK_FDC_INTERNAL_H +#define HW_BLOCK_FDC_INTERNAL_H + +#include "exec/memory.h" +#include "exec/ioport.h" +#include "hw/block/block.h" +#include "hw/block/fdc.h" +#include "qapi/qapi-types-block.h" + +typedef struct FDCtrl FDCtrl; + +/* Floppy bus emulation */ + +typedef struct FloppyBus { + BusState bus; + FDCtrl *fdc; +} FloppyBus; + +/* Floppy disk drive emulation */ + +typedef enum FDriveRate { + FDRIVE_RATE_500K = 0x00, /* 500 Kbps */ + FDRIVE_RATE_300K = 0x01, /* 300 Kbps */ + FDRIVE_RATE_250K = 0x02, /* 250 Kbps */ + FDRIVE_RATE_1M = 0x03, /* 1 Mbps */ +} FDriveRate; + +typedef enum FDriveSize { + FDRIVE_SIZE_UNKNOWN, + FDRIVE_SIZE_350, + FDRIVE_SIZE_525, +} FDriveSize; + +typedef struct FDFormat { + FloppyDriveType drive; + uint8_t last_sect; + uint8_t max_track; + uint8_t max_head; + FDriveRate rate; +} FDFormat; + +typedef enum FDiskFlags { + FDISK_DBL_SIDES = 0x01, +} FDiskFlags; + +typedef struct FDrive { + FDCtrl *fdctrl; + BlockBackend *blk; + BlockConf *conf; + /* Drive status */ + FloppyDriveType drive; /* CMOS drive type */ + uint8_t perpendicular; /* 2.88 MB access mode */ + /* Position */ + uint8_t head; + uint8_t track; + uint8_t sect; + /* Media */ + FloppyDriveType disk; /* Current disk type */ + FDiskFlags flags; + uint8_t last_sect; /* Nb sector per track */ + uint8_t max_track; /* Nb of tracks */ + uint16_t bps; /* Bytes per sector */ + uint8_t ro; /* Is read-only */ + uint8_t media_changed; /* Is media changed */ + uint8_t media_rate; /* Data rate of medium */ + + bool media_validated; /* Have we validated the media? */ +} FDrive; + +struct FDCtrl { + MemoryRegion iomem; + qemu_irq irq; + /* Controller state */ + QEMUTimer *result_timer; + int dma_chann; + uint8_t phase; + IsaDma *dma; + /* Controller's identification */ + uint8_t version; + /* HW */ + uint8_t sra; + uint8_t srb; + uint8_t dor; + uint8_t dor_vmstate; /* only used as temp during vmstate */ + uint8_t tdr; + uint8_t dsr; + uint8_t msr; + uint8_t cur_drv; + uint8_t status0; + uint8_t status1; + uint8_t status2; + /* Command FIFO */ + uint8_t *fifo; + int32_t fifo_size; + uint32_t data_pos; + uint32_t data_len; + uint8_t data_state; + uint8_t data_dir; + uint8_t eot; /* last wanted sector */ + /* States kept only to be returned back */ + /* precompensation */ + uint8_t precomp_trk; + uint8_t config; + uint8_t lock; + /* Power down config (also with status regB access mode */ + uint8_t pwrd; + /* Floppy drives */ + FloppyBus bus; + uint8_t num_floppies; + FDrive drives[MAX_FD]; + struct { + FloppyDriveType type; + } qdev_for_drives[MAX_FD]; + int reset_sensei; + FloppyDriveType fallback; /* type=auto failure fallback */ + /* Timers state */ + uint8_t timer0; + uint8_t timer1; + PortioList portio_list; +}; + +extern const FDFormat fd_formats[]; +extern const VMStateDescription vmstate_fdc; + +uint32_t fdctrl_read(void *opaque, uint32_t reg); +void fdctrl_write(void *opaque, uint32_t reg, uint32_t value); +void fdctrl_reset(FDCtrl *fdctrl, int do_irq); +void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp); + +int fdctrl_transfer_handler(void *opaque, int nchan, int dma_pos, int dma_len); + +void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds); + +#endif diff --git a/hw/block/fdc-isa.c b/hw/block/fdc-isa.c new file mode 100644 index 00000000000..3bf64e06657 --- /dev/null +++ b/hw/block/fdc-isa.c @@ -0,0 +1,320 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/* + * The controller is used in Sun4m systems in a slightly different + * way. There are changes in DOR register and DMA is not available. + */ + +#include "qemu/osdep.h" +#include "hw/block/fdc.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/timer.h" +#include "hw/acpi/aml-build.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "migration/vmstate.h" +#include "hw/block/block.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "trace.h" +#include "qom/object.h" +#include "fdc-internal.h" + +OBJECT_DECLARE_SIMPLE_TYPE(FDCtrlISABus, ISA_FDC) + +struct FDCtrlISABus { + /*< private >*/ + ISADevice parent_obj; + /*< public >*/ + + uint32_t iobase; + uint32_t irq; + uint32_t dma; + struct FDCtrl state; + int32_t bootindexA; + int32_t bootindexB; +}; + +static void fdctrl_external_reset_isa(DeviceState *d) +{ + FDCtrlISABus *isa = ISA_FDC(d); + FDCtrl *s = &isa->state; + + fdctrl_reset(s, 0); +} + +void isa_fdc_init_drives(ISADevice *fdc, DriveInfo **fds) +{ + fdctrl_init_drives(&ISA_FDC(fdc)->state.bus, fds); +} + +static const MemoryRegionPortio fdc_portio_list[] = { + { 1, 5, 1, .read = fdctrl_read, .write = fdctrl_write }, + { 7, 1, 1, .read = fdctrl_read, .write = fdctrl_write }, + PORTIO_END_OF_LIST(), +}; + +static void isabus_fdc_realize(DeviceState *dev, Error **errp) +{ + ISADevice *isadev = ISA_DEVICE(dev); + FDCtrlISABus *isa = ISA_FDC(dev); + FDCtrl *fdctrl = &isa->state; + Error *err = NULL; + + isa_register_portio_list(isadev, &fdctrl->portio_list, + isa->iobase, fdc_portio_list, fdctrl, + "fdc"); + + isa_init_irq(isadev, &fdctrl->irq, isa->irq); + fdctrl->dma_chann = isa->dma; + if (fdctrl->dma_chann != -1) { + IsaDmaClass *k; + fdctrl->dma = isa_get_dma(isa_bus_from_device(isadev), isa->dma); + if (!fdctrl->dma) { + error_setg(errp, "ISA controller does not support DMA"); + return; + } + k = ISADMA_GET_CLASS(fdctrl->dma); + k->register_channel(fdctrl->dma, fdctrl->dma_chann, + &fdctrl_transfer_handler, fdctrl); + } + + qdev_set_legacy_instance_id(dev, isa->iobase, 2); + + fdctrl_realize_common(dev, fdctrl, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } +} + +FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) +{ + FDCtrlISABus *isa = 
ISA_FDC(fdc); + + return isa->state.drives[i].drive; +} + +static void isa_fdc_get_drive_max_chs(FloppyDriveType type, uint8_t *maxc, + uint8_t *maxh, uint8_t *maxs) +{ + const FDFormat *fdf; + + *maxc = *maxh = *maxs = 0; + for (fdf = fd_formats; fdf->drive != FLOPPY_DRIVE_TYPE_NONE; fdf++) { + if (fdf->drive != type) { + continue; + } + if (*maxc < fdf->max_track) { + *maxc = fdf->max_track; + } + if (*maxh < fdf->max_head) { + *maxh = fdf->max_head; + } + if (*maxs < fdf->last_sect) { + *maxs = fdf->last_sect; + } + } + (*maxc)--; +} + +static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) +{ + Aml *dev, *fdi; + uint8_t maxc, maxh, maxs; + + isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs); + + dev = aml_device("FLP%c", 'A' + idx); + + aml_append(dev, aml_name_decl("_ADR", aml_int(idx))); + + fdi = aml_package(16); + aml_append(fdi, aml_int(idx)); /* Drive Number */ + aml_append(fdi, + aml_int(cmos_get_fd_drive_type(type))); /* Device Type */ + /* + * the values below are the limits of the drive, and are thus independent + * of the inserted media + */ + aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */ + aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */ + aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */ + /* + * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of + * the drive type, so shall we + */ + aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */ + aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */ + aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */ + aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */ + aml_append(fdi, aml_int(0x12)); /* disk_eot */ + aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */ + aml_append(fdi, aml_int(0xFF)); /* disk_dtl */ + aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */ + aml_append(fdi, aml_int(0xF6)); /* disk_fill */ + aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */ + aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */ + + aml_append(dev, aml_name_decl("_FDI", fdi)); + return dev; +} + +int cmos_get_fd_drive_type(FloppyDriveType fd0) +{ + int val; + + switch (fd0) { + case FLOPPY_DRIVE_TYPE_144: + /* 1.44 Mb 3"5 drive */ + val = 4; + break; + case FLOPPY_DRIVE_TYPE_288: + /* 2.88 Mb 3"5 drive */ + val = 5; + break; + case FLOPPY_DRIVE_TYPE_120: + /* 1.2 Mb 5"5 drive */ + val = 2; + break; + case FLOPPY_DRIVE_TYPE_NONE: + default: + val = 0; + break; + } + return val; +} + +static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope) +{ + Aml *dev; + Aml *crs; + int i; + +#define ACPI_FDE_MAX_FD 4 + uint32_t fde_buf[5] = { + 0, 0, 0, 0, /* presence of floppy drives #0 - #3 */ + cpu_to_le32(2) /* tape presence (2 == never present) */ + }; + + crs = aml_resource_template(); + aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04)); + aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01)); + aml_append(crs, aml_irq_no_flags(6)); + aml_append(crs, + aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2)); + + dev = aml_device("FDC0"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700"))); + aml_append(dev, aml_name_decl("_CRS", crs)); + + for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) { + FloppyDriveType type = isa_fdc_get_drive_type(isadev, i); + + if (type < FLOPPY_DRIVE_TYPE_NONE) { + fde_buf[i] = cpu_to_le32(1); /* drive present */ + aml_append(dev, build_fdinfo_aml(i, type)); + } + } + aml_append(dev, aml_name_decl("_FDE", + aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf))); + + aml_append(scope, dev); +} + +static const 
VMStateDescription vmstate_isa_fdc = { + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, FDCtrlISABus, 0, vmstate_fdc, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static Property isa_fdc_properties[] = { + DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), + DEFINE_PROP_UINT32("irq", FDCtrlISABus, irq, 6), + DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), + DEFINE_PROP_SIGNED("fdtypeA", FDCtrlISABus, state.qdev_for_drives[0].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fdtypeB", FDCtrlISABus, state.qdev_for_drives[1].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback, + FLOPPY_DRIVE_TYPE_288, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static void isabus_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); + + dc->desc = "virtual floppy controller"; + dc->realize = isabus_fdc_realize; + dc->fw_name = "fdc"; + dc->reset = fdctrl_external_reset_isa; + dc->vmsd = &vmstate_isa_fdc; + isa->build_aml = fdc_isa_build_aml; + device_class_set_props(dc, isa_fdc_properties); + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static void isabus_fdc_instance_init(Object *obj) +{ + FDCtrlISABus *isa = ISA_FDC(obj); + + device_add_bootindex_property(obj, &isa->bootindexA, + "bootindexA", "/floppy@0", + DEVICE(obj)); + device_add_bootindex_property(obj, &isa->bootindexB, + "bootindexB", "/floppy@1", + DEVICE(obj)); +} + +static const TypeInfo isa_fdc_info = { + .name = TYPE_ISA_FDC, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(FDCtrlISABus), + .class_init = isabus_fdc_class_init, + .instance_init = isabus_fdc_instance_init, +}; + +static void isa_fdc_register_types(void) +{ + type_register_static(&isa_fdc_info); +} + +type_init(isa_fdc_register_types) diff --git a/hw/block/fdc-sysbus.c b/hw/block/fdc-sysbus.c new file mode 100644 index 00000000000..57fc8773f12 --- /dev/null +++ b/hw/block/fdc-sysbus.c @@ -0,0 +1,251 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qom/object.h" +#include "hw/sysbus.h" +#include "hw/block/fdc.h" +#include "migration/vmstate.h" +#include "fdc-internal.h" +#include "trace.h" + +#define TYPE_SYSBUS_FDC "base-sysbus-fdc" +typedef struct FDCtrlSysBusClass FDCtrlSysBusClass; +typedef struct FDCtrlSysBus FDCtrlSysBus; +DECLARE_OBJ_CHECKERS(FDCtrlSysBus, FDCtrlSysBusClass, + SYSBUS_FDC, TYPE_SYSBUS_FDC) + +struct FDCtrlSysBusClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + bool use_strict_io; +}; + +struct FDCtrlSysBus { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + struct FDCtrl state; +}; + +static uint64_t fdctrl_read_mem(void *opaque, hwaddr reg, unsigned ize) +{ + return fdctrl_read(opaque, (uint32_t)reg); +} + +static void fdctrl_write_mem(void *opaque, hwaddr reg, + uint64_t value, unsigned size) +{ + fdctrl_write(opaque, (uint32_t)reg, value); +} + +static const MemoryRegionOps fdctrl_mem_ops = { + .read = fdctrl_read_mem, + .write = fdctrl_write_mem, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps fdctrl_mem_strict_ops = { + .read = fdctrl_read_mem, + .write = fdctrl_write_mem, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void fdctrl_external_reset_sysbus(DeviceState *d) +{ + FDCtrlSysBus *sys = SYSBUS_FDC(d); + FDCtrl *s = &sys->state; + + fdctrl_reset(s, 0); +} + +static void fdctrl_handle_tc(void *opaque, int irq, int level) +{ + trace_fdctrl_tc_pulse(level); +} + +void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, + hwaddr mmio_base, DriveInfo **fds) +{ + FDCtrl *fdctrl; + DeviceState *dev; + SysBusDevice *sbd; + FDCtrlSysBus *sys; + + dev = qdev_new("sysbus-fdc"); + sys = SYSBUS_FDC(dev); + fdctrl = &sys->state; + fdctrl->dma_chann = dma_chann; /* FIXME */ + sbd = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sbd, &error_fatal); + sysbus_connect_irq(sbd, 0, irq); + sysbus_mmio_map(sbd, 0, mmio_base); + + fdctrl_init_drives(&sys->state.bus, fds); +} + +void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, + DriveInfo **fds, qemu_irq *fdc_tc) +{ + DeviceState *dev; + FDCtrlSysBus *sys; + + dev = qdev_new("sun-fdtwo"); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sys = SYSBUS_FDC(dev); + sysbus_connect_irq(SYS_BUS_DEVICE(sys), 0, irq); + sysbus_mmio_map(SYS_BUS_DEVICE(sys), 0, io_base); + *fdc_tc = qdev_get_gpio_in(dev, 0); + + fdctrl_init_drives(&sys->state.bus, fds); +} + +static void sysbus_fdc_common_instance_init(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + FDCtrlSysBusClass *sbdc = SYSBUS_FDC_GET_CLASS(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + FDCtrlSysBus *sys = SYSBUS_FDC(obj); + FDCtrl *fdctrl = &sys->state; + + qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ + + memory_region_init_io(&fdctrl->iomem, obj, + sbdc->use_strict_io ? 
&fdctrl_mem_strict_ops + : &fdctrl_mem_ops, + fdctrl, "fdc", 0x08); + sysbus_init_mmio(sbd, &fdctrl->iomem); + + sysbus_init_irq(sbd, &fdctrl->irq); + qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); +} + +static void sysbus_fdc_realize(DeviceState *dev, Error **errp) +{ + FDCtrlSysBus *sys = SYSBUS_FDC(dev); + FDCtrl *fdctrl = &sys->state; + + fdctrl_realize_common(dev, fdctrl, errp); +} + +static const VMStateDescription vmstate_sysbus_fdc = { + .name = "fdc", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, FDCtrlSysBus, 0, vmstate_fdc, FDCtrl), + VMSTATE_END_OF_LIST() + } +}; + +static void sysbus_fdc_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = sysbus_fdc_realize; + dc->reset = fdctrl_external_reset_sysbus; + dc->vmsd = &vmstate_sysbus_fdc; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sysbus_fdc_common_typeinfo = { + .name = TYPE_SYSBUS_FDC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(FDCtrlSysBus), + .instance_init = sysbus_fdc_common_instance_init, + .abstract = true, + .class_init = sysbus_fdc_common_class_init, + .class_size = sizeof(FDCtrlSysBusClass), +}; + +static Property sysbus_fdc_properties[] = { + DEFINE_PROP_SIGNED("fdtypeA", FDCtrlSysBus, state.qdev_for_drives[0].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, + FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sysbus_fdc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "virtual floppy controller"; + device_class_set_props(dc, sysbus_fdc_properties); +} + +static const TypeInfo sysbus_fdc_typeinfo = { + .name = "sysbus-fdc", + .parent = TYPE_SYSBUS_FDC, + .class_init = sysbus_fdc_class_init, +}; + +static Property sun4m_fdc_properties[] = { + DEFINE_PROP_SIGNED("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, + FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sun4m_fdc_class_init(ObjectClass *klass, void *data) +{ + FDCtrlSysBusClass *sbdc = SYSBUS_FDC_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + sbdc->use_strict_io = true; + dc->desc = "virtual floppy controller"; + device_class_set_props(dc, sun4m_fdc_properties); +} + +static const TypeInfo sun4m_fdc_typeinfo = { + .name = "sun-fdtwo", + .parent = TYPE_SYSBUS_FDC, + .class_init = sun4m_fdc_class_init, +}; + +static void sysbus_fdc_register_types(void) +{ + type_register_static(&sysbus_fdc_common_typeinfo); + type_register_static(&sysbus_fdc_typeinfo); + type_register_static(&sun4m_fdc_typeinfo); +} + +type_init(sysbus_fdc_register_types) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index a825c2acbae..21d18ac2e36 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -32,12 +32,10 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/timer.h" -#include "hw/acpi/aml-build.h" #include "hw/irq.h" #include "hw/isa/isa.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" -#include "hw/sysbus.h" 
#include "migration/vmstate.h" #include "hw/block/block.h" #include "sysemu/block-backend.h" @@ -48,6 +46,7 @@ #include "qemu/module.h" #include "trace.h" #include "qom/object.h" +#include "fdc-internal.h" /********************************************************/ /* debug Floppy devices */ @@ -62,21 +61,20 @@ } while (0) +/* Anonymous BlockBackend for empty drive */ +static BlockBackend *blk_create_empty_drive(void) +{ + return blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); +} + /********************************************************/ /* qdev floppy bus */ #define TYPE_FLOPPY_BUS "floppy-bus" OBJECT_DECLARE_SIMPLE_TYPE(FloppyBus, FLOPPY_BUS) -typedef struct FDCtrl FDCtrl; -typedef struct FDrive FDrive; static FDrive *get_drv(FDCtrl *fdctrl, int unit); -struct FloppyBus { - BusState bus; - FDCtrl *fdc; -}; - static const TypeInfo floppy_bus_info = { .name = TYPE_FLOPPY_BUS, .parent = TYPE_BUS, @@ -85,7 +83,7 @@ static const TypeInfo floppy_bus_info = { static void floppy_bus_create(FDCtrl *fdc, FloppyBus *bus, DeviceState *dev) { - qbus_create_inplace(bus, sizeof(FloppyBus), TYPE_FLOPPY_BUS, dev, NULL); + qbus_init(bus, sizeof(FloppyBus), TYPE_FLOPPY_BUS, dev, NULL); bus->fdc = fdc; } @@ -93,32 +91,11 @@ static void floppy_bus_create(FDCtrl *fdc, FloppyBus *bus, DeviceState *dev) /********************************************************/ /* Floppy drive emulation */ -typedef enum FDriveRate { - FDRIVE_RATE_500K = 0x00, /* 500 Kbps */ - FDRIVE_RATE_300K = 0x01, /* 300 Kbps */ - FDRIVE_RATE_250K = 0x02, /* 250 Kbps */ - FDRIVE_RATE_1M = 0x03, /* 1 Mbps */ -} FDriveRate; - -typedef enum FDriveSize { - FDRIVE_SIZE_UNKNOWN, - FDRIVE_SIZE_350, - FDRIVE_SIZE_525, -} FDriveSize; - -typedef struct FDFormat { - FloppyDriveType drive; - uint8_t last_sect; - uint8_t max_track; - uint8_t max_head; - FDriveRate rate; -} FDFormat; - /* In many cases, the total sector size of a format is enough to uniquely * identify it. However, there are some total sector collisions between * formats of different physical size, and these are noted below by * highlighting the total sector size for entries with collisions. */ -static const FDFormat fd_formats[] = { +const FDFormat fd_formats[] = { /* First entry is default format */ /* 1.44 MB 3"1/2 floppy disks */ { FLOPPY_DRIVE_TYPE_144, 18, 80, 1, FDRIVE_RATE_500K, }, /* 3.5" 2880 */ @@ -186,35 +163,6 @@ static FDriveSize drive_size(FloppyDriveType drive) #define FD_SECTOR_SC 2 /* Sector size code */ #define FD_RESET_SENSEI_COUNT 4 /* Number of sense interrupts on RESET */ -/* Floppy disk drive emulation */ -typedef enum FDiskFlags { - FDISK_DBL_SIDES = 0x01, -} FDiskFlags; - -struct FDrive { - FDCtrl *fdctrl; - BlockBackend *blk; - BlockConf *conf; - /* Drive status */ - FloppyDriveType drive; /* CMOS drive type */ - uint8_t perpendicular; /* 2.88 MB access mode */ - /* Position */ - uint8_t head; - uint8_t track; - uint8_t sect; - /* Media */ - FloppyDriveType disk; /* Current disk type */ - FDiskFlags flags; - uint8_t last_sect; /* Nb sector per track */ - uint8_t max_track; /* Nb of tracks */ - uint16_t bps; /* Bytes per sector */ - uint8_t ro; /* Is read-only */ - uint8_t media_changed; /* Is media changed */ - uint8_t media_rate; /* Data rate of medium */ - - bool media_validated; /* Have we validated the media? 
*/ -}; - static FloppyDriveType get_fallback_drive_type(FDrive *drv); @@ -544,8 +492,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) } if (!dev->conf.blk) { - /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + dev->conf.blk = blk_create_empty_drive(); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); @@ -626,10 +573,7 @@ static const TypeInfo floppy_drive_info = { /********************************************************/ /* Intel 82078 floppy disk controller emulation */ -static void fdctrl_reset(FDCtrl *fdctrl, int do_irq); static void fdctrl_to_command_phase(FDCtrl *fdctrl); -static int fdctrl_transfer_handler (void *opaque, int nchan, - int dma_pos, int dma_len); static void fdctrl_raise_irq(FDCtrl *fdctrl); static FDrive *get_cur_drv(FDCtrl *fdctrl); @@ -828,88 +772,12 @@ enum { #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) -struct FDCtrl { - MemoryRegion iomem; - qemu_irq irq; - /* Controller state */ - QEMUTimer *result_timer; - int dma_chann; - uint8_t phase; - IsaDma *dma; - /* Controller's identification */ - uint8_t version; - /* HW */ - uint8_t sra; - uint8_t srb; - uint8_t dor; - uint8_t dor_vmstate; /* only used as temp during vmstate */ - uint8_t tdr; - uint8_t dsr; - uint8_t msr; - uint8_t cur_drv; - uint8_t status0; - uint8_t status1; - uint8_t status2; - /* Command FIFO */ - uint8_t *fifo; - int32_t fifo_size; - uint32_t data_pos; - uint32_t data_len; - uint8_t data_state; - uint8_t data_dir; - uint8_t eot; /* last wanted sector */ - /* States kept only to be returned back */ - /* precompensation */ - uint8_t precomp_trk; - uint8_t config; - uint8_t lock; - /* Power down config (also with status regB access mode */ - uint8_t pwrd; - /* Floppy drives */ - FloppyBus bus; - uint8_t num_floppies; - FDrive drives[MAX_FD]; - struct { - FloppyDriveType type; - } qdev_for_drives[MAX_FD]; - int reset_sensei; - FloppyDriveType fallback; /* type=auto failure fallback */ - /* Timers state */ - uint8_t timer0; - uint8_t timer1; - PortioList portio_list; -}; - static FloppyDriveType get_fallback_drive_type(FDrive *drv) { return drv->fdctrl->fallback; } -#define TYPE_SYSBUS_FDC "base-sysbus-fdc" -OBJECT_DECLARE_SIMPLE_TYPE(FDCtrlSysBus, SYSBUS_FDC) - -struct FDCtrlSysBus { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - - struct FDCtrl state; -}; - -OBJECT_DECLARE_SIMPLE_TYPE(FDCtrlISABus, ISA_FDC) - -struct FDCtrlISABus { - ISADevice parent_obj; - - uint32_t iobase; - uint32_t irq; - uint32_t dma; - struct FDCtrl state; - int32_t bootindexA; - int32_t bootindexB; -}; - -static uint32_t fdctrl_read (void *opaque, uint32_t reg) +uint32_t fdctrl_read(void *opaque, uint32_t reg) { FDCtrl *fdctrl = opaque; uint32_t retval; @@ -946,7 +814,7 @@ static uint32_t fdctrl_read (void *opaque, uint32_t reg) return retval; } -static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) +void fdctrl_write(void *opaque, uint32_t reg, uint32_t value) { FDCtrl *fdctrl = opaque; @@ -973,34 +841,6 @@ static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) } } -static uint64_t fdctrl_read_mem (void *opaque, hwaddr reg, - unsigned ize) -{ - return fdctrl_read(opaque, (uint32_t)reg); -} - -static void fdctrl_write_mem (void *opaque, hwaddr reg, - uint64_t value, unsigned size) -{ - fdctrl_write(opaque, (uint32_t)reg, value); -} - -static const MemoryRegionOps fdctrl_mem_ops = { - .read = fdctrl_read_mem, 
- .write = fdctrl_write_mem, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static const MemoryRegionOps fdctrl_mem_strict_ops = { - .read = fdctrl_read_mem, - .write = fdctrl_write_mem, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - static bool fdrive_media_changed_needed(void *opaque) { FDrive *drive = opaque; @@ -1178,7 +1018,7 @@ static const VMStateDescription vmstate_fdc_phase = { } }; -static const VMStateDescription vmstate_fdc = { +const VMStateDescription vmstate_fdc = { .name = "fdc", .version_id = 2, .minimum_version_id = 2, @@ -1224,32 +1064,6 @@ static const VMStateDescription vmstate_fdc = { } }; -static void fdctrl_external_reset_sysbus(DeviceState *d) -{ - FDCtrlSysBus *sys = SYSBUS_FDC(d); - FDCtrl *s = &sys->state; - - fdctrl_reset(s, 0); -} - -static void fdctrl_external_reset_isa(DeviceState *d) -{ - FDCtrlISABus *isa = ISA_FDC(d); - FDCtrl *s = &isa->state; - - fdctrl_reset(s, 0); -} - -static void fdctrl_handle_tc(void *opaque, int irq, int level) -{ - //FDCtrl *s = opaque; - - if (level) { - // XXX - FLOPPY_DPRINTF("TC pulsed\n"); - } -} - /* Change IRQ state */ static void fdctrl_reset_irq(FDCtrl *fdctrl) { @@ -1273,7 +1087,7 @@ static void fdctrl_raise_irq(FDCtrl *fdctrl) } /* Reset controller */ -static void fdctrl_reset(FDCtrl *fdctrl, int do_irq) +void fdctrl_reset(FDCtrl *fdctrl, int do_irq) { int i; @@ -1352,7 +1166,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit) static FDrive *get_cur_drv(FDCtrl *fdctrl) { - return get_drv(fdctrl, fdctrl->cur_drv); + FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv); + + if (!cur_drv->blk) { + /* + * Kludge: empty drive line selected. Create an anonymous + * BlockBackend to avoid NULL deref with various BlockBackend + * API calls within this model (CVE-2021-20196). + * Due to the controller QOM model limitations, we don't + * attach the created to the controller device. 
+ */ + cur_drv->blk = blk_create_empty_drive(); + } + return cur_drv; } /* Status A register : 0x00 (read-only) */ @@ -1752,8 +1578,7 @@ static void fdctrl_start_transfer_del(FDCtrl *fdctrl, int direction) } /* handlers for DMA transfers */ -static int fdctrl_transfer_handler (void *opaque, int nchan, - int dma_pos, int dma_len) +int fdctrl_transfer_handler(void *opaque, int nchan, int dma_pos, int dma_len) { FDCtrl *fdctrl; FDrive *cur_drv; @@ -2489,7 +2314,7 @@ static void fdctrl_result_timer(void *opaque) /* Init functions */ -static void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds) +void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds) { DeviceState *dev; int i; @@ -2506,49 +2331,7 @@ static void fdctrl_init_drives(FloppyBus *bus, DriveInfo **fds) } } -void isa_fdc_init_drives(ISADevice *fdc, DriveInfo **fds) -{ - fdctrl_init_drives(&ISA_FDC(fdc)->state.bus, fds); -} - -void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, - hwaddr mmio_base, DriveInfo **fds) -{ - FDCtrl *fdctrl; - DeviceState *dev; - SysBusDevice *sbd; - FDCtrlSysBus *sys; - - dev = qdev_new("sysbus-fdc"); - sys = SYSBUS_FDC(dev); - fdctrl = &sys->state; - fdctrl->dma_chann = dma_chann; /* FIXME */ - sbd = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(sbd, &error_fatal); - sysbus_connect_irq(sbd, 0, irq); - sysbus_mmio_map(sbd, 0, mmio_base); - - fdctrl_init_drives(&sys->state.bus, fds); -} - -void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, - DriveInfo **fds, qemu_irq *fdc_tc) -{ - DeviceState *dev; - FDCtrlSysBus *sys; - - dev = qdev_new("sun-fdtwo"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sys = SYSBUS_FDC(dev); - sysbus_connect_irq(SYS_BUS_DEVICE(sys), 0, irq); - sysbus_mmio_map(SYS_BUS_DEVICE(sys), 0, io_base); - *fdc_tc = qdev_get_gpio_in(dev, 0); - - fdctrl_init_drives(&sys->state.bus, fds); -} - -static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, - Error **errp) +void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) { int i, j; FDrive *drive; @@ -2582,14 +2365,6 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, fdctrl->config = FD_CONFIG_EIS | FD_CONFIG_EFIFO; /* Implicit seek, polling & FIFO enabled */ fdctrl->num_floppies = MAX_FD; - if (fdctrl->dma_chann != -1) { - IsaDmaClass *k; - assert(fdctrl->dma); - k = ISADMA_GET_CLASS(fdctrl->dma); - k->register_channel(fdctrl->dma, fdctrl->dma_chann, - &fdctrl_transfer_handler, fdctrl); - } - floppy_bus_create(fdctrl, &fdctrl->bus, dev); for (i = 0; i < MAX_FD; i++) { @@ -2600,369 +2375,8 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, } } -static const MemoryRegionPortio fdc_portio_list[] = { - { 1, 5, 1, .read = fdctrl_read, .write = fdctrl_write }, - { 7, 1, 1, .read = fdctrl_read, .write = fdctrl_write }, - PORTIO_END_OF_LIST(), -}; - -static void isabus_fdc_realize(DeviceState *dev, Error **errp) -{ - ISADevice *isadev = ISA_DEVICE(dev); - FDCtrlISABus *isa = ISA_FDC(dev); - FDCtrl *fdctrl = &isa->state; - Error *err = NULL; - - isa_register_portio_list(isadev, &fdctrl->portio_list, - isa->iobase, fdc_portio_list, fdctrl, - "fdc"); - - isa_init_irq(isadev, &fdctrl->irq, isa->irq); - fdctrl->dma_chann = isa->dma; - if (fdctrl->dma_chann != -1) { - fdctrl->dma = isa_get_dma(isa_bus_from_device(isadev), isa->dma); - if (!fdctrl->dma) { - error_setg(errp, "ISA controller does not support DMA"); - return; - } - } - - qdev_set_legacy_instance_id(dev, isa->iobase, 2); - fdctrl_realize_common(dev, fdctrl, &err); - if (err != NULL) { - 
error_propagate(errp, err); - return; - } -} - -static void sysbus_fdc_initfn(Object *obj) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - FDCtrlSysBus *sys = SYSBUS_FDC(obj); - FDCtrl *fdctrl = &sys->state; - - fdctrl->dma_chann = -1; - - memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_ops, fdctrl, - "fdc", 0x08); - sysbus_init_mmio(sbd, &fdctrl->iomem); -} - -static void sun4m_fdc_initfn(Object *obj) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - FDCtrlSysBus *sys = SYSBUS_FDC(obj); - FDCtrl *fdctrl = &sys->state; - - fdctrl->dma_chann = -1; - - memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_strict_ops, - fdctrl, "fdctrl", 0x08); - sysbus_init_mmio(sbd, &fdctrl->iomem); -} - -static void sysbus_fdc_common_initfn(Object *obj) -{ - DeviceState *dev = DEVICE(obj); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - FDCtrlSysBus *sys = SYSBUS_FDC(obj); - FDCtrl *fdctrl = &sys->state; - - qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ - - sysbus_init_irq(sbd, &fdctrl->irq); - qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); -} - -static void sysbus_fdc_common_realize(DeviceState *dev, Error **errp) -{ - FDCtrlSysBus *sys = SYSBUS_FDC(dev); - FDCtrl *fdctrl = &sys->state; - - fdctrl_realize_common(dev, fdctrl, errp); -} - -FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) -{ - FDCtrlISABus *isa = ISA_FDC(fdc); - - return isa->state.drives[i].drive; -} - -static void isa_fdc_get_drive_max_chs(FloppyDriveType type, uint8_t *maxc, - uint8_t *maxh, uint8_t *maxs) -{ - const FDFormat *fdf; - - *maxc = *maxh = *maxs = 0; - for (fdf = fd_formats; fdf->drive != FLOPPY_DRIVE_TYPE_NONE; fdf++) { - if (fdf->drive != type) { - continue; - } - if (*maxc < fdf->max_track) { - *maxc = fdf->max_track; - } - if (*maxh < fdf->max_head) { - *maxh = fdf->max_head; - } - if (*maxs < fdf->last_sect) { - *maxs = fdf->last_sect; - } - } - (*maxc)--; -} - -static Aml *build_fdinfo_aml(int idx, FloppyDriveType type) -{ - Aml *dev, *fdi; - uint8_t maxc, maxh, maxs; - - isa_fdc_get_drive_max_chs(type, &maxc, &maxh, &maxs); - - dev = aml_device("FLP%c", 'A' + idx); - - aml_append(dev, aml_name_decl("_ADR", aml_int(idx))); - - fdi = aml_package(16); - aml_append(fdi, aml_int(idx)); /* Drive Number */ - aml_append(fdi, - aml_int(cmos_get_fd_drive_type(type))); /* Device Type */ - /* - * the values below are the limits of the drive, and are thus independent - * of the inserted media - */ - aml_append(fdi, aml_int(maxc)); /* Maximum Cylinder Number */ - aml_append(fdi, aml_int(maxs)); /* Maximum Sector Number */ - aml_append(fdi, aml_int(maxh)); /* Maximum Head Number */ - /* - * SeaBIOS returns the below values for int 0x13 func 0x08 regardless of - * the drive type, so shall we - */ - aml_append(fdi, aml_int(0xAF)); /* disk_specify_1 */ - aml_append(fdi, aml_int(0x02)); /* disk_specify_2 */ - aml_append(fdi, aml_int(0x25)); /* disk_motor_wait */ - aml_append(fdi, aml_int(0x02)); /* disk_sector_siz */ - aml_append(fdi, aml_int(0x12)); /* disk_eot */ - aml_append(fdi, aml_int(0x1B)); /* disk_rw_gap */ - aml_append(fdi, aml_int(0xFF)); /* disk_dtl */ - aml_append(fdi, aml_int(0x6C)); /* disk_formt_gap */ - aml_append(fdi, aml_int(0xF6)); /* disk_fill */ - aml_append(fdi, aml_int(0x0F)); /* disk_head_sttl */ - aml_append(fdi, aml_int(0x08)); /* disk_motor_strt */ - - aml_append(dev, aml_name_decl("_FDI", fdi)); - return dev; -} - -int cmos_get_fd_drive_type(FloppyDriveType fd0) -{ - int val; - - switch (fd0) { - case FLOPPY_DRIVE_TYPE_144: - /* 1.44 Mb 3"5 drive */ - val = 4; - break; - case 
FLOPPY_DRIVE_TYPE_288: - /* 2.88 Mb 3"5 drive */ - val = 5; - break; - case FLOPPY_DRIVE_TYPE_120: - /* 1.2 Mb 5"5 drive */ - val = 2; - break; - case FLOPPY_DRIVE_TYPE_NONE: - default: - val = 0; - break; - } - return val; -} - -static void fdc_isa_build_aml(ISADevice *isadev, Aml *scope) -{ - Aml *dev; - Aml *crs; - int i; - -#define ACPI_FDE_MAX_FD 4 - uint32_t fde_buf[5] = { - 0, 0, 0, 0, /* presence of floppy drives #0 - #3 */ - cpu_to_le32(2) /* tape presence (2 == never present) */ - }; - - crs = aml_resource_template(); - aml_append(crs, aml_io(AML_DECODE16, 0x03F2, 0x03F2, 0x00, 0x04)); - aml_append(crs, aml_io(AML_DECODE16, 0x03F7, 0x03F7, 0x00, 0x01)); - aml_append(crs, aml_irq_no_flags(6)); - aml_append(crs, - aml_dma(AML_COMPATIBILITY, AML_NOTBUSMASTER, AML_TRANSFER8, 2)); - - dev = aml_device("FDC0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0700"))); - aml_append(dev, aml_name_decl("_CRS", crs)); - - for (i = 0; i < MIN(MAX_FD, ACPI_FDE_MAX_FD); i++) { - FloppyDriveType type = isa_fdc_get_drive_type(isadev, i); - - if (type < FLOPPY_DRIVE_TYPE_NONE) { - fde_buf[i] = cpu_to_le32(1); /* drive present */ - aml_append(dev, build_fdinfo_aml(i, type)); - } - } - aml_append(dev, aml_name_decl("_FDE", - aml_buffer(sizeof(fde_buf), (uint8_t *)fde_buf))); - - aml_append(scope, dev); -} - -static const VMStateDescription vmstate_isa_fdc ={ - .name = "fdc", - .version_id = 2, - .minimum_version_id = 2, - .fields = (VMStateField[]) { - VMSTATE_STRUCT(state, FDCtrlISABus, 0, vmstate_fdc, FDCtrl), - VMSTATE_END_OF_LIST() - } -}; - -static Property isa_fdc_properties[] = { - DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), - DEFINE_PROP_UINT32("irq", FDCtrlISABus, irq, 6), - DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), - DEFINE_PROP_SIGNED("fdtypeA", FDCtrlISABus, state.qdev_for_drives[0].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fdtypeB", FDCtrlISABus, state.qdev_for_drives[1].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlISABus, state.fallback, - FLOPPY_DRIVE_TYPE_288, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_END_OF_LIST(), -}; - -static void isabus_fdc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); - - dc->realize = isabus_fdc_realize; - dc->fw_name = "fdc"; - dc->reset = fdctrl_external_reset_isa; - dc->vmsd = &vmstate_isa_fdc; - isa->build_aml = fdc_isa_build_aml; - device_class_set_props(dc, isa_fdc_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static void isabus_fdc_instance_init(Object *obj) -{ - FDCtrlISABus *isa = ISA_FDC(obj); - - device_add_bootindex_property(obj, &isa->bootindexA, - "bootindexA", "/floppy@0", - DEVICE(obj)); - device_add_bootindex_property(obj, &isa->bootindexB, - "bootindexB", "/floppy@1", - DEVICE(obj)); -} - -static const TypeInfo isa_fdc_info = { - .name = TYPE_ISA_FDC, - .parent = TYPE_ISA_DEVICE, - .instance_size = sizeof(FDCtrlISABus), - .class_init = isabus_fdc_class_init, - .instance_init = isabus_fdc_instance_init, -}; - -static const VMStateDescription vmstate_sysbus_fdc ={ - .name = "fdc", - .version_id = 2, - .minimum_version_id = 2, - .fields = (VMStateField[]) { - VMSTATE_STRUCT(state, FDCtrlSysBus, 0, vmstate_fdc, FDCtrl), - VMSTATE_END_OF_LIST() - } -}; - -static Property sysbus_fdc_properties[] = { - DEFINE_PROP_SIGNED("fdtypeA", FDCtrlSysBus, 
state.qdev_for_drives[0].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, - FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_END_OF_LIST(), -}; - -static void sysbus_fdc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - device_class_set_props(dc, sysbus_fdc_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static const TypeInfo sysbus_fdc_info = { - .name = "sysbus-fdc", - .parent = TYPE_SYSBUS_FDC, - .instance_init = sysbus_fdc_initfn, - .class_init = sysbus_fdc_class_init, -}; - -static Property sun4m_fdc_properties[] = { - DEFINE_PROP_SIGNED("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type, - FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_SIGNED("fallback", FDCtrlSysBus, state.fallback, - FLOPPY_DRIVE_TYPE_144, qdev_prop_fdc_drive_type, - FloppyDriveType), - DEFINE_PROP_END_OF_LIST(), -}; - -static void sun4m_fdc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - device_class_set_props(dc, sun4m_fdc_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static const TypeInfo sun4m_fdc_info = { - .name = "sun-fdtwo", - .parent = TYPE_SYSBUS_FDC, - .instance_init = sun4m_fdc_initfn, - .class_init = sun4m_fdc_class_init, -}; - -static void sysbus_fdc_common_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = sysbus_fdc_common_realize; - dc->reset = fdctrl_external_reset_sysbus; - dc->vmsd = &vmstate_sysbus_fdc; -} - -static const TypeInfo sysbus_fdc_type_info = { - .name = TYPE_SYSBUS_FDC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(FDCtrlSysBus), - .instance_init = sysbus_fdc_common_initfn, - .abstract = true, - .class_init = sysbus_fdc_common_class_init, -}; - static void fdc_register_types(void) { - type_register_static(&isa_fdc_info); - type_register_static(&sysbus_fdc_type_info); - type_register_static(&sysbus_fdc_info); - type_register_static(&sun4m_fdc_info); type_register_static(&floppy_bus_info); type_register_static(&floppy_drive_info); } diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 183d3f44c25..b77503dc845 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -259,6 +259,8 @@ static const FlashPartInfo known_devices[] = { { INFO_STACKED("n25q00a", 0x20bb21, 0x1000, 64 << 10, 2048, ER_4K, 4) }, { INFO_STACKED("mt25ql01g", 0x20ba21, 0x1040, 64 << 10, 2048, ER_4K, 2) }, { INFO_STACKED("mt25qu01g", 0x20bb21, 0x1040, 64 << 10, 2048, ER_4K, 2) }, + { INFO_STACKED("mt25ql02g", 0x20ba22, 0x1040, 64 << 10, 4096, ER_4K | ER_32K, 2) }, + { INFO_STACKED("mt25qu02g", 0x20bb22, 0x1040, 64 << 10, 4096, ER_4K | ER_32K, 2) }, /* Spansion -- single (large) sector size only, at least * for the chips listed here (without boot sectors). 
diff --git a/hw/block/meson.build b/hw/block/meson.build index 5b4a7699f98..2389326112a 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -5,6 +5,8 @@ softmmu_ss.add(files( )) softmmu_ss.add(when: 'CONFIG_ECC', if_true: files('ecc.c')) softmmu_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c')) +softmmu_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c')) +softmmu_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c')) softmmu_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c')) softmmu_ss.add(when: 'CONFIG_ONENAND', if_true: files('onenand.c')) softmmu_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c')) @@ -13,7 +15,6 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c', 'nvme-dif.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) diff --git a/hw/block/nvme-dif.c b/hw/block/nvme-dif.c deleted file mode 100644 index 81b0a4cb138..00000000000 --- a/hw/block/nvme-dif.c +++ /dev/null @@ -1,518 +0,0 @@ -/* - * QEMU NVM Express End-to-End Data Protection support - * - * Copyright (c) 2021 Samsung Electronics Co., Ltd. - * - * Authors: - * Klaus Jensen - * Gollu Appalanaidu - */ - -#include "qemu/osdep.h" -#include "hw/block/block.h" -#include "sysemu/dma.h" -#include "sysemu/block-backend.h" -#include "qapi/error.h" -#include "trace.h" -#include "nvme.h" -#include "nvme-dif.h" - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, - uint32_t reftag) -{ - if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && - (ctrl & NVME_RW_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - return NVME_SUCCESS; -} - -/* from Linux kernel (crypto/crct10dif_common.c) */ -static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, - size_t len) -{ - unsigned int i; - - for (i = 0; i < len; i++) { - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - } - - return crc; -} - -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag) -{ - uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - int16_t pil = 0; - - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag, - reftag); - - for (; buf < end; buf += lsize, mbuf += msize) { - NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - uint16_t crc = crc_t10dif(0x0, buf, lsize); - - if (pil) { - crc = crc_t10dif(crc, mbuf, pil); - } - - dif->guard = cpu_to_be16(crc); - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { - reftag++; - } - } -} - -static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, - uint8_t *buf, uint8_t *mbuf, size_t pil, - uint16_t ctrl, uint16_t apptag, - uint16_t appmask, uint32_t reftag) -{ - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_3: - if (be32_to_cpu(dif->reftag) != 0xffffffff) { 
- break; - } - - /* fallthrough */ - case NVME_ID_NS_DPS_TYPE_1: - case NVME_ID_NS_DPS_TYPE_2: - if (be16_to_cpu(dif->apptag) != 0xffff) { - break; - } - - trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), - be32_to_cpu(dif->reftag)); - - return NVME_SUCCESS; - } - - if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { - uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns)); - - if (pil) { - crc = crc_t10dif(crc, mbuf, pil); - } - - trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); - - if (be16_to_cpu(dif->guard) != crc) { - return NVME_E2E_GUARD_ERROR; - } - } - - if (ctrl & NVME_RW_PRINFO_PRCHK_APP) { - trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, - appmask); - - if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { - return NVME_E2E_APP_ERROR; - } - } - - if (ctrl & NVME_RW_PRINFO_PRCHK_REF) { - trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); - - if (be32_to_cpu(dif->reftag) != reftag) { - return NVME_E2E_REF_ERROR; - } - } - - return NVME_SUCCESS; -} - -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag) -{ - uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - int16_t pil = 0; - uint16_t status; - - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - return status; - } - - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil); - - for (; buf < end; buf += lsize, mbuf += msize) { - NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - - status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, - appmask, reftag); - if (status) { - return status; - } - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { - reftag++; - } - } - - return NVME_SUCCESS; -} - -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba) -{ - BlockBackend *blk = ns->blkconf.blk; - BlockDriverState *bs = blk_bs(blk); - - size_t msize = nvme_msize(ns); - size_t lsize = nvme_lsize(ns); - int64_t moffset = 0, offset = nvme_l2b(ns, slba); - uint8_t *mbufp, *end; - bool zeroed; - int16_t pil = 0; - int64_t bytes = (mlen / msize) * lsize; - int64_t pnum = 0; - - Error *err = NULL; - - - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - do { - int ret; - - bytes -= pnum; - - ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); - if (ret < 0) { - error_setg_errno(&err, -ret, "unable to get block status"); - error_report_err(err); - - return NVME_INTERNAL_DEV_ERROR; - } - - zeroed = !!(ret & BDRV_BLOCK_ZERO); - - trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); - - if (zeroed) { - mbufp = mbuf + moffset; - mlen = (pnum / lsize) * msize; - end = mbufp + mlen; - - for (; mbufp < end; mbufp += msize) { - memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); - } - } - - moffset += (pnum / lsize) * msize; - offset += pnum; - } while (pnum != bytes); - - return NVME_SUCCESS; -} - -static void nvme_dif_rw_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk)); - - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - 
g_free(ctx->mdata.bounce); - - g_free(ctx); - - nvme_rw_complete_cb(req, ret); -} - -static void nvme_dif_rw_check_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeCtrl *n = nvme_ctrl(req); - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - uint16_t status; - - trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, - appmask, reftag); - - if (ret) { - goto out; - } - - status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size, - slba); - if (status) { - req->status = status; - goto out; - } - - status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); - if (status) { - req->status = status; - goto out; - } - - status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, - NVME_TX_DIRECTION_FROM_DEVICE, req); - if (status) { - req->status = status; - goto out; - } - - if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) { - goto out; - } - - status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, - NVME_TX_DIRECTION_FROM_DEVICE, req); - if (status) { - req->status = status; - } - -out: - nvme_dif_rw_cb(ctx, ret); -} - -static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_reset(&ctx->mdata.iov); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, - nvme_dif_rw_check_cb, ctx); - return; - -out: - nvme_dif_rw_cb(ctx, ret); -} - -static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0, - nvme_dif_rw_cb, ctx); - return; - -out: - nvme_dif_rw_cb(ctx, ret); -} - -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint64_t slba = le64_to_cpu(rw->slba); - size_t len = nvme_l2b(ns, nlb); - size_t mlen = nvme_m2b(ns, nlb); - size_t mapped_len = len; - int64_t offset = nvme_l2b(ns, slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - bool pract = !!(ctrl & NVME_RW_PRINFO_PRACT); - NvmeBounceContext 
*ctx; - uint16_t status; - - trace_pci_nvme_dif_rw(pract, NVME_RW_PRINFO(ctrl)); - - ctx = g_new0(NvmeBounceContext, 1); - ctx->req = req; - - if (wrz) { - BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; - - if (ctrl & NVME_RW_PRINFO_PRCHK_MASK) { - status = NVME_INVALID_PROT_INFO | NVME_DNR; - goto err; - } - - if (pract) { - uint8_t *mbuf, *end; - size_t msize = nvme_msize(ns); - int16_t pil = msize - sizeof(NvmeDifTuple); - - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - goto err; - } - - flags = 0; - - ctx->mdata.bounce = g_malloc0(mlen); - - qemu_iovec_init(&ctx->mdata.iov, 1); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - mbuf = ctx->mdata.bounce; - end = mbuf + mlen; - - if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) { - pil = 0; - } - - for (; mbuf < end; mbuf += msize) { - NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - - dif->apptag = cpu_to_be16(apptag); - dif->reftag = cpu_to_be32(reftag); - - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_1: - case NVME_ID_NS_DPS_TYPE_2: - reftag++; - } - } - } - - req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags, - nvme_dif_rw_mdata_out_cb, ctx); - return NVME_NO_COMPLETE; - } - - if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) { - mapped_len += mlen; - } - - status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd); - if (status) { - goto err; - } - - ctx->data.bounce = g_malloc(len); - - qemu_iovec_init(&ctx->data.iov, 1); - qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); - - if (req->cmd.opcode == NVME_CMD_READ) { - block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, - BLOCK_ACCT_READ); - - req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, - nvme_dif_rw_mdata_in_cb, ctx); - return NVME_NO_COMPLETE; - } - - status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { - goto err; - } - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_init(&ctx->mdata.iov, 1); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - if (!(pract && nvme_msize(ns) == 8)) { - status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { - goto err; - } - } - - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - goto err; - } - - if (pract) { - /* splice generated protection information into the buffer */ - nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, - apptag, reftag); - } else { - status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); - if (status) { - goto err; - } - } - - block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, - BLOCK_ACCT_WRITE); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0, - nvme_dif_rw_mdata_out_cb, ctx); - - return NVME_NO_COMPLETE; - -err: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - g_free(ctx->mdata.bounce); - - g_free(ctx); - - return status; -} diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h deleted file mode 100644 index 524faffbd7a..00000000000 --- a/hw/block/nvme-dif.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * QEMU NVM Express End-to-End Data Protection support - * - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
- * - * Authors: - * Klaus Jensen - * Gollu Appalanaidu - */ - -#ifndef HW_NVME_DIF_H -#define HW_NVME_DIF_H - -/* from Linux kernel (crypto/crct10dif_common.c) */ -static const uint16_t t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, - uint32_t reftag); -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba); -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag); -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag); -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); - -#endif /* HW_NVME_DIF_H */ diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c deleted file mode 100644 index 7bb618f1820..00000000000 --- a/hw/block/nvme-ns.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * QEMU NVM Express Virtual Namespace - * - * Copyright (c) 2019 CNEX Labs - * Copyright (c) 2020 Samsung Electronics - * - * Authors: - * Klaus Jensen - * - * This work is licensed under the terms of the GNU GPL, version 2. See the - * COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/cutils.h" -#include "qemu/log.h" -#include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/pci.h" -#include "sysemu/sysemu.h" -#include "sysemu/block-backend.h" -#include "qapi/error.h" - -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" - -#include "trace.h" -#include "nvme.h" -#include "nvme-ns.h" - -#define MIN_DISCARD_GRANULARITY (4 * KiB) - -void nvme_ns_init_format(NvmeNamespace *ns) -{ - NvmeIdNs *id_ns = &ns->id_ns; - BlockDriverInfo bdi; - int npdg, nlbas, ret; - - nlbas = nvme_ns_nlbas(ns); - - id_ns->nsze = cpu_to_le64(nlbas); - - /* no thin provisioning */ - id_ns->ncap = id_ns->nsze; - id_ns->nuse = id_ns->ncap; - - ns->mdata_offset = nvme_l2b(ns, nlbas); - - npdg = ns->blkconf.discard_granularity / nvme_lsize(ns); - - ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); - if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { - npdg = bdi.cluster_size / nvme_lsize(ns); - } - - id_ns->npda = id_ns->npdg = npdg - 1; -} - -static int nvme_ns_init(NvmeNamespace *ns, Error **errp) -{ - NvmeIdNs *id_ns = &ns->id_ns; - uint8_t ds; - uint16_t ms; - int i; - - ns->csi = NVME_CSI_NVM; - ns->status = 0x0; - - ns->id_ns.dlfeat = 0x1; - - /* support DULBE and I/O optimization fields */ - id_ns->nsfeat |= (0x4 | 0x10); - - if (ns->params.shared) { - id_ns->nmic |= NVME_NMIC_NS_SHARED; - } - - /* simple copy */ - id_ns->mssrl = cpu_to_le16(ns->params.mssrl); - id_ns->mcl = cpu_to_le32(ns->params.mcl); - id_ns->msrc = ns->params.msrc; - - ds = 31 - clz32(ns->blkconf.logical_block_size); - ms = ns->params.ms; - - if (ns->params.ms) { - id_ns->mc = 0x3; - - if (ns->params.mset) { - id_ns->flbas |= 0x10; - } - - id_ns->dpc = 0x1f; - id_ns->dps = ((ns->params.pil & 0x1) << 3) | ns->params.pi; - - NvmeLBAF lbaf[16] = { - [0] = { .ds = 9 }, - [1] = { .ds = 9, .ms = 8 }, - [2] = { .ds = 9, .ms = 16 }, - [3] = { .ds = 9, .ms = 64 }, - [4] = { .ds = 12 }, - [5] = { .ds = 12, .ms = 8 }, - [6] = { .ds = 12, .ms = 16 }, - [7] = { .ds = 12, .ms = 64 }, - }; - - memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 7; - } else { - NvmeLBAF lbaf[16] = { - [0] = { .ds = 9 }, - [1] = { .ds = 12 }, - }; - - memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); - id_ns->nlbaf = 1; - } - - for (i = 0; i <= id_ns->nlbaf; i++) { - NvmeLBAF *lbaf = &id_ns->lbaf[i]; - if (lbaf->ds == ds) { - if (lbaf->ms == ms) { - id_ns->flbas |= i; - goto lbaf_found; - } - } - } - - /* add non-standard lba format */ - id_ns->nlbaf++; - id_ns->lbaf[id_ns->nlbaf].ds = ds; - id_ns->lbaf[id_ns->nlbaf].ms = ms; - id_ns->flbas |= id_ns->nlbaf; - -lbaf_found: - nvme_ns_init_format(ns); - - return 0; -} - -static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) -{ - bool read_only; - - if (!blkconf_blocksizes(&ns->blkconf, errp)) { - return -1; - } - - read_only = !blk_supports_write_perm(ns->blkconf.blk); - if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { - return -1; - } - - if (ns->blkconf.discard_granularity == -1) { - ns->blkconf.discard_granularity = - MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); - } - - ns->size = blk_getlength(ns->blkconf.blk); - if (ns->size < 0) { - error_setg_errno(errp, -ns->size, "could not get blockdev size"); - return -1; - } - - return 0; -} - -static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) -{ - uint64_t zone_size, zone_cap; - uint32_t lbasz = nvme_lsize(ns); - - /* Make sure that the values of ZNS properties 
are sane */ - if (ns->params.zone_size_bs) { - zone_size = ns->params.zone_size_bs; - } else { - zone_size = NVME_DEFAULT_ZONE_SIZE; - } - if (ns->params.zone_cap_bs) { - zone_cap = ns->params.zone_cap_bs; - } else { - zone_cap = zone_size; - } - if (zone_cap > zone_size) { - error_setg(errp, "zone capacity %"PRIu64"B exceeds " - "zone size %"PRIu64"B", zone_cap, zone_size); - return -1; - } - if (zone_size < lbasz) { - error_setg(errp, "zone size %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_size, lbasz); - return -1; - } - if (zone_cap < lbasz) { - error_setg(errp, "zone capacity %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_cap, lbasz); - return -1; - } - - /* - * Save the main zone geometry values to avoid - * calculating them later again. - */ - ns->zone_size = zone_size / lbasz; - ns->zone_capacity = zone_cap / lbasz; - ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size; - - /* Do a few more sanity checks of ZNS properties */ - if (!ns->num_zones) { - error_setg(errp, - "insufficient drive capacity, must be at least the size " - "of one zone (%"PRIu64"B)", zone_size); - return -1; - } - - if (ns->params.max_open_zones > ns->num_zones) { - error_setg(errp, - "max_open_zones value %u exceeds the number of zones %u", - ns->params.max_open_zones, ns->num_zones); - return -1; - } - if (ns->params.max_active_zones > ns->num_zones) { - error_setg(errp, - "max_active_zones value %u exceeds the number of zones %u", - ns->params.max_active_zones, ns->num_zones); - return -1; - } - - if (ns->params.max_active_zones) { - if (ns->params.max_open_zones > ns->params.max_active_zones) { - error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)", - ns->params.max_open_zones, ns->params.max_active_zones); - return -1; - } - - if (!ns->params.max_open_zones) { - ns->params.max_open_zones = ns->params.max_active_zones; - } - } - - if (ns->params.zd_extension_size) { - if (ns->params.zd_extension_size & 0x3f) { - error_setg(errp, - "zone descriptor extension size must be a multiple of 64B"); - return -1; - } - if ((ns->params.zd_extension_size >> 6) > 0xff) { - error_setg(errp, "zone descriptor extension size is too large"); - return -1; - } - } - - return 0; -} - -static void nvme_ns_zoned_init_state(NvmeNamespace *ns) -{ - uint64_t start = 0, zone_size = ns->zone_size; - uint64_t capacity = ns->num_zones * zone_size; - NvmeZone *zone; - int i; - - ns->zone_array = g_new0(NvmeZone, ns->num_zones); - if (ns->params.zd_extension_size) { - ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * - ns->num_zones); - } - - QTAILQ_INIT(&ns->exp_open_zones); - QTAILQ_INIT(&ns->imp_open_zones); - QTAILQ_INIT(&ns->closed_zones); - QTAILQ_INIT(&ns->full_zones); - - zone = ns->zone_array; - for (i = 0; i < ns->num_zones; i++, zone++) { - if (start + zone_size > capacity) { - zone_size = capacity - start; - } - zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; - nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); - zone->d.za = 0; - zone->d.zcap = ns->zone_capacity; - zone->d.zslba = start; - zone->d.wp = start; - zone->w_ptr = start; - start += zone_size; - } - - ns->zone_size_log2 = 0; - if (is_power_of_2(ns->zone_size)) { - ns->zone_size_log2 = 63 - clz64(ns->zone_size); - } -} - -static void nvme_ns_init_zoned(NvmeNamespace *ns) -{ - NvmeIdNsZoned *id_ns_z; - int i; - - nvme_ns_zoned_init_state(ns); - - id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); - - /* MAR/MOR are zeroes-based, 0xffffffff means no limit */ - id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); - 
id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); - id_ns_z->zoc = 0; - id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; - - for (i = 0; i <= ns->id_ns.nlbaf; i++) { - id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); - id_ns_z->lbafe[i].zdes = - ns->params.zd_extension_size >> 6; /* Units of 64B */ - } - - ns->csi = NVME_CSI_ZONED; - ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); - ns->id_ns.ncap = ns->id_ns.nsze; - ns->id_ns.nuse = ns->id_ns.ncap; - - /* - * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" - * status of logical blocks. Since the spec defines that logical blocks - * SHALL be deallocated when then zone is in the Empty or Offline states, - * we can only support DULBE if the zone size is a multiple of the - * calculated NPDG. - */ - if (ns->zone_size % (ns->id_ns.npdg + 1)) { - warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " - "the calculated deallocation granularity (%d blocks); " - "DULBE support disabled", - ns->zone_size, ns->id_ns.npdg + 1); - - ns->id_ns.nsfeat &= ~0x4; - } - - ns->id_ns_zoned = id_ns_z; -} - -static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) -{ - uint8_t state; - - zone->w_ptr = zone->d.wp; - state = nvme_get_zone_state(zone); - if (zone->d.wp != zone->d.zslba || - (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { - if (state != NVME_ZONE_STATE_CLOSED) { - trace_pci_nvme_clear_ns_close(state, zone->d.zslba); - nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); - } - nvme_aor_inc_active(ns); - QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); - } else { - trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); - nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); - } -} - -/* - * Close all the zones that are currently open. - */ -static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) -{ - NvmeZone *zone, *next; - - QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { - QTAILQ_REMOVE(&ns->closed_zones, zone, entry); - nvme_aor_dec_active(ns); - nvme_clear_zone(ns, zone); - } - QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { - QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); - nvme_aor_dec_open(ns); - nvme_aor_dec_active(ns); - nvme_clear_zone(ns, zone); - } - QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { - QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); - nvme_aor_dec_open(ns); - nvme_aor_dec_active(ns); - nvme_clear_zone(ns, zone); - } - - assert(ns->nr_open_zones == 0); -} - -static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, - Error **errp) -{ - if (!ns->blkconf.blk) { - error_setg(errp, "block backend not configured"); - return -1; - } - - if (ns->params.pi && ns->params.ms < 8) { - error_setg(errp, "at least 8 bytes of metadata required to enable " - "protection information"); - return -1; - } - - if (ns->params.nsid > NVME_MAX_NAMESPACES) { - error_setg(errp, "invalid namespace id (must be between 0 and %d)", - NVME_MAX_NAMESPACES); - return -1; - } - - if (!n->subsys) { - if (ns->params.detached) { - error_setg(errp, "detached requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - - if (ns->params.shared) { - error_setg(errp, "shared requires that the nvme device is " - "linked to an nvme-subsys device"); - return -1; - } - } - - return 0; -} - -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) -{ - if (nvme_ns_check_constraints(n, ns, errp)) { - return -1; - } - - if (nvme_ns_init_blk(ns, errp)) { - return -1; - } - - if (nvme_ns_init(ns, errp)) { - return -1; - } 
- if (ns->params.zoned) { - if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { - return -1; - } - nvme_ns_init_zoned(ns); - } - - return 0; -} - -void nvme_ns_drain(NvmeNamespace *ns) -{ - blk_drain(ns->blkconf.blk); -} - -void nvme_ns_shutdown(NvmeNamespace *ns) -{ - blk_flush(ns->blkconf.blk); - if (ns->params.zoned) { - nvme_zoned_ns_shutdown(ns); - } -} - -void nvme_ns_cleanup(NvmeNamespace *ns) -{ - if (ns->params.zoned) { - g_free(ns->id_ns_zoned); - g_free(ns->zone_array); - g_free(ns->zd_extensions); - } -} - -static void nvme_ns_realize(DeviceState *dev, Error **errp) -{ - NvmeNamespace *ns = NVME_NS(dev); - BusState *s = qdev_get_parent_bus(dev); - NvmeCtrl *n = NVME(s->parent); - NvmeSubsystem *subsys = n->subsys; - uint32_t nsid = ns->params.nsid; - int i; - - if (nvme_ns_setup(n, ns, errp)) { - return; - } - - if (!nsid) { - for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { - if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { - continue; - } - - nsid = ns->params.nsid = i; - break; - } - - if (!nsid) { - error_setg(errp, "no free namespace id"); - return; - } - } else { - if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { - error_setg(errp, "namespace id '%d' already allocated", nsid); - return; - } - } - - if (subsys) { - subsys->namespaces[nsid] = ns; - - if (ns->params.detached) { - return; - } - - if (ns->params.shared) { - for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { - NvmeCtrl *ctrl = subsys->ctrls[i]; - - if (ctrl) { - nvme_attach_ns(ctrl, ns); - } - } - - return; - } - } - - nvme_attach_ns(n, ns); -} - -static Property nvme_ns_props[] = { - DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), - DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), - DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false), - DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), - DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), - DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), - DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), - DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), - DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), - DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), - DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), - DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), - DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), - DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, - NVME_DEFAULT_ZONE_SIZE), - DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, - 0), - DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, - params.cross_zone_read, false), - DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, - params.max_active_zones, 0), - DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, - params.max_open_zones, 0), - DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, - params.zd_extension_size, 0), - DEFINE_PROP_END_OF_LIST(), -}; - -static void nvme_ns_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - - dc->bus_type = TYPE_NVME_BUS; - dc->realize = nvme_ns_realize; - device_class_set_props(dc, nvme_ns_props); - dc->desc = "Virtual NVMe namespace"; -} - -static void nvme_ns_instance_init(Object *obj) -{ - NvmeNamespace *ns = NVME_NS(obj); - char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); - - device_add_bootindex_property(obj, &ns->bootindex, "bootindex", - bootindex, DEVICE(obj)); - - g_free(bootindex); -} - 
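A minimal standalone sketch (illustrative values, not QEMU code) of the two encodings used by nvme_ns_init_zoned() above: the zeroes-based MAR/MOR fields, where a configured limit of 0 wraps to 0xffffffff and is read as "no limit", and the zone descriptor extension size, which is reported in 64-byte units:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t max_active_zones = 0;     /* 0 is the device default: no limit */
        uint32_t zd_extension_size = 128;  /* bytes; must be a multiple of 64 */

        /* MAR/MOR are zeroes-based: a limit of N is reported as N - 1, so a
         * configured limit of 0 wraps to 0xffffffff ("no limit"). */
        uint32_t mar = max_active_zones - 1;

        /* The zone descriptor extension size is reported in units of 64 bytes. */
        uint32_t zdes = zd_extension_size >> 6;

        /* Prints: mar=0xffffffff zdes=2 */
        printf("mar=0x%08" PRIx32 " zdes=%" PRIu32 "\n", mar, zdes);
        return 0;
    }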
-static const TypeInfo nvme_ns_info = { - .name = TYPE_NVME_NS, - .parent = TYPE_DEVICE, - .class_init = nvme_ns_class_init, - .instance_size = sizeof(NvmeNamespace), - .instance_init = nvme_ns_instance_init, -}; - -static void nvme_ns_register_types(void) -{ - type_register_static(&nvme_ns_info); -} - -type_init(nvme_ns_register_types) diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h deleted file mode 100644 index fb0a41f912e..00000000000 --- a/hw/block/nvme-ns.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * QEMU NVM Express Virtual Namespace - * - * Copyright (c) 2019 CNEX Labs - * Copyright (c) 2020 Samsung Electronics - * - * Authors: - * Klaus Jensen - * - * This work is licensed under the terms of the GNU GPL, version 2. See the - * COPYING file in the top-level directory. - * - */ - -#ifndef NVME_NS_H -#define NVME_NS_H - -#include "qemu/uuid.h" - -#define TYPE_NVME_NS "nvme-ns" -#define NVME_NS(obj) \ - OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) - -typedef struct NvmeZone { - NvmeZoneDescr d; - uint64_t w_ptr; - QTAILQ_ENTRY(NvmeZone) entry; -} NvmeZone; - -typedef struct NvmeNamespaceParams { - bool detached; - bool shared; - uint32_t nsid; - QemuUUID uuid; - - uint16_t ms; - uint8_t mset; - uint8_t pi; - uint8_t pil; - - uint16_t mssrl; - uint32_t mcl; - uint8_t msrc; - - bool zoned; - bool cross_zone_read; - uint64_t zone_size_bs; - uint64_t zone_cap_bs; - uint32_t max_active_zones; - uint32_t max_open_zones; - uint32_t zd_extension_size; -} NvmeNamespaceParams; - -typedef struct NvmeNamespace { - DeviceState parent_obj; - BlockConf blkconf; - int32_t bootindex; - int64_t size; - int64_t mdata_offset; - NvmeIdNs id_ns; - const uint32_t *iocs; - uint8_t csi; - uint16_t status; - int attached; - - QTAILQ_ENTRY(NvmeNamespace) entry; - - NvmeIdNsZoned *id_ns_zoned; - NvmeZone *zone_array; - QTAILQ_HEAD(, NvmeZone) exp_open_zones; - QTAILQ_HEAD(, NvmeZone) imp_open_zones; - QTAILQ_HEAD(, NvmeZone) closed_zones; - QTAILQ_HEAD(, NvmeZone) full_zones; - uint32_t num_zones; - uint64_t zone_size; - uint64_t zone_capacity; - uint32_t zone_size_log2; - uint8_t *zd_extensions; - int32_t nr_open_zones; - int32_t nr_active_zones; - - NvmeNamespaceParams params; - - struct { - uint32_t err_rec; - } features; -} NvmeNamespace; - -static inline uint16_t nvme_ns_status(NvmeNamespace *ns) -{ - return ns->status; -} - -static inline uint32_t nvme_nsid(NvmeNamespace *ns) -{ - if (ns) { - return ns->params.nsid; - } - - return 0; -} - -static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) -{ - NvmeIdNs *id_ns = &ns->id_ns; - return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; -} - -static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ds; -} - -/* convert an LBA to the equivalent in bytes */ -static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) -{ - return lba << nvme_ns_lbads(ns); -} - -static inline size_t nvme_lsize(NvmeNamespace *ns) -{ - return 1 << nvme_ns_lbads(ns); -} - -static inline uint16_t nvme_msize(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ms; -} - -static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) -{ - return nvme_msize(ns) * lba; -} - -static inline bool nvme_ns_ext(NvmeNamespace *ns) -{ - return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); -} - -/* calculate the number of LBAs that the namespace can accomodate */ -static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) -{ - if (nvme_msize(ns)) { - return ns->size / (nvme_lsize(ns) + nvme_msize(ns)); - } - return ns->size >> nvme_ns_lbads(ns); -} - -typedef 
struct NvmeCtrl NvmeCtrl; - -static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) -{ - return zone->d.zs >> 4; -} - -static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) -{ - zone->d.zs = state << 4; -} - -static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) -{ - return zone->d.zslba + ns->zone_size; -} - -static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) -{ - return zone->d.zslba + zone->d.zcap; -} - -static inline bool nvme_wp_is_valid(NvmeZone *zone) -{ - uint8_t st = nvme_get_zone_state(zone); - - return st != NVME_ZONE_STATE_FULL && - st != NVME_ZONE_STATE_READ_ONLY && - st != NVME_ZONE_STATE_OFFLINE; -} - -static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, - uint32_t zone_idx) -{ - return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; -} - -static inline void nvme_aor_inc_open(NvmeNamespace *ns) -{ - assert(ns->nr_open_zones >= 0); - if (ns->params.max_open_zones) { - ns->nr_open_zones++; - assert(ns->nr_open_zones <= ns->params.max_open_zones); - } -} - -static inline void nvme_aor_dec_open(NvmeNamespace *ns) -{ - if (ns->params.max_open_zones) { - assert(ns->nr_open_zones > 0); - ns->nr_open_zones--; - } - assert(ns->nr_open_zones >= 0); -} - -static inline void nvme_aor_inc_active(NvmeNamespace *ns) -{ - assert(ns->nr_active_zones >= 0); - if (ns->params.max_active_zones) { - ns->nr_active_zones++; - assert(ns->nr_active_zones <= ns->params.max_active_zones); - } -} - -static inline void nvme_aor_dec_active(NvmeNamespace *ns) -{ - if (ns->params.max_active_zones) { - assert(ns->nr_active_zones > 0); - ns->nr_active_zones--; - assert(ns->nr_active_zones >= ns->nr_open_zones); - } - assert(ns->nr_active_zones >= 0); -} - -void nvme_ns_init_format(NvmeNamespace *ns); -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); -void nvme_ns_drain(NvmeNamespace *ns); -void nvme_ns_shutdown(NvmeNamespace *ns); -void nvme_ns_cleanup(NvmeNamespace *ns); - -#endif /* NVME_NS_H */ diff --git a/hw/block/nvme-subsys.c b/hw/block/nvme-subsys.c deleted file mode 100644 index 283a97b79d5..00000000000 --- a/hw/block/nvme-subsys.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * QEMU NVM Express Subsystem: nvme-subsys - * - * Copyright (c) 2021 Minwoo Im - * - * This code is licensed under the GNU GPL v2. Refer COPYING. - */ - -#include "qemu/units.h" -#include "qemu/osdep.h" -#include "qemu/uuid.h" -#include "qemu/iov.h" -#include "qemu/cutils.h" -#include "qapi/error.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" -#include "hw/block/block.h" -#include "block/aio.h" -#include "block/accounting.h" -#include "sysemu/sysemu.h" -#include "hw/pci/pci.h" -#include "nvme.h" -#include "nvme-subsys.h" - -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) -{ - NvmeSubsystem *subsys = n->subsys; - int cntlid; - - for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { - if (!subsys->ctrls[cntlid]) { - break; - } - } - - if (cntlid == ARRAY_SIZE(subsys->ctrls)) { - error_setg(errp, "no more free controller id"); - return -1; - } - - subsys->ctrls[cntlid] = n; - - return cntlid; -} - -static void nvme_subsys_setup(NvmeSubsystem *subsys) -{ - const char *nqn = subsys->params.nqn ? 
- subsys->params.nqn : subsys->parent_obj.id; - - snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), - "nqn.2019-08.org.qemu:%s", nqn); -} - -static void nvme_subsys_realize(DeviceState *dev, Error **errp) -{ - NvmeSubsystem *subsys = NVME_SUBSYS(dev); - - nvme_subsys_setup(subsys); -} - -static Property nvme_subsystem_props[] = { - DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), - DEFINE_PROP_END_OF_LIST(), -}; - -static void nvme_subsys_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - - dc->realize = nvme_subsys_realize; - dc->desc = "Virtual NVMe subsystem"; - - device_class_set_props(dc, nvme_subsystem_props); -} - -static const TypeInfo nvme_subsys_info = { - .name = TYPE_NVME_SUBSYS, - .parent = TYPE_DEVICE, - .class_init = nvme_subsys_class_init, - .instance_size = sizeof(NvmeSubsystem), -}; - -static void nvme_subsys_register_types(void) -{ - type_register_static(&nvme_subsys_info); -} - -type_init(nvme_subsys_register_types) diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h deleted file mode 100644 index 7d7ef5f7f12..00000000000 --- a/hw/block/nvme-subsys.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * QEMU NVM Express Subsystem: nvme-subsys - * - * Copyright (c) 2021 Minwoo Im - * - * This code is licensed under the GNU GPL v2. Refer COPYING. - */ - -#ifndef NVME_SUBSYS_H -#define NVME_SUBSYS_H - -#define TYPE_NVME_SUBSYS "nvme-subsys" -#define NVME_SUBSYS(obj) \ - OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) - -#define NVME_SUBSYS_MAX_CTRLS 32 -#define NVME_MAX_NAMESPACES 256 - -typedef struct NvmeCtrl NvmeCtrl; -typedef struct NvmeNamespace NvmeNamespace; -typedef struct NvmeSubsystem { - DeviceState parent_obj; - uint8_t subnqn[256]; - - NvmeCtrl *ctrls[NVME_SUBSYS_MAX_CTRLS]; - /* Allocated namespaces for this subsystem */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; - - struct { - char *nqn; - } params; -} NvmeSubsystem; - -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); - -static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, - uint32_t cntlid) -{ - if (!subsys || cntlid >= NVME_SUBSYS_MAX_CTRLS) { - return NULL; - } - - return subsys->ctrls[cntlid]; -} - -/* - * Return allocated namespace of the specified nsid in the subsystem. - */ -static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, - uint32_t nsid) -{ - if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { - return NULL; - } - - return subsys->namespaces[nsid]; -} - -#endif /* NVME_SUBSYS_H */ diff --git a/hw/block/nvme.c b/hw/block/nvme.c deleted file mode 100644 index 5fe082ec34c..00000000000 --- a/hw/block/nvme.c +++ /dev/null @@ -1,6363 +0,0 @@ -/* - * QEMU NVM Express Controller - * - * Copyright (c) 2012, Intel Corporation - * - * Written by Keith Busch - * - * This code is licensed under the GNU GPL v2 or later. - */ - -/** - * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e - * - * https://nvmexpress.org/developers/nvme-specification/ - */ - -/** - * Usage: add options: - * -drive file=,if=none,id= - * -device nvme-subsys,id=,nqn= - * -device nvme,serial=,id=, \ - * cmb_size_mb=, \ - * [pmrdev=,] \ - * max_ioqpairs=, \ - * aerl=,aer_max_queued=, \ - * mdts=,vsl=, \ - * zoned.zasl=, \ - * subsys= - * -device nvme-ns,drive=,bus=,nsid=,\ - * zoned=, \ - * subsys=,detached= - * - * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at - * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. 
By default, the - * device will use the "v1.4 CMB scheme" - use the `legacy-cmb` parameter to - * always enable the CMBLOC and CMBSZ registers (v1.3 behavior). - * - * Enabling pmr emulation can be achieved by pointing to memory-backend-file. - * For example: - * -object memory-backend-file,id=,share=on,mem-path=, \ - * size= .... -device nvme,...,pmrdev= - * - * The PMR will use BAR 4/5 exclusively. - * - * To place controller(s) and namespace(s) to a subsystem, then provide - * nvme-subsys device as above. - * - * nvme subsystem device parameters - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - `nqn` - * This parameter provides the `` part of the string - * `nqn.2019-08.org.qemu:` which will be reported in the SUBNQN field - * of subsystem controllers. Note that `` should be unique per - * subsystem, but this is not enforced by QEMU. If not specified, it will - * default to the value of the `id` parameter (``). - * - * nvme device parameters - * ~~~~~~~~~~~~~~~~~~~~~~ - * - `subsys` - * Specifying this parameter attaches the controller to the subsystem and - * the SUBNQN field in the controller will report the NQN of the subsystem - * device. This also enables multi controller capability represented in - * Identify Controller data structure in CMIC (Controller Multi-path I/O and - * Namesapce Sharing Capabilities). - * - * - `aerl` - * The Asynchronous Event Request Limit (AERL). Indicates the maximum number - * of concurrently outstanding Asynchronous Event Request commands support - * by the controller. This is a 0's based value. - * - * - `aer_max_queued` - * This is the maximum number of events that the device will enqueue for - * completion when there are no outstanding AERs. When the maximum number of - * enqueued events are reached, subsequent events will be dropped. - * - * - `mdts` - * Indicates the maximum data transfer size for a command that transfers data - * between host-accessible memory and the controller. The value is specified - * as a power of two (2^n) and is in units of the minimum memory page size - * (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB). - * - * - `vsl` - * Indicates the maximum data size limit for the Verify command. Like `mdts`, - * this value is specified as a power of two (2^n) and is in units of the - * minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512 - * KiB). - * - * - `zoned.zasl` - * Indicates the maximum data transfer size for the Zone Append command. Like - * `mdts`, the value is specified as a power of two (2^n) and is in units of - * the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e. - * defaulting to the value of `mdts`). - * - * nvme namespace device parameters - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - `shared` - * When the parent nvme device (as defined explicitly by the 'bus' parameter - * or implicitly by the most recently defined NvmeBus) is linked to an - * nvme-subsys device, the namespace will be attached to all controllers in - * the subsystem. If set to 'off' (the default), the namespace will remain a - * private namespace and may only be attached to a single controller at a - * time. - * - * - `detached` - * This parameter is only valid together with the `subsys` parameter. If left - * at the default value (`false/off`), the namespace will be attached to all - * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the - * namespace will be be available in the subsystem not not attached to any - * controllers. 
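A minimal standalone sketch of the power-of-two sizing convention shared by `mdts`, `vsl` and `zoned.zasl` above, assuming CAP.MPSMIN corresponds to a 4 KiB page (illustrative only, not QEMU code); with that assumption the default mdts=7 works out to the 512 KiB mentioned above:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t mpsmin = 4096;  /* assumed CAP.MPSMIN page size: 4 KiB */
        const unsigned mdts = 7;       /* default value quoted in the text above */

        /* mdts/vsl/zasl are expressed as 2^n units of the minimum page size. */
        uint64_t max_xfer = mpsmin << mdts;

        /* Prints: max transfer = 524288 bytes (512 KiB) */
        printf("max transfer = %" PRIu64 " bytes (%" PRIu64 " KiB)\n",
               max_xfer, max_xfer / 1024);
        return 0;
    }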
- * - * Setting `zoned` to true selects Zoned Command Set at the namespace. - * In this case, the following namespace properties are available to configure - * zoned operation: - * zoned.zone_size= - * The number may be followed by K, M, G as in kilo-, mega- or giga-. - * - * zoned.zone_capacity= - * The value 0 (default) forces zone capacity to be the same as zone - * size. The value of this property may not exceed zone size. - * - * zoned.descr_ext_size= - * This value needs to be specified in 64B units. If it is zero, - * namespace(s) will not support zone descriptor extensions. - * - * zoned.max_active= - * The default value means there is no limit to the number of - * concurrently active zones. - * - * zoned.max_open= - * The default value means there is no limit to the number of - * concurrently open zones. - * - * zoned.cross_read= - * Setting this property to true enables Read Across Zone Boundaries. - */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/msix.h" -#include "hw/pci/pci.h" -#include "hw/qdev-properties.h" -#include "migration/vmstate.h" -#include "sysemu/sysemu.h" -#include "qapi/error.h" -#include "qapi/visitor.h" -#include "sysemu/hostmem.h" -#include "sysemu/block-backend.h" -#include "exec/memory.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/cutils.h" -#include "trace.h" -#include "nvme.h" -#include "nvme-ns.h" -#include "nvme-dif.h" - -#define NVME_MAX_IOQPAIRS 0xffff -#define NVME_DB_SIZE 4 -#define NVME_SPEC_VER 0x00010400 -#define NVME_CMB_BIR 2 -#define NVME_PMR_BIR 4 -#define NVME_TEMPERATURE 0x143 -#define NVME_TEMPERATURE_WARNING 0x157 -#define NVME_TEMPERATURE_CRITICAL 0x175 -#define NVME_NUM_FW_SLOTS 1 - -#define NVME_GUEST_ERR(trace, fmt, ...) 
\ - do { \ - (trace_##trace)(__VA_ARGS__); \ - qemu_log_mask(LOG_GUEST_ERROR, #trace \ - " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ - } while (0) - -static const bool nvme_feature_support[NVME_FID_MAX] = { - [NVME_ARBITRATION] = true, - [NVME_POWER_MANAGEMENT] = true, - [NVME_TEMPERATURE_THRESHOLD] = true, - [NVME_ERROR_RECOVERY] = true, - [NVME_VOLATILE_WRITE_CACHE] = true, - [NVME_NUMBER_OF_QUEUES] = true, - [NVME_INTERRUPT_COALESCING] = true, - [NVME_INTERRUPT_VECTOR_CONF] = true, - [NVME_WRITE_ATOMICITY] = true, - [NVME_ASYNCHRONOUS_EVENT_CONF] = true, - [NVME_TIMESTAMP] = true, -}; - -static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { - [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE, - [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, - [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE, - [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, - [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, - [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, -}; - -static const uint32_t nvme_cse_acs[256] = { - [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, - [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, - [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, -}; - -static const uint32_t nvme_cse_iocs_none[256]; - -static const uint32_t nvme_cse_iocs_nvm[256] = { - [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, -}; - -static const uint32_t nvme_cse_iocs_zoned[256] = { - [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, - [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, - [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, -}; - -static void nvme_process_sq(void *opaque); - -static uint16_t nvme_sqid(NvmeRequest *req) -{ - return le16_to_cpu(req->sq->sqid); -} - -static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state) -{ - if (QTAILQ_IN_USE(zone, entry)) { - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_CLOSED: - QTAILQ_REMOVE(&ns->closed_zones, zone, entry); - break; - case NVME_ZONE_STATE_FULL: - 
QTAILQ_REMOVE(&ns->full_zones, zone, entry); - default: - ; - } - } - - nvme_set_zone_state(zone, state); - - switch (state) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry); - break; - case NVME_ZONE_STATE_CLOSED: - QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry); - break; - case NVME_ZONE_STATE_FULL: - QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry); - case NVME_ZONE_STATE_READ_ONLY: - break; - default: - zone->d.za = 0; - } -} - -/* - * Check if we can open a zone without exceeding open/active limits. - * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5). - */ -static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) -{ - if (ns->params.max_active_zones != 0 && - ns->nr_active_zones + act > ns->params.max_active_zones) { - trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones); - return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR; - } - if (ns->params.max_open_zones != 0 && - ns->nr_open_zones + opn > ns->params.max_open_zones) { - trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones); - return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) -{ - hwaddr hi, lo; - - if (!n->cmb.cmse) { - return false; - } - - lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; - hi = lo + int128_get64(n->cmb.mem.size); - - return addr >= lo && addr < hi; -} - -static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr) -{ - hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; - return &n->cmb.buf[addr - base]; -} - -static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr) -{ - hwaddr hi; - - if (!n->pmr.cmse) { - return false; - } - - hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size); - - return addr >= n->pmr.cba && addr < hi; -} - -static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr) -{ - return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba); -} - -static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) -{ - hwaddr hi = addr + size - 1; - if (hi < addr) { - return 1; - } - - if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { - memcpy(buf, nvme_addr_to_cmb(n, addr), size); - return 0; - } - - if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { - memcpy(buf, nvme_addr_to_pmr(n, addr), size); - return 0; - } - - return pci_dma_read(&n->parent_obj, addr, buf, size); -} - -static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) -{ - hwaddr hi = addr + size - 1; - if (hi < addr) { - return 1; - } - - if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { - memcpy(nvme_addr_to_cmb(n, addr), buf, size); - return 0; - } - - if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { - memcpy(nvme_addr_to_pmr(n, addr), buf, size); - return 0; - } - - return pci_dma_write(&n->parent_obj, addr, buf, size); -} - -static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) -{ - return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces); -} - -static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) -{ - return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; -} - -static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) -{ - return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 
0 : -1; -} - -static void nvme_inc_cq_tail(NvmeCQueue *cq) -{ - cq->tail++; - if (cq->tail >= cq->size) { - cq->tail = 0; - cq->phase = !cq->phase; - } -} - -static void nvme_inc_sq_head(NvmeSQueue *sq) -{ - sq->head = (sq->head + 1) % sq->size; -} - -static uint8_t nvme_cq_full(NvmeCQueue *cq) -{ - return (cq->tail + 1) % cq->size == cq->head; -} - -static uint8_t nvme_sq_empty(NvmeSQueue *sq) -{ - return sq->head == sq->tail; -} - -static void nvme_irq_check(NvmeCtrl *n) -{ - if (msix_enabled(&(n->parent_obj))) { - return; - } - if (~n->bar.intms & n->irq_status) { - pci_irq_assert(&n->parent_obj); - } else { - pci_irq_deassert(&n->parent_obj); - } -} - -static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) -{ - if (cq->irq_enabled) { - if (msix_enabled(&(n->parent_obj))) { - trace_pci_nvme_irq_msix(cq->vector); - msix_notify(&(n->parent_obj), cq->vector); - } else { - trace_pci_nvme_irq_pin(); - assert(cq->vector < 32); - n->irq_status |= 1 << cq->vector; - nvme_irq_check(n); - } - } else { - trace_pci_nvme_irq_masked(); - } -} - -static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) -{ - if (cq->irq_enabled) { - if (msix_enabled(&(n->parent_obj))) { - return; - } else { - assert(cq->vector < 32); - n->irq_status &= ~(1 << cq->vector); - nvme_irq_check(n); - } - } -} - -static void nvme_req_clear(NvmeRequest *req) -{ - req->ns = NULL; - req->opaque = NULL; - req->aiocb = NULL; - memset(&req->cqe, 0x0, sizeof(req->cqe)); - req->status = NVME_SUCCESS; -} - -static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma) -{ - if (dma) { - pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0); - sg->flags = NVME_SG_DMA; - } else { - qemu_iovec_init(&sg->iov, 0); - } - - sg->flags |= NVME_SG_ALLOC; -} - -static inline void nvme_sg_unmap(NvmeSg *sg) -{ - if (!(sg->flags & NVME_SG_ALLOC)) { - return; - } - - if (sg->flags & NVME_SG_DMA) { - qemu_sglist_destroy(&sg->qsg); - } else { - qemu_iovec_destroy(&sg->iov); - } - - memset(sg, 0x0, sizeof(*sg)); -} - -/* - * When metadata is transfered as extended LBAs, the DPTR mapped into `sg` - * holds both data and metadata. This function splits the data and metadata - * into two separate QSG/IOVs. - */ -static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, - NvmeSg *mdata) -{ - NvmeSg *dst = data; - size_t size = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - uint32_t trans_len, count = size; - uint64_t offset = 0; - bool dma = sg->flags & NVME_SG_DMA; - size_t sge_len; - size_t sg_len = dma ? sg->qsg.size : sg->iov.size; - int sg_idx = 0; - - assert(sg->flags & NVME_SG_ALLOC); - - while (sg_len) { - sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; - - trans_len = MIN(sg_len, count); - trans_len = MIN(trans_len, sge_len - offset); - - if (dst) { - if (dma) { - qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset, - trans_len); - } else { - qemu_iovec_add(&dst->iov, - sg->iov.iov[sg_idx].iov_base + offset, - trans_len); - } - } - - sg_len -= trans_len; - count -= trans_len; - offset += trans_len; - - if (count == 0) { - dst = (dst == data) ? mdata : data; - count = (dst == data) ? 
size : msize; - } - - if (sge_len == offset) { - offset = 0; - sg_idx++; - } - } -} - -static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, - size_t len) -{ - if (!len) { - return NVME_SUCCESS; - } - - trace_pci_nvme_map_addr_cmb(addr, len); - - if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) { - return NVME_DATA_TRAS_ERROR; - } - - qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len); - - return NVME_SUCCESS; -} - -static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, - size_t len) -{ - if (!len) { - return NVME_SUCCESS; - } - - if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) { - return NVME_DATA_TRAS_ERROR; - } - - qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len); - - return NVME_SUCCESS; -} - -static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) -{ - bool cmb = false, pmr = false; - - if (!len) { - return NVME_SUCCESS; - } - - trace_pci_nvme_map_addr(addr, len); - - if (nvme_addr_is_cmb(n, addr)) { - cmb = true; - } else if (nvme_addr_is_pmr(n, addr)) { - pmr = true; - } - - if (cmb || pmr) { - if (sg->flags & NVME_SG_DMA) { - return NVME_INVALID_USE_OF_CMB | NVME_DNR; - } - - if (cmb) { - return nvme_map_addr_cmb(n, &sg->iov, addr, len); - } else { - return nvme_map_addr_pmr(n, &sg->iov, addr, len); - } - } - - if (!(sg->flags & NVME_SG_DMA)) { - return NVME_INVALID_USE_OF_CMB | NVME_DNR; - } - - qemu_sglist_add(&sg->qsg, addr, len); - - return NVME_SUCCESS; -} - -static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr) -{ - return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr)); -} - -static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, - uint64_t prp2, uint32_t len) -{ - hwaddr trans_len = n->page_size - (prp1 % n->page_size); - trans_len = MIN(len, trans_len); - int num_prps = (len >> n->page_bits) + 1; - uint16_t status; - int ret; - - trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps); - - nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1)); - - status = nvme_map_addr(n, sg, prp1, trans_len); - if (status) { - goto unmap; - } - - len -= trans_len; - if (len) { - if (len > n->page_size) { - uint64_t prp_list[n->max_prp_ents]; - uint32_t nents, prp_trans; - int i = 0; - - /* - * The first PRP list entry, pointed to by PRP2 may contain offset. - * Hence, we need to calculate the number of entries in based on - * that offset. 
- */ - nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3; - prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); - ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); - if (ret) { - trace_pci_nvme_err_addr_read(prp2); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - while (len != 0) { - uint64_t prp_ent = le64_to_cpu(prp_list[i]); - - if (i == nents - 1 && len > n->page_size) { - if (unlikely(prp_ent & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_prplist_ent(prp_ent); - status = NVME_INVALID_PRP_OFFSET | NVME_DNR; - goto unmap; - } - - i = 0; - nents = (len + n->page_size - 1) >> n->page_bits; - nents = MIN(nents, n->max_prp_ents); - prp_trans = nents * sizeof(uint64_t); - ret = nvme_addr_read(n, prp_ent, (void *)prp_list, - prp_trans); - if (ret) { - trace_pci_nvme_err_addr_read(prp_ent); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - prp_ent = le64_to_cpu(prp_list[i]); - } - - if (unlikely(prp_ent & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_prplist_ent(prp_ent); - status = NVME_INVALID_PRP_OFFSET | NVME_DNR; - goto unmap; - } - - trans_len = MIN(len, n->page_size); - status = nvme_map_addr(n, sg, prp_ent, trans_len); - if (status) { - goto unmap; - } - - len -= trans_len; - i++; - } - } else { - if (unlikely(prp2 & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_prp2_align(prp2); - status = NVME_INVALID_PRP_OFFSET | NVME_DNR; - goto unmap; - } - status = nvme_map_addr(n, sg, prp2, len); - if (status) { - goto unmap; - } - } - } - - return NVME_SUCCESS; - -unmap: - nvme_sg_unmap(sg); - return status; -} - -/* - * Map 'nsgld' data descriptors from 'segment'. The function will subtract the - * number of bytes mapped in len. - */ -static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, - NvmeSglDescriptor *segment, uint64_t nsgld, - size_t *len, NvmeCmd *cmd) -{ - dma_addr_t addr, trans_len; - uint32_t dlen; - uint16_t status; - - for (int i = 0; i < nsgld; i++) { - uint8_t type = NVME_SGL_TYPE(segment[i].type); - - switch (type) { - case NVME_SGL_DESCR_TYPE_BIT_BUCKET: - if (cmd->opcode == NVME_CMD_WRITE) { - continue; - } - case NVME_SGL_DESCR_TYPE_DATA_BLOCK: - break; - case NVME_SGL_DESCR_TYPE_SEGMENT: - case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: - return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR; - default: - return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR; - } - - dlen = le32_to_cpu(segment[i].len); - - if (!dlen) { - continue; - } - - if (*len == 0) { - /* - * All data has been mapped, but the SGL contains additional - * segments and/or descriptors. The controller might accept - * ignoring the rest of the SGL. - */ - uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls); - if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) { - break; - } - - trace_pci_nvme_err_invalid_sgl_excess_length(dlen); - return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - } - - trans_len = MIN(*len, dlen); - - if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) { - goto next; - } - - addr = le64_to_cpu(segment[i].addr); - - if (UINT64_MAX - addr < dlen) { - return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - } - - status = nvme_map_addr(n, sg, addr, trans_len); - if (status) { - return status; - } - -next: - *len -= trans_len; - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, - size_t len, NvmeCmd *cmd) -{ - /* - * Read the segment in chunks of 256 descriptors (one 4k page) to avoid - * dynamically allocating a potentially huge SGL. 
The spec allows the SGL - * to be larger (as in number of bytes required to describe the SGL - * descriptors and segment chain) than the command transfer size, so it is - * not bounded by MDTS. - */ - const int SEG_CHUNK_SIZE = 256; - - NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; - uint64_t nsgld; - uint32_t seg_len; - uint16_t status; - hwaddr addr; - int ret; - - sgld = &sgl; - addr = le64_to_cpu(sgl.addr); - - trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len); - - nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr)); - - /* - * If the entire transfer can be described with a single data block it can - * be mapped directly. - */ - if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) { - status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd); - if (status) { - goto unmap; - } - - goto out; - } - - for (;;) { - switch (NVME_SGL_TYPE(sgld->type)) { - case NVME_SGL_DESCR_TYPE_SEGMENT: - case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: - break; - default: - return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; - } - - seg_len = le32_to_cpu(sgld->len); - - /* check the length of the (Last) Segment descriptor */ - if ((!seg_len || seg_len & 0xf) && - (NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) { - return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; - } - - if (UINT64_MAX - addr < seg_len) { - return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - } - - nsgld = seg_len / sizeof(NvmeSglDescriptor); - - while (nsgld > SEG_CHUNK_SIZE) { - if (nvme_addr_read(n, addr, segment, sizeof(segment))) { - trace_pci_nvme_err_addr_read(addr); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - - status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE, - &len, cmd); - if (status) { - goto unmap; - } - - nsgld -= SEG_CHUNK_SIZE; - addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor); - } - - ret = nvme_addr_read(n, addr, segment, nsgld * - sizeof(NvmeSglDescriptor)); - if (ret) { - trace_pci_nvme_err_addr_read(addr); - status = NVME_DATA_TRAS_ERROR; - goto unmap; - } - - last_sgld = &segment[nsgld - 1]; - - /* - * If the segment ends with a Data Block or Bit Bucket Descriptor Type, - * then we are done. - */ - switch (NVME_SGL_TYPE(last_sgld->type)) { - case NVME_SGL_DESCR_TYPE_DATA_BLOCK: - case NVME_SGL_DESCR_TYPE_BIT_BUCKET: - status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd); - if (status) { - goto unmap; - } - - goto out; - - default: - break; - } - - /* - * If the last descriptor was not a Data Block or Bit Bucket, then the - * current segment must not be a Last Segment. - */ - if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) { - status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; - goto unmap; - } - - sgld = last_sgld; - addr = le64_to_cpu(sgld->addr); - - /* - * Do not map the last descriptor; it will be a Segment or Last Segment - * descriptor and is handled by the next iteration. 
- */ - status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd); - if (status) { - goto unmap; - } - } - -out: - /* if there is any residual left in len, the SGL was too short */ - if (len) { - status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR; - goto unmap; - } - - return NVME_SUCCESS; - -unmap: - nvme_sg_unmap(sg); - return status; -} - -uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd) -{ - uint64_t prp1, prp2; - - switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) { - case NVME_PSDT_PRP: - prp1 = le64_to_cpu(cmd->dptr.prp1); - prp2 = le64_to_cpu(cmd->dptr.prp2); - - return nvme_map_prp(n, sg, prp1, prp2, len); - case NVME_PSDT_SGL_MPTR_CONTIGUOUS: - case NVME_PSDT_SGL_MPTR_SGL: - return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd); - default: - return NVME_INVALID_FIELD; - } -} - -static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd) -{ - int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags); - hwaddr mptr = le64_to_cpu(cmd->mptr); - uint16_t status; - - if (psdt == NVME_PSDT_SGL_MPTR_SGL) { - NvmeSglDescriptor sgl; - - if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) { - return NVME_DATA_TRAS_ERROR; - } - - status = nvme_map_sgl(n, sg, sgl, len, cmd); - if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) { - status = NVME_MD_SGL_LEN_INVALID | NVME_DNR; - } - - return status; - } - - nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr)); - status = nvme_map_addr(n, sg, mptr, len); - if (status) { - nvme_sg_unmap(sg); - } - - return status; -} - -static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); - size_t len = nvme_l2b(ns, nlb); - uint16_t status; - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { - goto out; - } - - if (nvme_ns_ext(ns)) { - NvmeSg sg; - - len += nvme_m2b(ns, nlb); - - status = nvme_map_dptr(n, &sg, len, &req->cmd); - if (status) { - return status; - } - - nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); - nvme_sg_split(&sg, ns, &req->sg, NULL); - nvme_sg_unmap(&sg); - - return NVME_SUCCESS; - } - -out: - return nvme_map_dptr(n, &req->sg, len, &req->cmd); -} - -static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - size_t len = nvme_m2b(ns, nlb); - uint16_t status; - - if (nvme_ns_ext(ns)) { - NvmeSg sg; - - len += nvme_l2b(ns, nlb); - - status = nvme_map_dptr(n, &sg, len, &req->cmd); - if (status) { - return status; - } - - nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); - nvme_sg_split(&sg, ns, NULL, &req->sg); - nvme_sg_unmap(&sg); - - return NVME_SUCCESS; - } - - return nvme_map_mptr(n, &req->sg, len, &req->cmd); -} - -static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, - uint32_t len, uint32_t bytes, - int32_t skip_bytes, int64_t offset, - NvmeTxDirection dir) -{ - hwaddr addr; - uint32_t trans_len, count = bytes; - bool dma = sg->flags & NVME_SG_DMA; - int64_t sge_len; - int sg_idx = 0; - int ret; - - assert(sg->flags & NVME_SG_ALLOC); - - while (len) { - sge_len = dma ? 
sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; - - if (sge_len - offset < 0) { - offset -= sge_len; - sg_idx++; - continue; - } - - if (sge_len == offset) { - offset = 0; - sg_idx++; - continue; - } - - trans_len = MIN(len, count); - trans_len = MIN(trans_len, sge_len - offset); - - if (dma) { - addr = sg->qsg.sg[sg_idx].base + offset; - } else { - addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset; - } - - if (dir == NVME_TX_DIRECTION_TO_DEVICE) { - ret = nvme_addr_read(n, addr, ptr, trans_len); - } else { - ret = nvme_addr_write(n, addr, ptr, trans_len); - } - - if (ret) { - return NVME_DATA_TRAS_ERROR; - } - - ptr += trans_len; - len -= trans_len; - count -= trans_len; - offset += trans_len; - - if (count == 0) { - count = bytes; - offset += skip_bytes; - } - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir) -{ - assert(sg->flags & NVME_SG_ALLOC); - - if (sg->flags & NVME_SG_DMA) { - uint64_t residual; - - if (dir == NVME_TX_DIRECTION_TO_DEVICE) { - residual = dma_buf_write(ptr, len, &sg->qsg); - } else { - residual = dma_buf_read(ptr, len, &sg->qsg); - } - - if (unlikely(residual)) { - trace_pci_nvme_err_invalid_dma(); - return NVME_INVALID_FIELD | NVME_DNR; - } - } else { - size_t bytes; - - if (dir == NVME_TX_DIRECTION_TO_DEVICE) { - bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len); - } else { - bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len); - } - - if (unlikely(bytes != len)) { - trace_pci_nvme_err_invalid_dma(); - return NVME_INVALID_FIELD | NVME_DNR; - } - } - - return NVME_SUCCESS; -} - -static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeRequest *req) -{ - uint16_t status; - - status = nvme_map_dptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE); -} - -static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeRequest *req) -{ - uint16_t status; - - status = nvme_map_dptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); -} - -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); - - if (nvme_ns_ext(ns) && - !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0, - dir); - } - - return nvme_tx(n, &req->sg, ptr, len, dir); -} - -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - uint16_t status; - - if (nvme_ns_ext(ns)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize, - dir); - } - - nvme_sg_unmap(&req->sg); - - status = nvme_map_mptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - return nvme_tx(n, &req->sg, ptr, len, dir); -} - -static inline void nvme_blk_read(BlockBackend *blk, int64_t offset, - BlockCompletionFunc *cb, NvmeRequest *req) -{ - assert(req->sg.flags & NVME_SG_ALLOC); - - if (req->sg.flags & NVME_SG_DMA) { - req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, - cb, req); - } else { - req->aiocb = 
blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req); - } -} - -static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, - BlockCompletionFunc *cb, NvmeRequest *req) -{ - assert(req->sg.flags & NVME_SG_ALLOC); - - if (req->sg.flags & NVME_SG_DMA) { - req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, - cb, req); - } else { - req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req); - } -} - -static void nvme_post_cqes(void *opaque) -{ - NvmeCQueue *cq = opaque; - NvmeCtrl *n = cq->ctrl; - NvmeRequest *req, *next; - int ret; - - QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { - NvmeSQueue *sq; - hwaddr addr; - - if (nvme_cq_full(cq)) { - break; - } - - sq = req->sq; - req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); - req->cqe.sq_id = cpu_to_le16(sq->sqid); - req->cqe.sq_head = cpu_to_le16(sq->head); - addr = cq->dma_addr + cq->tail * n->cqe_size; - ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, - sizeof(req->cqe)); - if (ret) { - trace_pci_nvme_err_addr_write(addr); - trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; - break; - } - QTAILQ_REMOVE(&cq->req_list, req, entry); - nvme_inc_cq_tail(cq); - nvme_sg_unmap(&req->sg); - QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); - } - if (cq->tail != cq->head) { - nvme_irq_assert(n, cq); - } -} - -static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) -{ - assert(cq->cqid == req->sq->cqid); - trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid, - req->status); - - if (req->status) { - trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns), - req->status, req->cmd.opcode); - } - - QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); - QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); -} - -static void nvme_process_aers(void *opaque) -{ - NvmeCtrl *n = opaque; - NvmeAsyncEvent *event, *next; - - trace_pci_nvme_process_aers(n->aer_queued); - - QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { - NvmeRequest *req; - NvmeAerResult *result; - - /* can't post cqe if there is nothing to complete */ - if (!n->outstanding_aers) { - trace_pci_nvme_no_outstanding_aers(); - break; - } - - /* ignore if masked (cqe posted, but event not cleared) */ - if (n->aer_mask & (1 << event->result.event_type)) { - trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask); - continue; - } - - QTAILQ_REMOVE(&n->aer_queue, event, entry); - n->aer_queued--; - - n->aer_mask |= 1 << event->result.event_type; - n->outstanding_aers--; - - req = n->aer_reqs[n->outstanding_aers]; - - result = (NvmeAerResult *) &req->cqe.result; - result->event_type = event->result.event_type; - result->event_info = event->result.event_info; - result->log_page = event->result.log_page; - g_free(event); - - trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info, - result->log_page); - - nvme_enqueue_req_completion(&n->admin_cq, req); - } -} - -static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, - uint8_t event_info, uint8_t log_page) -{ - NvmeAsyncEvent *event; - - trace_pci_nvme_enqueue_event(event_type, event_info, log_page); - - if (n->aer_queued == n->params.aer_max_queued) { - trace_pci_nvme_enqueue_event_noqueue(n->aer_queued); - return; - } - - event = g_new(NvmeAsyncEvent, 1); - event->result = (NvmeAerResult) { - .event_type = event_type, - .event_info = event_info, - .log_page = log_page, - }; - - QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry); - n->aer_queued++; - 
- nvme_process_aers(n); -} - -static void nvme_smart_event(NvmeCtrl *n, uint8_t event) -{ - uint8_t aer_info; - - /* Ref SPEC */ - if (!(NVME_AEC_SMART(n->features.async_config) & event)) { - return; - } - - switch (event) { - case NVME_SMART_SPARE: - aer_info = NVME_AER_INFO_SMART_SPARE_THRESH; - break; - case NVME_SMART_TEMPERATURE: - aer_info = NVME_AER_INFO_SMART_TEMP_THRESH; - break; - case NVME_SMART_RELIABILITY: - case NVME_SMART_MEDIA_READ_ONLY: - case NVME_SMART_FAILED_VOLATILE_MEDIA: - case NVME_SMART_PMR_UNRELIABLE: - aer_info = NVME_AER_INFO_SMART_RELIABILITY; - break; - default: - return; - } - - nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO); -} - -static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) -{ - n->aer_mask &= ~(1 << event_type); - if (!QTAILQ_EMPTY(&n->aer_queue)) { - nvme_process_aers(n); - } -} - -static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len) -{ - uint8_t mdts = n->params.mdts; - - if (mdts && len > n->page_size << mdts) { - trace_pci_nvme_err_mdts(len); - return NVME_INVALID_FIELD | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) -{ - uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); - - if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { - return NVME_LBA_RANGE | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) -{ - BlockDriverState *bs = blk_bs(ns->blkconf.blk); - - int64_t pnum = 0, bytes = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); - bool zeroed; - int ret; - - Error *local_err = NULL; - - /* - * `pnum` holds the number of bytes after offset that shares the same - * allocation status as the byte at offset. If `pnum` is different from - * `bytes`, we should check the allocation status of the next range and - * continue this until all bytes have been checked. - */ - do { - bytes -= pnum; - - ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); - if (ret < 0) { - error_setg_errno(&local_err, -ret, "unable to get block status"); - error_report_err(local_err); - - return NVME_INTERNAL_DEV_ERROR; - } - - zeroed = !!(ret & BDRV_BLOCK_ZERO); - - trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); - - if (zeroed) { - return NVME_DULB; - } - - offset += pnum; - } while (pnum != bytes); - - return NVME_SUCCESS; -} - -static void nvme_aio_err(NvmeRequest *req, int ret) -{ - uint16_t status = NVME_SUCCESS; - Error *local_err = NULL; - - switch (req->cmd.opcode) { - case NVME_CMD_READ: - status = NVME_UNRECOVERED_READ; - break; - case NVME_CMD_FLUSH: - case NVME_CMD_WRITE: - case NVME_CMD_WRITE_ZEROES: - case NVME_CMD_ZONE_APPEND: - status = NVME_WRITE_FAULT; - break; - default: - status = NVME_INTERNAL_DEV_ERROR; - break; - } - - trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); - - error_setg_errno(&local_err, -ret, "aio failed"); - error_report_err(local_err); - - /* - * Set the command status code to the first encountered error but allow a - * subsequent Internal Device Error to trump it. - */ - if (req->status && status != NVME_INTERNAL_DEV_ERROR) { - return; - } - - req->status = status; -} - -static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) -{ - return ns->zone_size_log2 > 0 ? 
slba >> ns->zone_size_log2 : - slba / ns->zone_size; -} - -static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba) -{ - uint32_t zone_idx = nvme_zone_idx(ns, slba); - - assert(zone_idx < ns->num_zones); - return &ns->zone_array[zone_idx]; -} - -static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone) -{ - uint64_t zslba = zone->d.zslba; - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EMPTY: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_CLOSED: - return NVME_SUCCESS; - case NVME_ZONE_STATE_FULL: - trace_pci_nvme_err_zone_is_full(zslba); - return NVME_ZONE_FULL; - case NVME_ZONE_STATE_OFFLINE: - trace_pci_nvme_err_zone_is_offline(zslba); - return NVME_ZONE_OFFLINE; - case NVME_ZONE_STATE_READ_ONLY: - trace_pci_nvme_err_zone_is_read_only(zslba); - return NVME_ZONE_READ_ONLY; - default: - assert(false); - } - - return NVME_INTERNAL_DEV_ERROR; -} - -static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone, - uint64_t slba, uint32_t nlb) -{ - uint64_t zcap = nvme_zone_wr_boundary(zone); - uint16_t status; - - status = nvme_check_zone_state_for_write(zone); - if (status) { - return status; - } - - if (unlikely(slba != zone->w_ptr)) { - trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr); - return NVME_ZONE_INVALID_WRITE; - } - - if (unlikely((slba + nlb) > zcap)) { - trace_pci_nvme_err_zone_boundary(slba, nlb, zcap); - return NVME_ZONE_BOUNDARY_ERROR; - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone) -{ - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EMPTY: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_FULL: - case NVME_ZONE_STATE_CLOSED: - case NVME_ZONE_STATE_READ_ONLY: - return NVME_SUCCESS; - case NVME_ZONE_STATE_OFFLINE: - trace_pci_nvme_err_zone_is_offline(zone->d.zslba); - return NVME_ZONE_OFFLINE; - default: - assert(false); - } - - return NVME_INTERNAL_DEV_ERROR; -} - -static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba, - uint32_t nlb) -{ - NvmeZone *zone = nvme_get_zone_by_slba(ns, slba); - uint64_t bndry = nvme_zone_rd_boundary(ns, zone); - uint64_t end = slba + nlb; - uint16_t status; - - status = nvme_check_zone_state_for_read(zone); - if (status) { - ; - } else if (unlikely(end > bndry)) { - if (!ns->params.cross_zone_read) { - status = NVME_ZONE_BOUNDARY_ERROR; - } else { - /* - * Read across zone boundary - check that all subsequent - * zones that are being read have an appropriate state. 
- */ - do { - zone++; - status = nvme_check_zone_state_for_read(zone); - if (status) { - break; - } - } while (end > nvme_zone_rd_boundary(ns, zone)); - } - } - - return status; -} - -static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone) -{ - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_FULL: - return NVME_SUCCESS; - - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - nvme_aor_dec_open(ns); - /* fallthrough */ - case NVME_ZONE_STATE_CLOSED: - nvme_aor_dec_active(ns); - /* fallthrough */ - case NVME_ZONE_STATE_EMPTY: - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL); - return NVME_SUCCESS; - - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone) -{ - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - nvme_aor_dec_open(ns); - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); - /* fall through */ - case NVME_ZONE_STATE_CLOSED: - return NVME_SUCCESS; - - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) -{ - NvmeZone *zone; - - if (ns->params.max_open_zones && - ns->nr_open_zones == ns->params.max_open_zones) { - zone = QTAILQ_FIRST(&ns->imp_open_zones); - if (zone) { - /* - * Automatically close this implicitly open zone. - */ - QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); - nvme_zrm_close(ns, zone); - } - } -} - -static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, - bool implicit) -{ - int act = 0; - uint16_t status; - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EMPTY: - act = 1; - - /* fallthrough */ - - case NVME_ZONE_STATE_CLOSED: - nvme_zrm_auto_transition_zone(ns); - status = nvme_aor_check(ns, act, 1); - if (status) { - return status; - } - - if (act) { - nvme_aor_inc_active(ns); - } - - nvme_aor_inc_open(ns); - - if (implicit) { - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); - return NVME_SUCCESS; - } - - /* fallthrough */ - - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - if (implicit) { - return NVME_SUCCESS; - } - - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN); - - /* fallthrough */ - - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - return NVME_SUCCESS; - - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone) -{ - return __nvme_zrm_open(ns, zone, true); -} - -static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone) -{ - return __nvme_zrm_open(ns, zone, false); -} - -static void __nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, - uint32_t nlb) -{ - zone->d.wp += nlb; - - if (zone->d.wp == nvme_zone_wr_boundary(zone)) { - nvme_zrm_finish(ns, zone); - } -} - -static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeZone *zone; - uint64_t slba; - uint32_t nlb; - - slba = le64_to_cpu(rw->slba); - nlb = le16_to_cpu(rw->nlb) + 1; - zone = nvme_get_zone_by_slba(ns, slba); - - __nvme_advance_zone_wp(ns, zone, nlb); -} - -static inline bool nvme_is_write(NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - - return rw->opcode == NVME_CMD_WRITE || - rw->opcode == NVME_CMD_ZONE_APPEND || - rw->opcode == NVME_CMD_WRITE_ZEROES; -} - -static void nvme_misc_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - - BlockBackend *blk = ns->blkconf.blk; 
- BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_misc_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } else { - block_acct_done(stats, acct); - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -void nvme_rw_complete_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } else { - block_acct_done(stats, acct); - } - - if (ns->params.zoned && nvme_is_write(req)) { - nvme_finalize_zoned_write(ns, req); - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_rw_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - if (nvme_msize(ns)) { - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - - if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) { - size_t mlen = nvme_m2b(ns, nlb); - - req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen, - BDRV_REQ_MAY_UNMAP, - nvme_rw_complete_cb, req); - return; - } - - if (nvme_ns_ext(ns) || req->cmd.mptr) { - uint16_t status; - - nvme_sg_unmap(&req->sg); - status = nvme_map_mdata(nvme_ctrl(req), nlb, req); - if (status) { - ret = -EFAULT; - goto out; - } - - if (req->cmd.opcode == NVME_CMD_READ) { - return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req); - } - - return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req); - } - } - -out: - nvme_rw_complete_cb(req, ret); -} - -struct nvme_aio_format_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - - /* number of outstanding write zeroes for this namespace */ - int *count; -}; - -static void nvme_aio_format_cb(void *opaque, int ret) -{ - struct nvme_aio_format_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = ctx->ns; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count = ctx->count; - - g_free(ctx); - - if (ret) { - nvme_aio_err(req, ret); - } - - if (--(*count)) { - return; - } - - g_free(count); - ns->status = 0x0; - - if (--(*num_formats)) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -struct nvme_aio_flush_ctx { - NvmeRequest *req; - NvmeNamespace *ns; - BlockAcctCookie acct; -}; - -static void nvme_aio_flush_cb(void *opaque, int ret) -{ - struct nvme_aio_flush_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - - BlockBackend *blk = ctx->ns->blkconf.blk; - BlockAcctCookie *acct = &ctx->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - trace_pci_nvme_aio_flush_cb(nvme_cid(req), blk_name(blk)); - - if (!ret) { - block_acct_done(stats, acct); - } else { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - } - - (*num_flushes)--; - g_free(ctx); - - if (*num_flushes) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_verify_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - 
BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - uint16_t status; - - trace_pci_nvme_verify_cb(nvme_cid(req), NVME_RW_PRINFO(ctrl), apptag, - appmask, reftag); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - goto out; - } - - block_acct_done(stats, acct); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, - ctx->mdata.iov.size, slba); - if (status) { - req->status = status; - goto out; - } - - req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, - ctrl, slba, apptag, appmask, reftag); - } - -out: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - g_free(ctx->mdata.bounce); - - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - - -static void nvme_verify_mdata_in_cb(void *opaque, int ret) -{ - NvmeBounceContext *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - BlockBackend *blk = ns->blkconf.blk; - - trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); - - if (ret) { - goto out; - } - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_reset(&ctx->mdata.iov); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, - nvme_verify_cb, ctx); - return; - -out: - nvme_verify_cb(ctx, ret); -} - -static void nvme_aio_discard_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - uintptr_t *discards = (uintptr_t *)&req->opaque; - - trace_pci_nvme_aio_discard_cb(nvme_cid(req)); - - if (ret) { - nvme_aio_err(req, ret); - } - - (*discards)--; - - if (*discards) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -struct nvme_zone_reset_ctx { - NvmeRequest *req; - NvmeZone *zone; -}; - -static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret) -{ - struct nvme_zone_reset_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeZone *zone = ctx->zone; - uintptr_t *resets = (uintptr_t *)&req->opaque; - - if (ret) { - nvme_aio_err(req, ret); - goto out; - } - - switch (nvme_get_zone_state(zone)) { - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - nvme_aor_dec_open(ns); - /* fall through */ - case NVME_ZONE_STATE_CLOSED: - nvme_aor_dec_active(ns); - /* fall through */ - case NVME_ZONE_STATE_FULL: - zone->w_ptr = zone->d.zslba; - zone->d.wp = zone->w_ptr; - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); - /* fall through */ - default: - break; - } - -out: - g_free(ctx); - - (*resets)--; - - if (*resets) { - return; - } - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_aio_zone_reset_cb(void *opaque, int ret) -{ - struct nvme_zone_reset_ctx *ctx = opaque; - NvmeRequest *req = ctx->req; - NvmeNamespace *ns = req->ns; - NvmeZone *zone = ctx->zone; - - trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba); - - if (ret) { - goto out; - } - - if 
(nvme_msize(ns)) { - int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba); - - blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, - nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, - nvme_aio_zone_reset_complete_cb, ctx); - return; - } - -out: - nvme_aio_zone_reset_complete_cb(opaque, ret); -} - -struct nvme_copy_ctx { - int copies; - uint8_t *bounce; - uint8_t *mbounce; - uint32_t nlb; - NvmeCopySourceRange *ranges; -}; - -struct nvme_copy_in_ctx { - NvmeRequest *req; - QEMUIOVector iov; - NvmeCopySourceRange *range; -}; - -static void nvme_copy_complete_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - if (ret) { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - nvme_aio_err(req, ret); - goto out; - } - - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - -out: - if (ns->params.zoned) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - - __nvme_advance_zone_wp(ns, zone, ctx->nlb); - } - - g_free(ctx->bounce); - g_free(ctx->mbounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_copy_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - trace_pci_nvme_copy_cb(nvme_cid(req)); - - if (ret) { - goto out; - } - - if (nvme_msize(ns)) { - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba); - - qemu_iovec_reset(&req->sg.iov); - qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb)); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &req->sg.iov, 0, - nvme_copy_complete_cb, req); - return; - } - -out: - nvme_copy_complete_cb(opaque, ret); -} - -static void nvme_copy_in_complete(NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - struct nvme_copy_ctx *ctx = req->opaque; - uint64_t sdlba = le64_to_cpu(copy->sdlba); - uint16_t status; - - trace_pci_nvme_copy_in_complete(nvme_cid(req)); - - block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - uint16_t prinfor = (copy->control[0] >> 4) & 0xf; - uint16_t prinfow = (copy->control[2] >> 2) & 0xf; - uint16_t nr = copy->nr + 1; - NvmeCopySourceRange *range; - uint64_t slba; - uint32_t nlb; - uint16_t apptag, appmask; - uint32_t reftag; - uint8_t *buf = ctx->bounce, *mbuf = ctx->mbounce; - size_t len, mlen; - int i; - - /* - * The dif helpers expects prinfo to be similar to the control field of - * the NvmeRwCmd, so shift by 10 to fake it. 
- */ - prinfor = prinfor << 10; - prinfow = prinfow << 10; - - for (i = 0; i < nr; i++) { - range = &ctx->ranges[i]; - slba = le64_to_cpu(range->slba); - nlb = le16_to_cpu(range->nlb) + 1; - len = nvme_l2b(ns, nlb); - mlen = nvme_m2b(ns, nlb); - apptag = le16_to_cpu(range->apptag); - appmask = le16_to_cpu(range->appmask); - reftag = le32_to_cpu(range->reftag); - - status = nvme_dif_check(ns, buf, len, mbuf, mlen, prinfor, slba, - apptag, appmask, reftag); - if (status) { - goto invalid; - } - - buf += len; - mbuf += mlen; - } - - apptag = le16_to_cpu(copy->apptag); - appmask = le16_to_cpu(copy->appmask); - reftag = le32_to_cpu(copy->reftag); - - if (prinfow & NVME_RW_PRINFO_PRACT) { - size_t len = nvme_l2b(ns, ctx->nlb); - size_t mlen = nvme_m2b(ns, ctx->nlb); - - status = nvme_check_prinfo(ns, prinfow, sdlba, reftag); - if (status) { - goto invalid; - } - - nvme_dif_pract_generate_dif(ns, ctx->bounce, len, ctx->mbounce, - mlen, apptag, reftag); - } else { - status = nvme_dif_check(ns, ctx->bounce, len, ctx->mbounce, mlen, - prinfow, sdlba, apptag, appmask, reftag); - if (status) { - goto invalid; - } - } - } - - status = nvme_check_bounds(ns, sdlba, ctx->nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); - goto invalid; - } - - if (ns->params.zoned) { - NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - - status = nvme_check_zone_write(ns, zone, sdlba, ctx->nlb); - if (status) { - goto invalid; - } - - status = nvme_zrm_auto(ns, zone); - if (status) { - goto invalid; - } - - zone->w_ptr += ctx->nlb; - } - - qemu_iovec_init(&req->sg.iov, 1); - qemu_iovec_add(&req->sg.iov, ctx->bounce, nvme_l2b(ns, ctx->nlb)); - - block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_WRITE); - - req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba), - &req->sg.iov, 0, nvme_copy_cb, req); - - return; - -invalid: - req->status = status; - - g_free(ctx->bounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_aio_copy_in_cb(void *opaque, int ret) -{ - struct nvme_copy_in_ctx *in_ctx = opaque; - NvmeRequest *req = in_ctx->req; - NvmeNamespace *ns = req->ns; - struct nvme_copy_ctx *ctx = req->opaque; - - qemu_iovec_destroy(&in_ctx->iov); - g_free(in_ctx); - - trace_pci_nvme_aio_copy_in_cb(nvme_cid(req)); - - if (ret) { - nvme_aio_err(req, ret); - } - - ctx->copies--; - - if (ctx->copies) { - return; - } - - if (req->status) { - block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct); - - g_free(ctx->bounce); - g_free(ctx->mbounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); - - return; - } - - nvme_copy_in_complete(req); -} - -struct nvme_compare_ctx { - struct { - QEMUIOVector iov; - uint8_t *bounce; - } data; - - struct { - QEMUIOVector iov; - uint8_t *bounce; - } mdata; -}; - -static void nvme_compare_mdata_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeNamespace *ns = req->ns; - NvmeCtrl *n = nvme_ctrl(req); - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint16_t ctrl = le16_to_cpu(rw->control); - uint16_t apptag = le16_to_cpu(rw->apptag); - uint16_t appmask = le16_to_cpu(rw->appmask); - uint32_t reftag = le32_to_cpu(rw->reftag); - struct nvme_compare_ctx *ctx = req->opaque; - g_autofree uint8_t *buf = NULL; - uint16_t status = NVME_SUCCESS; - - trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); - - buf = g_malloc(ctx->mdata.iov.size); - - status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { 
- req->status = status; - goto out; - } - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - uint64_t slba = le64_to_cpu(rw->slba); - uint8_t *bufp; - uint8_t *mbufp = ctx->mdata.bounce; - uint8_t *end = mbufp + ctx->mdata.iov.size; - size_t msize = nvme_msize(ns); - int16_t pil = 0; - - status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, - ctx->mdata.bounce, ctx->mdata.iov.size, ctrl, - slba, apptag, appmask, reftag); - if (status) { - req->status = status; - goto out; - } - - /* - * When formatted with protection information, do not compare the DIF - * tuple. - */ - if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); - } - - for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) { - if (memcmp(bufp + pil, mbufp + pil, msize - pil)) { - req->status = NVME_CMP_FAILURE; - goto out; - } - } - - goto out; - } - - if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) { - req->status = NVME_CMP_FAILURE; - goto out; - } - -out: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - - qemu_iovec_destroy(&ctx->mdata.iov); - g_free(ctx->mdata.bounce); - - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static void nvme_compare_data_cb(void *opaque, int ret) -{ - NvmeRequest *req = opaque; - NvmeCtrl *n = nvme_ctrl(req); - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - BlockAcctCookie *acct = &req->acct; - BlockAcctStats *stats = blk_get_stats(blk); - - struct nvme_compare_ctx *ctx = req->opaque; - g_autofree uint8_t *buf = NULL; - uint16_t status; - - trace_pci_nvme_compare_data_cb(nvme_cid(req)); - - if (ret) { - block_acct_failed(stats, acct); - nvme_aio_err(req, ret); - goto out; - } - - buf = g_malloc(ctx->data.iov.size); - - status = nvme_bounce_data(n, buf, ctx->data.iov.size, - NVME_TX_DIRECTION_TO_DEVICE, req); - if (status) { - req->status = status; - goto out; - } - - if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) { - req->status = NVME_CMP_FAILURE; - goto out; - } - - if (nvme_msize(ns)) { - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); - - ctx->mdata.bounce = g_malloc(mlen); - - qemu_iovec_init(&ctx->mdata.iov, 1); - qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - - req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, - nvme_compare_mdata_cb, req); - return; - } - - block_acct_done(stats, acct); - -out: - qemu_iovec_destroy(&ctx->data.iov); - g_free(ctx->data.bounce); - g_free(ctx); - - nvme_enqueue_req_completion(nvme_cq(req), req); -} - -static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd; - - uint32_t attr = le32_to_cpu(dsm->attributes); - uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1; - - uint16_t status = NVME_SUCCESS; - - trace_pci_nvme_dsm(nvme_cid(req), nvme_nsid(ns), nr, attr); - - if (attr & NVME_DSMGMT_AD) { - int64_t offset; - size_t len; - NvmeDsmRange range[nr]; - uintptr_t *discards = (uintptr_t *)&req->opaque; - - status = nvme_h2c(n, (uint8_t *)range, sizeof(range), req); - if (status) { - return status; - } - - /* - * AIO callbacks may be called immediately, so initialize discards to 1 - * to make sure the the callback does not complete the request before - * all discards have been issued. 
- */ - *discards = 1; - - for (int i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(range[i].slba); - uint32_t nlb = le32_to_cpu(range[i].nlb); - - if (nvme_check_bounds(ns, slba, nlb)) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, - ns->id_ns.nsze); - continue; - } - - trace_pci_nvme_dsm_deallocate(nvme_cid(req), nvme_nsid(ns), slba, - nlb); - - if (nlb > n->dmrsl) { - trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); - } - - offset = nvme_l2b(ns, slba); - len = nvme_l2b(ns, nlb); - - while (len) { - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); - - (*discards)++; - - blk_aio_pdiscard(ns->blkconf.blk, offset, bytes, - nvme_aio_discard_cb, req); - - offset += bytes; - len -= bytes; - } - } - - /* account for the 1-initialization */ - (*discards)--; - - if (*discards) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; - } - } - - return status; -} - -static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - size_t len = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); - uint16_t ctrl = le16_to_cpu(rw->control); - uint32_t reftag = le32_to_cpu(rw->reftag); - NvmeBounceContext *ctx = NULL; - uint16_t status; - - trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - status = nvme_check_prinfo(ns, ctrl, slba, reftag); - if (status) { - return status; - } - - if (ctrl & NVME_RW_PRINFO_PRACT) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - } - - if (len > n->page_size << n->params.vsl) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - return status; - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); - if (status) { - return status; - } - } - - ctx = g_new0(NvmeBounceContext, 1); - ctx->req = req; - - ctx->data.bounce = g_malloc(len); - - qemu_iovec_init(&ctx->data.iov, 1); - qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); - - block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, - BLOCK_ACCT_READ); - - req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, - nvme_verify_mdata_in_cb, ctx); - return NVME_NO_COMPLETE; -} - -static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns = req->ns; - NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; - - uint16_t nr = copy->nr + 1; - uint8_t format = copy->control[0] & 0xf; - - /* - * Shift the PRINFOR/PRINFOW values by 10 to allow reusing the - * NVME_RW_PRINFO constants. 
- */ - uint16_t prinfor = ((copy->control[0] >> 4) & 0xf) << 10; - uint16_t prinfow = ((copy->control[2] >> 2) & 0xf) << 10; - - uint32_t nlb = 0; - uint8_t *bounce = NULL, *bouncep = NULL; - uint8_t *mbounce = NULL, *mbouncep = NULL; - struct nvme_copy_ctx *ctx; - uint16_t status; - int i; - - trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - ((prinfor & NVME_RW_PRINFO_PRACT) != (prinfow & NVME_RW_PRINFO_PRACT))) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (!(n->id_ctrl.ocfs & (1 << format))) { - trace_pci_nvme_err_copy_invalid_format(format); - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nr > ns->id_ns.msrc + 1) { - return NVME_CMD_SIZE_LIMIT | NVME_DNR; - } - - ctx = g_new(struct nvme_copy_ctx, 1); - ctx->ranges = g_new(NvmeCopySourceRange, nr); - - status = nvme_h2c(n, (uint8_t *)ctx->ranges, - nr * sizeof(NvmeCopySourceRange), req); - if (status) { - goto out; - } - - for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); - uint32_t _nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; - - if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) { - status = NVME_CMD_SIZE_LIMIT | NVME_DNR; - goto out; - } - - status = nvme_check_bounds(ns, slba, _nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); - goto out; - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, _nlb); - if (status) { - goto out; - } - } - - if (ns->params.zoned) { - status = nvme_check_zone_read(ns, slba, _nlb); - if (status) { - goto out; - } - } - - nlb += _nlb; - } - - if (nlb > le32_to_cpu(ns->id_ns.mcl)) { - status = NVME_CMD_SIZE_LIMIT | NVME_DNR; - goto out; - } - - bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); - if (nvme_msize(ns)) { - mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb)); - } - - block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_READ); - - ctx->bounce = bounce; - ctx->mbounce = mbounce; - ctx->nlb = nlb; - ctx->copies = 1; - - req->opaque = ctx; - - for (i = 0; i < nr; i++) { - uint64_t slba = le64_to_cpu(ctx->ranges[i].slba); - uint32_t nlb = le16_to_cpu(ctx->ranges[i].nlb) + 1; - - size_t len = nvme_l2b(ns, nlb); - int64_t offset = nvme_l2b(ns, slba); - - trace_pci_nvme_copy_source_range(slba, nlb); - - struct nvme_copy_in_ctx *in_ctx = g_new(struct nvme_copy_in_ctx, 1); - in_ctx->req = req; - - qemu_iovec_init(&in_ctx->iov, 1); - qemu_iovec_add(&in_ctx->iov, bouncep, len); - - ctx->copies++; - - blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, - nvme_aio_copy_in_cb, in_ctx); - - bouncep += len; - - if (nvme_msize(ns)) { - len = nvme_m2b(ns, nlb); - offset = ns->mdata_offset + nvme_m2b(ns, slba); - - in_ctx = g_new(struct nvme_copy_in_ctx, 1); - in_ctx->req = req; - - qemu_iovec_init(&in_ctx->iov, 1); - qemu_iovec_add(&in_ctx->iov, mbouncep, len); - - ctx->copies++; - - blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0, - nvme_aio_copy_in_cb, in_ctx); - - mbouncep += len; - } - } - - /* account for the 1-initialization */ - ctx->copies--; - - if (!ctx->copies) { - nvme_copy_in_complete(req); - } - - return NVME_NO_COMPLETE; - -out: - g_free(ctx->ranges); - g_free(ctx); - - return status; -} - -static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - BlockBackend *blk = ns->blkconf.blk; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); - size_t 
data_len = nvme_l2b(ns, nlb); - size_t len = data_len; - int64_t offset = nvme_l2b(ns, slba); - struct nvme_compare_ctx *ctx = NULL; - uint16_t status; - - trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (ctrl & NVME_RW_PRINFO_PRACT)) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - if (nvme_ns_ext(ns)) { - len += nvme_m2b(ns, nlb); - } - - status = nvme_check_mdts(n, len); - if (status) { - return status; - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - return status; - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); - if (status) { - return status; - } - } - - status = nvme_map_dptr(n, &req->sg, len, &req->cmd); - if (status) { - return status; - } - - ctx = g_new(struct nvme_compare_ctx, 1); - ctx->data.bounce = g_malloc(data_len); - - req->opaque = ctx; - - qemu_iovec_init(&ctx->data.iov, 1); - qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len); - - block_acct_start(blk_get_stats(blk), &req->acct, data_len, - BLOCK_ACCT_READ); - req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0, - nvme_compare_data_cb, req); - - return NVME_NO_COMPLETE; -} - -static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uintptr_t *num_flushes = (uintptr_t *)&req->opaque; - uint16_t status; - struct nvme_aio_flush_ctx *ctx; - NvmeNamespace *ns; - - trace_pci_nvme_flush(nvme_cid(req), nsid); - - if (nsid != NVME_NSID_BROADCAST) { - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - block_acct_start(blk_get_stats(req->ns->blkconf.blk), &req->acct, 0, - BLOCK_ACCT_FLUSH); - req->aiocb = blk_aio_flush(req->ns->blkconf.blk, nvme_misc_cb, req); - return NVME_NO_COMPLETE; - } - - /* 1-initialize; see comment in nvme_dsm */ - *num_flushes = 1; - - for (int i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - ctx = g_new(struct nvme_aio_flush_ctx, 1); - ctx->req = req; - ctx->ns = ns; - - (*num_flushes)++; - - block_acct_start(blk_get_stats(ns->blkconf.blk), &ctx->acct, 0, - BLOCK_ACCT_FLUSH); - blk_aio_flush(ns->blkconf.blk, nvme_aio_flush_cb, ctx); - } - - /* account for the 1-initialization */ - (*num_flushes)--; - - if (*num_flushes) { - status = NVME_NO_COMPLETE; - } else { - status = req->status; - } - - return status; -} - -static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); - uint64_t data_size = nvme_l2b(ns, nlb); - uint64_t mapped_size = data_size; - uint64_t data_offset; - BlockBackend *blk = ns->blkconf.blk; - uint16_t status; - - if (nvme_ns_ext(ns)) { - mapped_size += nvme_m2b(ns, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - bool pract = ctrl & NVME_RW_PRINFO_PRACT; - - if (pract && nvme_msize(ns) == 8) { - mapped_size = data_size; - } - } - } - - trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba); - - status = nvme_check_mdts(n, mapped_size); - if (status) { - goto invalid; - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - goto invalid; - } - - if (ns->params.zoned) { - status = nvme_check_zone_read(ns, slba, 
nlb); - if (status) { - trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status); - goto invalid; - } - } - - if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { - status = nvme_check_dulbe(ns, slba, nlb); - if (status) { - goto invalid; - } - } - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - return nvme_dif_rw(n, req); - } - - status = nvme_map_data(n, nlb, req); - if (status) { - goto invalid; - } - - data_offset = nvme_l2b(ns, slba); - - block_acct_start(blk_get_stats(blk), &req->acct, data_size, - BLOCK_ACCT_READ); - nvme_blk_read(blk, data_offset, nvme_rw_cb, req); - return NVME_NO_COMPLETE; - -invalid: - block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ); - return status | NVME_DNR; -} - -static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, - bool wrz) -{ - NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - uint64_t slba = le64_to_cpu(rw->slba); - uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint16_t ctrl = le16_to_cpu(rw->control); - uint64_t data_size = nvme_l2b(ns, nlb); - uint64_t mapped_size = data_size; - uint64_t data_offset; - NvmeZone *zone; - NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe; - BlockBackend *blk = ns->blkconf.blk; - uint16_t status; - - if (nvme_ns_ext(ns)) { - mapped_size += nvme_m2b(ns, nlb); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - bool pract = ctrl & NVME_RW_PRINFO_PRACT; - - if (pract && nvme_msize(ns) == 8) { - mapped_size -= nvme_m2b(ns, nlb); - } - } - } - - trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode), - nvme_nsid(ns), nlb, mapped_size, slba); - - if (!wrz) { - status = nvme_check_mdts(n, mapped_size); - if (status) { - goto invalid; - } - } - - status = nvme_check_bounds(ns, slba, nlb); - if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); - goto invalid; - } - - if (ns->params.zoned) { - zone = nvme_get_zone_by_slba(ns, slba); - - if (append) { - bool piremap = !!(ctrl & NVME_RW_PIREMAP); - - if (unlikely(slba != zone->d.zslba)) { - trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); - status = NVME_INVALID_FIELD; - goto invalid; - } - - if (n->params.zasl && - data_size > (uint64_t)n->page_size << n->params.zasl) { - trace_pci_nvme_err_zasl(data_size); - return NVME_INVALID_FIELD | NVME_DNR; - } - - slba = zone->w_ptr; - rw->slba = cpu_to_le64(slba); - res->slba = cpu_to_le64(slba); - - switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - case NVME_ID_NS_DPS_TYPE_1: - if (!piremap) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - /* fallthrough */ - - case NVME_ID_NS_DPS_TYPE_2: - if (piremap) { - uint32_t reftag = le32_to_cpu(rw->reftag); - rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba)); - } - - break; - - case NVME_ID_NS_DPS_TYPE_3: - if (piremap) { - return NVME_INVALID_PROT_INFO | NVME_DNR; - } - - break; - } - } - - status = nvme_check_zone_write(ns, zone, slba, nlb); - if (status) { - goto invalid; - } - - status = nvme_zrm_auto(ns, zone); - if (status) { - goto invalid; - } - - zone->w_ptr += nlb; - } - - data_offset = nvme_l2b(ns, slba); - - if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { - return nvme_dif_rw(n, req); - } - - if (!wrz) { - status = nvme_map_data(n, nlb, req); - if (status) { - goto invalid; - } - - block_acct_start(blk_get_stats(blk), &req->acct, data_size, - BLOCK_ACCT_WRITE); - nvme_blk_write(blk, data_offset, nvme_rw_cb, req); - } else { - req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size, - BDRV_REQ_MAY_UNMAP, nvme_rw_cb, - req); - } - - return NVME_NO_COMPLETE; - -invalid: - 
block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE); - return status | NVME_DNR; -} - -static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req) -{ - return nvme_do_write(n, req, false, false); -} - -static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req) -{ - return nvme_do_write(n, req, false, true); -} - -static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req) -{ - return nvme_do_write(n, req, true, false); -} - -static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c, - uint64_t *slba, uint32_t *zone_idx) -{ - uint32_t dw10 = le32_to_cpu(c->cdw10); - uint32_t dw11 = le32_to_cpu(c->cdw11); - - if (!ns->params.zoned) { - trace_pci_nvme_err_invalid_opc(c->opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - - *slba = ((uint64_t)dw11) << 32 | dw10; - if (unlikely(*slba >= ns->id_ns.nsze)) { - trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze); - *slba = 0; - return NVME_LBA_RANGE | NVME_DNR; - } - - *zone_idx = nvme_zone_idx(ns, *slba); - assert(*zone_idx < ns->num_zones); - - return NVME_SUCCESS; -} - -typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState, - NvmeRequest *); - -enum NvmeZoneProcessingMask { - NVME_PROC_CURRENT_ZONE = 0, - NVME_PROC_OPENED_ZONES = 1 << 0, - NVME_PROC_CLOSED_ZONES = 1 << 1, - NVME_PROC_READ_ONLY_ZONES = 1 << 2, - NVME_PROC_FULL_ZONES = 1 << 3, -}; - -static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - return nvme_zrm_open(ns, zone); -} - -static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - return nvme_zrm_close(ns, zone); -} - -static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - return nvme_zrm_finish(ns, zone); -} - -static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - uintptr_t *resets = (uintptr_t *)&req->opaque; - struct nvme_zone_reset_ctx *ctx; - - switch (state) { - case NVME_ZONE_STATE_EMPTY: - return NVME_SUCCESS; - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_CLOSED: - case NVME_ZONE_STATE_FULL: - break; - default: - return NVME_ZONE_INVAL_TRANSITION; - } - - /* - * The zone reset aio callback needs to know the zone that is being reset - * in order to transition the zone on completion. 
- */ - ctx = g_new(struct nvme_zone_reset_ctx, 1); - ctx->req = req; - ctx->zone = zone; - - (*resets)++; - - blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba), - nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, - nvme_aio_zone_reset_cb, ctx); - - return NVME_NO_COMPLETE; -} - -static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone, - NvmeZoneState state, NvmeRequest *req) -{ - switch (state) { - case NVME_ZONE_STATE_READ_ONLY: - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE); - /* fall through */ - case NVME_ZONE_STATE_OFFLINE: - return NVME_SUCCESS; - default: - return NVME_ZONE_INVAL_TRANSITION; - } -} - -static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone) -{ - uint16_t status; - uint8_t state = nvme_get_zone_state(zone); - - if (state == NVME_ZONE_STATE_EMPTY) { - status = nvme_aor_check(ns, 1, 0); - if (status) { - return status; - } - nvme_aor_inc_active(ns); - zone->d.za |= NVME_ZA_ZD_EXT_VALID; - nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); - return NVME_SUCCESS; - } - - return NVME_ZONE_INVAL_TRANSITION; -} - -static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone, - enum NvmeZoneProcessingMask proc_mask, - op_handler_t op_hndlr, NvmeRequest *req) -{ - uint16_t status = NVME_SUCCESS; - NvmeZoneState zs = nvme_get_zone_state(zone); - bool proc_zone; - - switch (zs) { - case NVME_ZONE_STATE_IMPLICITLY_OPEN: - case NVME_ZONE_STATE_EXPLICITLY_OPEN: - proc_zone = proc_mask & NVME_PROC_OPENED_ZONES; - break; - case NVME_ZONE_STATE_CLOSED: - proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES; - break; - case NVME_ZONE_STATE_READ_ONLY: - proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES; - break; - case NVME_ZONE_STATE_FULL: - proc_zone = proc_mask & NVME_PROC_FULL_ZONES; - break; - default: - proc_zone = false; - } - - if (proc_zone) { - status = op_hndlr(ns, zone, zs, req); - } - - return status; -} - -static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone, - enum NvmeZoneProcessingMask proc_mask, - op_handler_t op_hndlr, NvmeRequest *req) -{ - NvmeZone *next; - uint16_t status = NVME_SUCCESS; - int i; - - if (!proc_mask) { - status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req); - } else { - if (proc_mask & NVME_PROC_CLOSED_ZONES) { - QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - if (proc_mask & NVME_PROC_OPENED_ZONES) { - QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - - QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - if (proc_mask & NVME_PROC_FULL_ZONES) { - QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - - if (proc_mask & NVME_PROC_READ_ONLY_ZONES) { - for (i = 0; i < ns->num_zones; i++, zone++) { - status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, - req); - if (status && status != NVME_NO_COMPLETE) { - goto out; - } - } - } - } - -out: - return status; -} - -static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = (NvmeCmd *)&req->cmd; - 
NvmeNamespace *ns = req->ns; - NvmeZone *zone; - uintptr_t *resets; - uint8_t *zd_ext; - uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint64_t slba = 0; - uint32_t zone_idx = 0; - uint16_t status; - uint8_t action; - bool all; - enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE; - - action = dw13 & 0xff; - all = dw13 & 0x100; - - req->status = NVME_SUCCESS; - - if (!all) { - status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); - if (status) { - return status; - } - } - - zone = &ns->zone_array[zone_idx]; - if (slba != zone->d.zslba) { - trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba); - return NVME_INVALID_FIELD | NVME_DNR; - } - - switch (action) { - - case NVME_ZONE_ACTION_OPEN: - if (all) { - proc_mask = NVME_PROC_CLOSED_ZONES; - } - trace_pci_nvme_open_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req); - break; - - case NVME_ZONE_ACTION_CLOSE: - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES; - } - trace_pci_nvme_close_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req); - break; - - case NVME_ZONE_ACTION_FINISH: - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES; - } - trace_pci_nvme_finish_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req); - break; - - case NVME_ZONE_ACTION_RESET: - resets = (uintptr_t *)&req->opaque; - - if (all) { - proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES | - NVME_PROC_FULL_ZONES; - } - trace_pci_nvme_reset_zone(slba, zone_idx, all); - - *resets = 1; - - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req); - - (*resets)--; - - return *resets ? NVME_NO_COMPLETE : req->status; - - case NVME_ZONE_ACTION_OFFLINE: - if (all) { - proc_mask = NVME_PROC_READ_ONLY_ZONES; - } - trace_pci_nvme_offline_zone(slba, zone_idx, all); - status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req); - break; - - case NVME_ZONE_ACTION_SET_ZD_EXT: - trace_pci_nvme_set_descriptor_extension(slba, zone_idx); - if (all || !ns->params.zd_extension_size) { - return NVME_INVALID_FIELD | NVME_DNR; - } - zd_ext = nvme_get_zd_extension(ns, zone_idx); - status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req); - if (status) { - trace_pci_nvme_err_zd_extension_map_error(zone_idx); - return status; - } - - status = nvme_set_zd_ext(ns, zone); - if (status == NVME_SUCCESS) { - trace_pci_nvme_zd_extension_set(zone_idx); - return status; - } - break; - - default: - trace_pci_nvme_err_invalid_mgmt_action(action); - status = NVME_INVALID_FIELD; - } - - if (status == NVME_ZONE_INVAL_TRANSITION) { - trace_pci_nvme_err_invalid_zone_state_transition(action, slba, - zone->d.za); - } - if (status) { - status |= NVME_DNR; - } - - return status; -} - -static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl) -{ - NvmeZoneState zs = nvme_get_zone_state(zl); - - switch (zafs) { - case NVME_ZONE_REPORT_ALL: - return true; - case NVME_ZONE_REPORT_EMPTY: - return zs == NVME_ZONE_STATE_EMPTY; - case NVME_ZONE_REPORT_IMPLICITLY_OPEN: - return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN; - case NVME_ZONE_REPORT_EXPLICITLY_OPEN: - return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN; - case NVME_ZONE_REPORT_CLOSED: - return zs == NVME_ZONE_STATE_CLOSED; - case NVME_ZONE_REPORT_FULL: - return zs == NVME_ZONE_STATE_FULL; - case NVME_ZONE_REPORT_READ_ONLY: - return zs == NVME_ZONE_STATE_READ_ONLY; - case NVME_ZONE_REPORT_OFFLINE: - return zs == NVME_ZONE_STATE_OFFLINE; - 
default: - return false; - } -} - -static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = (NvmeCmd *)&req->cmd; - NvmeNamespace *ns = req->ns; - /* cdw12 is zero-based number of dwords to return. Convert to bytes */ - uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2; - uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint32_t zone_idx, zra, zrasf, partial; - uint64_t max_zones, nr_zones = 0; - uint16_t status; - uint64_t slba; - NvmeZoneDescr *z; - NvmeZone *zone; - NvmeZoneReportHeader *header; - void *buf, *buf_p; - size_t zone_entry_sz; - int i; - - req->status = NVME_SUCCESS; - - status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); - if (status) { - return status; - } - - zra = dw13 & 0xff; - if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) { - return NVME_INVALID_FIELD | NVME_DNR; - } - if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - zrasf = (dw13 >> 8) & 0xff; - if (zrasf > NVME_ZONE_REPORT_OFFLINE) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (data_size < sizeof(NvmeZoneReportHeader)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - status = nvme_check_mdts(n, data_size); - if (status) { - return status; - } - - partial = (dw13 >> 16) & 0x01; - - zone_entry_sz = sizeof(NvmeZoneDescr); - if (zra == NVME_ZONE_REPORT_EXTENDED) { - zone_entry_sz += ns->params.zd_extension_size; - } - - max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz; - buf = g_malloc0(data_size); - - zone = &ns->zone_array[zone_idx]; - for (i = zone_idx; i < ns->num_zones; i++) { - if (partial && nr_zones >= max_zones) { - break; - } - if (nvme_zone_matches_filter(zrasf, zone++)) { - nr_zones++; - } - } - header = (NvmeZoneReportHeader *)buf; - header->nr_zones = cpu_to_le64(nr_zones); - - buf_p = buf + sizeof(NvmeZoneReportHeader); - for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) { - zone = &ns->zone_array[zone_idx]; - if (nvme_zone_matches_filter(zrasf, zone)) { - z = (NvmeZoneDescr *)buf_p; - buf_p += sizeof(NvmeZoneDescr); - - z->zt = zone->d.zt; - z->zs = zone->d.zs; - z->zcap = cpu_to_le64(zone->d.zcap); - z->zslba = cpu_to_le64(zone->d.zslba); - z->za = zone->d.za; - - if (nvme_wp_is_valid(zone)) { - z->wp = cpu_to_le64(zone->d.wp); - } else { - z->wp = cpu_to_le64(~0ULL); - } - - if (zra == NVME_ZONE_REPORT_EXTENDED) { - if (zone->d.za & NVME_ZA_ZD_EXT_VALID) { - memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx), - ns->params.zd_extension_size); - } - buf_p += ns->params.zd_extension_size; - } - - max_zones--; - } - } - - status = nvme_c2h(n, (uint8_t *)buf, data_size, req); - - g_free(buf); - - return status; -} - -static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint16_t status; - - trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), - req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); - - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - /* - * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used - * along with TP 4056 (Namespace Types), it may be pretty screwed up. - * - * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the - * opcode with a specific command since we cannot determine a unique I/O - * command set. 
Opcode 0x0 could have any other meaning than something - * equivalent to flushing and, say, it DOES have completely different - * semantics in some other command set - does an NSID of 0xFFFFFFFF then - * mean "for all namespaces, apply whatever command set specific command - * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply - * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0xFFFFFFFF"? - * - * Anyway (and luckily), for now, we do not care about this since the - * device only supports namespace types that include the NVM Flush command - * (NVM and Zoned), so always do an NVM Flush. - */ - if (req->cmd.opcode == NVME_CMD_FLUSH) { - return nvme_flush(n, req); - } - - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (!(req->ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - - status = nvme_ns_status(req->ns); - if (unlikely(status)) { - return status; - } - - switch (req->cmd.opcode) { - case NVME_CMD_WRITE_ZEROES: - return nvme_write_zeroes(n, req); - case NVME_CMD_ZONE_APPEND: - return nvme_zone_append(n, req); - case NVME_CMD_WRITE: - return nvme_write(n, req); - case NVME_CMD_READ: - return nvme_read(n, req); - case NVME_CMD_COMPARE: - return nvme_compare(n, req); - case NVME_CMD_DSM: - return nvme_dsm(n, req); - case NVME_CMD_VERIFY: - return nvme_verify(n, req); - case NVME_CMD_COPY: - return nvme_copy(n, req); - case NVME_CMD_ZONE_MGMT_SEND: - return nvme_zone_mgmt_send(n, req); - case NVME_CMD_ZONE_MGMT_RECV: - return nvme_zone_mgmt_recv(n, req); - default: - assert(false); - } - - return NVME_INVALID_OPCODE | NVME_DNR; -} - -static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) -{ - n->sq[sq->sqid] = NULL; - timer_free(sq->timer); - g_free(sq->io_req); - if (sq->sqid) { - g_free(sq); - } -} - -static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; - NvmeRequest *r, *next; - NvmeSQueue *sq; - NvmeCQueue *cq; - uint16_t qid = le16_to_cpu(c->qid); - uint32_t nsid; - - if (unlikely(!qid || nvme_check_sqid(n, qid))) { - trace_pci_nvme_err_invalid_del_sq(qid); - return NVME_INVALID_QID | NVME_DNR; - } - - trace_pci_nvme_del_sq(qid); - - sq = n->sq[qid]; - while (!QTAILQ_EMPTY(&sq->out_req_list)) { - r = QTAILQ_FIRST(&sq->out_req_list); - if (r->aiocb) { - blk_aio_cancel(r->aiocb); - } - } - - /* - * Drain all namespaces if there are still outstanding requests that we - could not cancel explicitly.
- */ - if (!QTAILQ_EMPTY(&sq->out_req_list)) { - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); - if (ns) { - nvme_ns_drain(ns); - } - } - } - - assert(QTAILQ_EMPTY(&sq->out_req_list)); - - if (!nvme_check_cqid(n, sq->cqid)) { - cq = n->cq[sq->cqid]; - QTAILQ_REMOVE(&cq->sq_list, sq, entry); - - nvme_post_cqes(cq); - QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) { - if (r->sq == sq) { - QTAILQ_REMOVE(&cq->req_list, r, entry); - QTAILQ_INSERT_TAIL(&sq->req_list, r, entry); - } - } - } - - nvme_free_sq(sq, n); - return NVME_SUCCESS; -} - -static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, - uint16_t sqid, uint16_t cqid, uint16_t size) -{ - int i; - NvmeCQueue *cq; - - sq->ctrl = n; - sq->dma_addr = dma_addr; - sq->sqid = sqid; - sq->size = size; - sq->cqid = cqid; - sq->head = sq->tail = 0; - sq->io_req = g_new0(NvmeRequest, sq->size); - - QTAILQ_INIT(&sq->req_list); - QTAILQ_INIT(&sq->out_req_list); - for (i = 0; i < sq->size; i++) { - sq->io_req[i].sq = sq; - QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); - } - sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); - - assert(n->cq[cqid]); - cq = n->cq[cqid]; - QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); - n->sq[sqid] = sq; -} - -static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeSQueue *sq; - NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd; - - uint16_t cqid = le16_to_cpu(c->cqid); - uint16_t sqid = le16_to_cpu(c->sqid); - uint16_t qsize = le16_to_cpu(c->qsize); - uint16_t qflags = le16_to_cpu(c->sq_flags); - uint64_t prp1 = le64_to_cpu(c->prp1); - - trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); - - if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { - trace_pci_nvme_err_invalid_create_sq_cqid(cqid); - return NVME_INVALID_CQID | NVME_DNR; - } - if (unlikely(!sqid || sqid > n->params.max_ioqpairs || - n->sq[sqid] != NULL)) { - trace_pci_nvme_err_invalid_create_sq_sqid(sqid); - return NVME_INVALID_QID | NVME_DNR; - } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_pci_nvme_err_invalid_create_sq_size(qsize); - return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; - } - if (unlikely(prp1 & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_create_sq_addr(prp1); - return NVME_INVALID_PRP_OFFSET | NVME_DNR; - } - if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { - trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); - return NVME_INVALID_FIELD | NVME_DNR; - } - sq = g_malloc0(sizeof(*sq)); - nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1); - return NVME_SUCCESS; -} - -struct nvme_stats { - uint64_t units_read; - uint64_t units_written; - uint64_t read_commands; - uint64_t write_commands; -}; - -static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) -{ - BlockAcctStats *s = blk_get_stats(ns->blkconf.blk); - - stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; - stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; - stats->read_commands += s->nr_ops[BLOCK_ACCT_READ]; - stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; -} - -static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - struct nvme_stats stats = { 0 }; - NvmeSmartLog smart = { 0 }; - uint32_t trans_len; - NvmeNamespace *ns; - time_t current_ms; - - if (off >= sizeof(smart)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nsid != 0xffffffff) { - ns = 
nvme_ns(n, nsid); - if (!ns) { - return NVME_INVALID_NSID | NVME_DNR; - } - nvme_set_blk_stats(ns, &stats); - } else { - int i; - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - nvme_set_blk_stats(ns, &stats); - } - } - - trans_len = MIN(sizeof(smart) - off, buf_len); - smart.critical_warning = n->smart_critical_warning; - - smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, - 1000)); - smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written, - 1000)); - smart.host_read_commands[0] = cpu_to_le64(stats.read_commands); - smart.host_write_commands[0] = cpu_to_le64(stats.write_commands); - - smart.temperature = cpu_to_le16(n->temperature); - - if ((n->temperature >= n->features.temp_thresh_hi) || - (n->temperature <= n->features.temp_thresh_low)) { - smart.critical_warning |= NVME_SMART_TEMPERATURE; - } - - current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - smart.power_on_hours[0] = - cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60); - - if (!rae) { - nvme_clear_events(n, NVME_AER_TYPE_SMART); - } - - return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req); -} - -static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, - NvmeRequest *req) -{ - uint32_t trans_len; - NvmeFwSlotInfoLog fw_log = { - .afi = 0x1, - }; - - if (off >= sizeof(fw_log)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' '); - trans_len = MIN(sizeof(fw_log) - off, buf_len); - - return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req); -} - -static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - uint32_t trans_len; - NvmeErrorLog errlog; - - if (off >= sizeof(errlog)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (!rae) { - nvme_clear_events(n, NVME_AER_TYPE_ERROR); - } - - memset(&errlog, 0x0, sizeof(errlog)); - trans_len = MIN(sizeof(errlog) - off, buf_len); - - return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req); -} - -static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - uint32_t nslist[1024]; - uint32_t trans_len; - int i = 0; - uint32_t nsid; - - memset(nslist, 0x0, sizeof(nslist)); - trans_len = MIN(sizeof(nslist) - off, buf_len); - - while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != - NVME_CHANGED_NSID_SIZE) { - /* - * If there are more than 1024 namespaces, the first entry in the log - * page should be set to 0xffffffff and the others to 0, as the spec - * requires. - */ - if (i == ARRAY_SIZE(nslist)) { - memset(nslist, 0x0, sizeof(nslist)); - nslist[0] = 0xffffffff; - break; - } - - nslist[i++] = nsid; - clear_bit(nsid, n->changed_nsids); - } - - /* - * Remove all the remaining changed NSIDs in case the loop above exited early - because there are more than 1024 namespaces.
- */ - if (nslist[0] == 0xffffffff) { - bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE); - } - - if (!rae) { - nvme_clear_events(n, NVME_AER_TYPE_NOTICE); - } - - return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req); -} - -static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, - uint64_t off, NvmeRequest *req) -{ - NvmeEffectsLog log = {}; - const uint32_t *src_iocs = NULL; - uint32_t trans_len; - - if (off >= sizeof(log)) { - trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log)); - return NVME_INVALID_FIELD | NVME_DNR; - } - - switch (NVME_CC_CSS(n->bar.cc)) { - case NVME_CC_CSS_NVM: - src_iocs = nvme_cse_iocs_nvm; - /* fall through */ - case NVME_CC_CSS_ADMIN_ONLY: - break; - case NVME_CC_CSS_CSI: - switch (csi) { - case NVME_CSI_NVM: - src_iocs = nvme_cse_iocs_nvm; - break; - case NVME_CSI_ZONED: - src_iocs = nvme_cse_iocs_zoned; - break; - } - } - - memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); - - if (src_iocs) { - memcpy(log.iocs, src_iocs, sizeof(log.iocs)); - } - - trans_len = MIN(sizeof(log) - off, buf_len); - - return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); -} - -static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = &req->cmd; - - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t dw12 = le32_to_cpu(cmd->cdw12); - uint32_t dw13 = le32_to_cpu(cmd->cdw13); - uint8_t lid = dw10 & 0xff; - uint8_t lsp = (dw10 >> 8) & 0xf; - uint8_t rae = (dw10 >> 15) & 0x1; - uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24; - uint32_t numdl, numdu; - uint64_t off, lpol, lpou; - size_t len; - uint16_t status; - - numdl = (dw10 >> 16); - numdu = (dw11 & 0xffff); - lpol = dw12; - lpou = dw13; - - len = (((numdu << 16) | numdl) + 1) << 2; - off = (lpou << 32ULL) | lpol; - - if (off & 0x3) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off); - - status = nvme_check_mdts(n, len); - if (status) { - return status; - } - - switch (lid) { - case NVME_LOG_ERROR_INFO: - return nvme_error_info(n, rae, len, off, req); - case NVME_LOG_SMART_INFO: - return nvme_smart_info(n, rae, len, off, req); - case NVME_LOG_FW_SLOT_INFO: - return nvme_fw_log_info(n, len, off, req); - case NVME_LOG_CHANGED_NSLIST: - return nvme_changed_nslist(n, rae, len, off, req); - case NVME_LOG_CMD_EFFECTS: - return nvme_cmd_effects(n, csi, len, off, req); - default: - trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); - return NVME_INVALID_FIELD | NVME_DNR; - } -} - -static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) -{ - n->cq[cq->cqid] = NULL; - timer_free(cq->timer); - if (msix_enabled(&n->parent_obj)) { - msix_vector_unuse(&n->parent_obj, cq->vector); - } - if (cq->cqid) { - g_free(cq); - } -} - -static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; - NvmeCQueue *cq; - uint16_t qid = le16_to_cpu(c->qid); - - if (unlikely(!qid || nvme_check_cqid(n, qid))) { - trace_pci_nvme_err_invalid_del_cq_cqid(qid); - return NVME_INVALID_CQID | NVME_DNR; - } - - cq = n->cq[qid]; - if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { - trace_pci_nvme_err_invalid_del_cq_notempty(qid); - return NVME_INVALID_QUEUE_DEL; - } - nvme_irq_deassert(n, cq); - trace_pci_nvme_del_cq(qid); - nvme_free_cq(cq, n); - return NVME_SUCCESS; -} - -static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, - uint16_t cqid, uint16_t vector, uint16_t size, - uint16_t irq_enabled) -{ - int ret; - - if 
(msix_enabled(&n->parent_obj)) { - ret = msix_vector_use(&n->parent_obj, vector); - assert(ret == 0); - } - cq->ctrl = n; - cq->cqid = cqid; - cq->size = size; - cq->dma_addr = dma_addr; - cq->phase = 1; - cq->irq_enabled = irq_enabled; - cq->vector = vector; - cq->head = cq->tail = 0; - QTAILQ_INIT(&cq->req_list); - QTAILQ_INIT(&cq->sq_list); - n->cq[cqid] = cq; - cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); -} - -static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCQueue *cq; - NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd; - uint16_t cqid = le16_to_cpu(c->cqid); - uint16_t vector = le16_to_cpu(c->irq_vector); - uint16_t qsize = le16_to_cpu(c->qsize); - uint16_t qflags = le16_to_cpu(c->cq_flags); - uint64_t prp1 = le64_to_cpu(c->prp1); - - trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, - NVME_CQ_FLAGS_IEN(qflags) != 0); - - if (unlikely(!cqid || cqid > n->params.max_ioqpairs || - n->cq[cqid] != NULL)) { - trace_pci_nvme_err_invalid_create_cq_cqid(cqid); - return NVME_INVALID_QID | NVME_DNR; - } - if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_pci_nvme_err_invalid_create_cq_size(qsize); - return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; - } - if (unlikely(prp1 & (n->page_size - 1))) { - trace_pci_nvme_err_invalid_create_cq_addr(prp1); - return NVME_INVALID_PRP_OFFSET | NVME_DNR; - } - if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { - trace_pci_nvme_err_invalid_create_cq_vector(vector); - return NVME_INVALID_IRQ_VECTOR | NVME_DNR; - } - if (unlikely(vector >= n->params.msix_qsize)) { - trace_pci_nvme_err_invalid_create_cq_vector(vector); - return NVME_INVALID_IRQ_VECTOR | NVME_DNR; - } - if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { - trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); - return NVME_INVALID_FIELD | NVME_DNR; - } - - cq = g_malloc0(sizeof(*cq)); - nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1, - NVME_CQ_FLAGS_IEN(qflags)); - - /* - * It is only required to set qs_created when creating a completion queue; - * creating a submission queue without a matching completion queue will - * fail. 
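 *
 * One observable consequence of this flag (the snippet is taken verbatim
 * from nvme_set_feature() further down): once any I/O queue exists,
 * renegotiating the queue count is refused with a Command Sequence Error:
 *
 *     case NVME_NUMBER_OF_QUEUES:
 *         if (n->qs_created) {
 *             return NVME_CMD_SEQ_ERROR | NVME_DNR;
 *         }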
- */ - n->qs_created = true; - return NVME_SUCCESS; -} - -static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) -{ - uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; - - return nvme_c2h(n, id, sizeof(id), req); -} - -static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns) -{ - switch (ns->csi) { - case NVME_CSI_NVM: - case NVME_CSI_ZONED: - return true; - } - return false; -} - -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) -{ - trace_pci_nvme_identify_ctrl(); - - return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req); -} - -static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; - NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id; - - trace_pci_nvme_identify_ctrl_csi(c->csi); - - switch (c->csi) { - case NVME_CSI_NVM: - id_nvm->vsl = n->params.vsl; - id_nvm->dmrsl = cpu_to_le32(n->dmrsl); - break; - - case NVME_CSI_ZONED: - ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl; - break; - - default: - return NVME_INVALID_FIELD | NVME_DNR; - } - - return nvme_c2h(n, id, sizeof(id), req); -} - -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t nsid = le32_to_cpu(c->nsid); - - trace_pci_nvme_identify_ns(nsid); - - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, nsid); - if (!ns) { - return nvme_rpt_empty_id_struct(n, req); - } - } else { - return nvme_rpt_empty_id_struct(n, req); - } - } - - if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { - return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); - } - - return NVME_INVALID_CMD_SET | NVME_DNR; -} - -static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint16_t min_id = le16_to_cpu(c->ctrlid); - uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; - uint16_t *ids = &list[1]; - NvmeNamespace *ns; - NvmeCtrl *ctrl; - int cntlid, nr_ids = 0; - - trace_pci_nvme_identify_ns_attached_list(min_id); - - if (c->nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - ns = nvme_subsys_ns(n->subsys, c->nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { - ctrl = nvme_subsys_ctrl(n->subsys, cntlid); - if (!ctrl) { - continue; - } - - if (!nvme_ns(ctrl, c->nsid)) { - continue; - } - - ids[nr_ids++] = cntlid; - } - - list[0] = nr_ids; - - return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); -} - -static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, - bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t nsid = le32_to_cpu(c->nsid); - - trace_pci_nvme_identify_ns_csi(nsid, c->csi); - - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, nsid); - if (!ns) { - return nvme_rpt_empty_id_struct(n, req); - } - } else { - return nvme_rpt_empty_id_struct(n, req); - } - } - - if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) { - return nvme_rpt_empty_id_struct(n, req); - } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { 
- return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), - req); - } - - return NVME_INVALID_FIELD | NVME_DNR; -} - -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, - bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t min_nsid = le32_to_cpu(c->nsid); - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - static const int data_len = sizeof(list); - uint32_t *list_ptr = (uint32_t *)list; - int i, j = 0; - - trace_pci_nvme_identify_nslist(min_nsid); - - /* - * Both 0xffffffff (NVME_NSID_BROADCAST) and 0xfffffffe are invalid values - * since the Active Namespace ID List should return namespaces with ids - * *higher* than the NSID specified in the command. This is also specified - * in the spec (NVM Express v1.3d, Section 5.15.4). - */ - if (min_nsid >= NVME_NSID_BROADCAST - 1) { - return NVME_INVALID_NSID | NVME_DNR; - } - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, i); - if (!ns) { - continue; - } - } else { - continue; - } - } - if (ns->params.nsid <= min_nsid) { - continue; - } - list_ptr[j++] = cpu_to_le32(ns->params.nsid); - if (j == data_len / sizeof(uint32_t)) { - break; - } - } - - return nvme_c2h(n, list, data_len, req); -} - -static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, - bool active) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t min_nsid = le32_to_cpu(c->nsid); - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - static const int data_len = sizeof(list); - uint32_t *list_ptr = (uint32_t *)list; - int i, j = 0; - - trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); - - /* - * Same as in nvme_identify_nslist(), 0xffffffff/0xfffffffe are invalid. - */ - if (min_nsid >= NVME_NSID_BROADCAST - 1) { - return NVME_INVALID_NSID | NVME_DNR; - } - - if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - if (!active) { - ns = nvme_subsys_ns(n->subsys, i); - if (!ns) { - continue; - } - } else { - continue; - } - } - if (ns->params.nsid <= min_nsid || c->csi != ns->csi) { - continue; - } - list_ptr[j++] = cpu_to_le32(ns->params.nsid); - if (j == data_len / sizeof(uint32_t)) { - break; - } - } - - return nvme_c2h(n, list, data_len, req); -} - -static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns; - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - uint32_t nsid = le32_to_cpu(c->nsid); - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - - struct data { - struct { - NvmeIdNsDescr hdr; - uint8_t v[NVME_NIDL_UUID]; - } uuid; - struct { - NvmeIdNsDescr hdr; - uint8_t v; - } csi; - }; - - struct data *ns_descrs = (struct data *)list; - - trace_pci_nvme_identify_ns_descr_list(nsid); - - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - /* - * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data - * structure, a Namespace UUID (nidt = 0x3) must be reported in the - * Namespace Identification Descriptor. Add the namespace UUID here. 
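 *
 * For reference, the layout produced below (assuming the 4-byte descriptor
 * header of NvmeIdNsDescr) is:
 *
 *     bytes  0..3    NIDT = 0x3 (UUID), NIDL = 16
 *     bytes  4..19   the namespace UUID
 *     bytes 20..23   NIDT = 0x4 (CSI), NIDL = 1
 *     byte  24       the Command Set Identifier
 *
 * with the remainder of the 4096-byte buffer left zeroed, which terminates
 * the descriptor list.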
- */ - ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID; - ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID; - memcpy(&ns_descrs->uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); - - ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI; - ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI; - ns_descrs->csi.v = ns->csi; - - return nvme_c2h(n, list, sizeof(list), req); -} - -static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req) -{ - uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; - static const int data_len = sizeof(list); - - trace_pci_nvme_identify_cmd_set(); - - NVME_SET_CSI(*list, NVME_CSI_NVM); - NVME_SET_CSI(*list, NVME_CSI_ZONED); - - return nvme_c2h(n, list, data_len, req); -} - -static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeIdentify *c = (NvmeIdentify *)&req->cmd; - - trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid), - c->csi); - - switch (c->cns) { - case NVME_ID_CNS_NS: - return nvme_identify_ns(n, req, true); - case NVME_ID_CNS_NS_PRESENT: - return nvme_identify_ns(n, req, false); - case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: - return nvme_identify_ns_attached_list(n, req); - case NVME_ID_CNS_CS_NS: - return nvme_identify_ns_csi(n, req, true); - case NVME_ID_CNS_CS_NS_PRESENT: - return nvme_identify_ns_csi(n, req, false); - case NVME_ID_CNS_CTRL: - return nvme_identify_ctrl(n, req); - case NVME_ID_CNS_CS_CTRL: - return nvme_identify_ctrl_csi(n, req); - case NVME_ID_CNS_NS_ACTIVE_LIST: - return nvme_identify_nslist(n, req, true); - case NVME_ID_CNS_NS_PRESENT_LIST: - return nvme_identify_nslist(n, req, false); - case NVME_ID_CNS_CS_NS_ACTIVE_LIST: - return nvme_identify_nslist_csi(n, req, true); - case NVME_ID_CNS_CS_NS_PRESENT_LIST: - return nvme_identify_nslist_csi(n, req, false); - case NVME_ID_CNS_NS_DESCR_LIST: - return nvme_identify_ns_descr_list(n, req); - case NVME_ID_CNS_IO_COMMAND_SET: - return nvme_identify_cmd_set(n, req); - default: - trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); - return NVME_INVALID_FIELD | NVME_DNR; - } -} - -static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) -{ - uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff; - - req->cqe.result = 1; - if (nvme_check_sqid(n, sqid)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - return NVME_SUCCESS; -} - -static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) -{ - trace_pci_nvme_setfeat_timestamp(ts); - - n->host_timestamp = le64_to_cpu(ts); - n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); -} - -static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) -{ - uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms; - - union nvme_timestamp { - struct { - uint64_t timestamp:48; - uint64_t sync:1; - uint64_t origin:3; - uint64_t rsvd1:12; - }; - uint64_t all; - }; - - union nvme_timestamp ts; - ts.all = 0; - ts.timestamp = n->host_timestamp + elapsed_time; - - /* If the host timestamp is non-zero, set the timestamp origin */ - ts.origin = n->host_timestamp ? 
0x01 : 0x00; - - trace_pci_nvme_getfeat_timestamp(ts.all); - - return cpu_to_le64(ts.all); -} - -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) -{ - uint64_t timestamp = nvme_get_timestamp(n); - - return nvme_c2h(n, (uint8_t *)&timestamp, sizeof(timestamp), req); -} - -static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeCmd *cmd = &req->cmd; - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t nsid = le32_to_cpu(cmd->nsid); - uint32_t result; - uint8_t fid = NVME_GETSETFEAT_FID(dw10); - NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10); - uint16_t iv; - NvmeNamespace *ns; - int i; - - static const uint32_t nvme_feature_default[NVME_FID_MAX] = { - [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT, - }; - - trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11); - - if (!nvme_feature_support[fid]) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { - if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { - /* - * The Reservation Notification Mask and Reservation Persistence - * features require a status code of Invalid Field in Command when - * NSID is 0xFFFFFFFF. Since the device does not support those - * features we can always return Invalid Namespace or Format as we - * should do for all other features. - */ - return NVME_INVALID_NSID | NVME_DNR; - } - - if (!nvme_ns(n, nsid)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - } - - switch (sel) { - case NVME_GETFEAT_SELECT_CURRENT: - break; - case NVME_GETFEAT_SELECT_SAVED: - /* no features are saveable by the controller; fallthrough */ - case NVME_GETFEAT_SELECT_DEFAULT: - goto defaults; - case NVME_GETFEAT_SELECT_CAP: - result = nvme_feature_cap[fid]; - goto out; - } - - switch (fid) { - case NVME_TEMPERATURE_THRESHOLD: - result = 0; - - /* - * The controller only implements the Composite Temperature sensor, so - * return 0 for all other sensors. - */ - if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { - goto out; - } - - switch (NVME_TEMP_THSEL(dw11)) { - case NVME_TEMP_THSEL_OVER: - result = n->features.temp_thresh_hi; - goto out; - case NVME_TEMP_THSEL_UNDER: - result = n->features.temp_thresh_low; - goto out; - } - - return NVME_INVALID_FIELD | NVME_DNR; - case NVME_ERROR_RECOVERY: - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - result = ns->features.err_rec; - goto out; - case NVME_VOLATILE_WRITE_CACHE: - result = 0; - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - result = blk_enable_write_cache(ns->blkconf.blk); - if (result) { - break; - } - } - trace_pci_nvme_getfeat_vwcache(result ? 
"enabled" : "disabled"); - goto out; - case NVME_ASYNCHRONOUS_EVENT_CONF: - result = n->features.async_config; - goto out; - case NVME_TIMESTAMP: - return nvme_get_feature_timestamp(n, req); - default: - break; - } - -defaults: - switch (fid) { - case NVME_TEMPERATURE_THRESHOLD: - result = 0; - - if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { - break; - } - - if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) { - result = NVME_TEMPERATURE_WARNING; - } - - break; - case NVME_NUMBER_OF_QUEUES: - result = (n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16); - trace_pci_nvme_getfeat_numq(result); - break; - case NVME_INTERRUPT_VECTOR_CONF: - iv = dw11 & 0xffff; - if (iv >= n->params.max_ioqpairs + 1) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - result = iv; - if (iv == n->admin_cq.vector) { - result |= NVME_INTVC_NOCOALESCING; - } - break; - case NVME_COMMAND_SET_PROFILE: - result = 0; - break; - default: - result = nvme_feature_default[fid]; - break; - } - -out: - req->cqe.result = cpu_to_le32(result); - return NVME_SUCCESS; -} - -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) -{ - uint16_t ret; - uint64_t timestamp; - - ret = nvme_h2c(n, (uint8_t *)&timestamp, sizeof(timestamp), req); - if (ret) { - return ret; - } - - nvme_set_timestamp(n, timestamp); - - return NVME_SUCCESS; -} - -static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns = NULL; - - NvmeCmd *cmd = &req->cmd; - uint32_t dw10 = le32_to_cpu(cmd->cdw10); - uint32_t dw11 = le32_to_cpu(cmd->cdw11); - uint32_t nsid = le32_to_cpu(cmd->nsid); - uint8_t fid = NVME_GETSETFEAT_FID(dw10); - uint8_t save = NVME_SETFEAT_SAVE(dw10); - int i; - - trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); - - if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) { - return NVME_FID_NOT_SAVEABLE | NVME_DNR; - } - - if (!nvme_feature_support[fid]) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { - if (nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (unlikely(!ns)) { - return NVME_INVALID_FIELD | NVME_DNR; - } - } - } else if (nsid && nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - return NVME_FEAT_NOT_NS_SPEC | NVME_DNR; - } - - if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) { - return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; - } - - switch (fid) { - case NVME_TEMPERATURE_THRESHOLD: - if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { - break; - } - - switch (NVME_TEMP_THSEL(dw11)) { - case NVME_TEMP_THSEL_OVER: - n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11); - break; - case NVME_TEMP_THSEL_UNDER: - n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11); - break; - default: - return NVME_INVALID_FIELD | NVME_DNR; - } - - if ((n->temperature >= n->features.temp_thresh_hi) || - (n->temperature <= n->features.temp_thresh_low)) { - nvme_smart_event(n, NVME_AER_INFO_SMART_TEMP_THRESH); - } - - break; - case NVME_ERROR_RECOVERY: - if (nsid == NVME_NSID_BROADCAST) { - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - - if (!ns) { - continue; - } - - if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { - ns->features.err_rec = dw11; - } - } - - break; - } - - assert(ns); - if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { - ns->features.err_rec = dw11; - } - break; - case NVME_VOLATILE_WRITE_CACHE: - for (i = 1; i <= 
n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) { - blk_flush(ns->blkconf.blk); - } - - blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1); - } - - break; - - case NVME_NUMBER_OF_QUEUES: - if (n->qs_created) { - return NVME_CMD_SEQ_ERROR | NVME_DNR; - } - - /* - * NVMe v1.3, Section 5.21.1.7: 0xffff is not an allowed value for NCQR - * and NSQR. - */ - if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, - ((dw11 >> 16) & 0xFFFF) + 1, - n->params.max_ioqpairs, - n->params.max_ioqpairs); - req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16)); - break; - case NVME_ASYNCHRONOUS_EVENT_CONF: - n->features.async_config = dw11; - break; - case NVME_TIMESTAMP: - return nvme_set_feature_timestamp(n, req); - case NVME_COMMAND_SET_PROFILE: - if (dw11 & 0x1ff) { - trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); - return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; - } - break; - default: - return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; - } - return NVME_SUCCESS; -} - -static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) -{ - trace_pci_nvme_aer(nvme_cid(req)); - - if (n->outstanding_aers > n->params.aerl) { - trace_pci_nvme_aer_aerl_exceeded(); - return NVME_AER_LIMIT_EXCEEDED; - } - - n->aer_reqs[n->outstanding_aers] = req; - n->outstanding_aers++; - - if (!QTAILQ_EMPTY(&n->aer_queue)) { - nvme_process_aers(n); - } - - return NVME_NO_COMPLETE; -} - -static void nvme_update_dmrsl(NvmeCtrl *n) -{ - int nsid; - - for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { - NvmeNamespace *ns = nvme_ns(n, nsid); - if (!ns) { - continue; - } - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); - } -} - -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns); -static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns; - NvmeCtrl *ctrl; - uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); - bool attach = !(dw10 & 0xf); - uint16_t *nr_ids = &list[0]; - uint16_t *ids = &list[1]; - uint16_t ret; - int i; - - trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf); - - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_subsys_ns(n->subsys, nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - ret = nvme_h2c(n, (uint8_t *)list, 4096, req); - if (ret) { - return ret; - } - - if (!*nr_ids) { - return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; - } - - *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1); - for (i = 0; i < *nr_ids; i++) { - ctrl = nvme_subsys_ctrl(n->subsys, ids[i]); - if (!ctrl) { - return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; - } - - if (attach) { - if (nvme_ns(ctrl, nsid)) { - return NVME_NS_ALREADY_ATTACHED | NVME_DNR; - } - - if (ns->attached && !ns->params.shared) { - return NVME_NS_PRIVATE | NVME_DNR; - } - - nvme_attach_ns(ctrl, ns); - __nvme_select_ns_iocs(ctrl, ns); - } else { - if (!nvme_ns(ctrl, nsid)) { - return NVME_NS_NOT_ATTACHED | NVME_DNR; - } - - ctrl->namespaces[nsid - 1] = NULL; - ns->attached--; - - nvme_update_dmrsl(ctrl); - } - - /* - * Add namespace id to the changed namespace id list for event clearing - * via Get Log Page command. 
- */ - if (!test_and_set_bit(nsid, ctrl->changed_nsids)) { - nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE, - NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED, - NVME_LOG_CHANGED_NSLIST); - } - } - - return NVME_SUCCESS; -} - -static uint16_t nvme_format_ns(NvmeCtrl *n, NvmeNamespace *ns, uint8_t lbaf, - uint8_t mset, uint8_t pi, uint8_t pil, - NvmeRequest *req) -{ - int64_t len, offset; - struct nvme_aio_format_ctx *ctx; - BlockBackend *blk = ns->blkconf.blk; - uint16_t ms; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - int *count; - - if (ns->params.zoned) { - return NVME_INVALID_FORMAT | NVME_DNR; - } - - trace_pci_nvme_format_ns(nvme_cid(req), nvme_nsid(ns), lbaf, mset, pi, pil); - - if (lbaf > ns->id_ns.nlbaf) { - return NVME_INVALID_FORMAT | NVME_DNR; - } - - ms = ns->id_ns.lbaf[lbaf].ms; - - if (pi && (ms < sizeof(NvmeDifTuple))) { - return NVME_INVALID_FORMAT | NVME_DNR; - } - - if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - nvme_ns_drain(ns); - nvme_ns_shutdown(ns); - nvme_ns_cleanup(ns); - - ns->id_ns.dps = (pil << 3) | pi; - ns->id_ns.flbas = lbaf | (mset << 4); - - nvme_ns_init_format(ns); - - ns->status = NVME_FORMAT_IN_PROGRESS; - - len = ns->size; - offset = 0; - - count = g_new(int, 1); - *count = 1; - - (*num_formats)++; - - while (len) { - ctx = g_new(struct nvme_aio_format_ctx, 1); - ctx->req = req; - ctx->ns = ns; - ctx->count = count; - - size_t bytes = MIN(BDRV_REQUEST_MAX_BYTES, len); - - (*count)++; - - blk_aio_pwrite_zeroes(blk, offset, bytes, BDRV_REQ_MAY_UNMAP, - nvme_aio_format_cb, ctx); - - offset += bytes; - len -= bytes; - - } - - if (--(*count)) { - return NVME_NO_COMPLETE; - } - - g_free(count); - ns->status = 0x0; - (*num_formats)--; - - return NVME_SUCCESS; -} - -static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) -{ - NvmeNamespace *ns; - uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); - uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint8_t lbaf = dw10 & 0xf; - uint8_t mset = (dw10 >> 4) & 0x1; - uint8_t pi = (dw10 >> 5) & 0x7; - uint8_t pil = (dw10 >> 8) & 0x1; - uintptr_t *num_formats = (uintptr_t *)&req->opaque; - uint16_t status; - int i; - - trace_pci_nvme_format(nvme_cid(req), nsid, lbaf, mset, pi, pil); - - /* 1-initialize; see the comment in nvme_dsm */ - *num_formats = 1; - - if (nsid != NVME_NSID_BROADCAST) { - if (!nvme_nsid_valid(n, nsid)) { - return NVME_INVALID_NSID | NVME_DNR; - } - - ns = nvme_ns(n, nsid); - if (!ns) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; - } - } else { - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - status = nvme_format_ns(n, ns, lbaf, mset, pi, pil, req); - if (status && status != NVME_NO_COMPLETE) { - req->status = status; - break; - } - } - } - - /* account for the 1-initialization */ - if (--(*num_formats)) { - return NVME_NO_COMPLETE; - } - - return req->status; -} - -static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) -{ - trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, - nvme_adm_opc_str(req->cmd.opcode)); - - if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { - trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); - return NVME_INVALID_OPCODE | NVME_DNR; - } - - /* SGLs shall not be used for Admin commands in NVMe over PCIe */ - if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - 
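    /*
     * For reference (spec field layout, nothing new functionally): PSDT is
     * bits 07:06 of the command Dword 0 flags byte tested just above,
     * roughly
     *
     *     psdt = (req->cmd.flags >> 6) & 0x3;   // 0 = PRP, 1/2 = SGL
     *
     * which is why any value other than NVME_PSDT_PRP is rejected for admin
     * commands here.
     */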
switch (req->cmd.opcode) { - case NVME_ADM_CMD_DELETE_SQ: - return nvme_del_sq(n, req); - case NVME_ADM_CMD_CREATE_SQ: - return nvme_create_sq(n, req); - case NVME_ADM_CMD_GET_LOG_PAGE: - return nvme_get_log(n, req); - case NVME_ADM_CMD_DELETE_CQ: - return nvme_del_cq(n, req); - case NVME_ADM_CMD_CREATE_CQ: - return nvme_create_cq(n, req); - case NVME_ADM_CMD_IDENTIFY: - return nvme_identify(n, req); - case NVME_ADM_CMD_ABORT: - return nvme_abort(n, req); - case NVME_ADM_CMD_SET_FEATURES: - return nvme_set_feature(n, req); - case NVME_ADM_CMD_GET_FEATURES: - return nvme_get_feature(n, req); - case NVME_ADM_CMD_ASYNC_EV_REQ: - return nvme_aer(n, req); - case NVME_ADM_CMD_NS_ATTACHMENT: - return nvme_ns_attachment(n, req); - case NVME_ADM_CMD_FORMAT_NVM: - return nvme_format(n, req); - default: - assert(false); - } - - return NVME_INVALID_OPCODE | NVME_DNR; -} - -static void nvme_process_sq(void *opaque) -{ - NvmeSQueue *sq = opaque; - NvmeCtrl *n = sq->ctrl; - NvmeCQueue *cq = n->cq[sq->cqid]; - - uint16_t status; - hwaddr addr; - NvmeCmd cmd; - NvmeRequest *req; - - while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { - addr = sq->dma_addr + sq->head * n->sqe_size; - if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { - trace_pci_nvme_err_addr_read(addr); - trace_pci_nvme_err_cfs(); - n->bar.csts = NVME_CSTS_FAILED; - break; - } - nvme_inc_sq_head(sq); - - req = QTAILQ_FIRST(&sq->req_list); - QTAILQ_REMOVE(&sq->req_list, req, entry); - QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); - nvme_req_clear(req); - req->cqe.cid = cmd.cid; - memcpy(&req->cmd, &cmd, sizeof(NvmeCmd)); - - status = sq->sqid ? nvme_io_cmd(n, req) : - nvme_admin_cmd(n, req); - if (status != NVME_NO_COMPLETE) { - req->status = status; - nvme_enqueue_req_completion(cq, req); - } - } -} - -static void nvme_ctrl_reset(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_ns_drain(ns); - } - - for (i = 0; i < n->params.max_ioqpairs + 1; i++) { - if (n->sq[i] != NULL) { - nvme_free_sq(n->sq[i], n); - } - } - for (i = 0; i < n->params.max_ioqpairs + 1; i++) { - if (n->cq[i] != NULL) { - nvme_free_cq(n->cq[i], n); - } - } - - while (!QTAILQ_EMPTY(&n->aer_queue)) { - NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue); - QTAILQ_REMOVE(&n->aer_queue, event, entry); - g_free(event); - } - - n->aer_queued = 0; - n->outstanding_aers = 0; - n->qs_created = false; - - n->bar.cc = 0; -} - -static void nvme_ctrl_shutdown(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - if (n->pmr.dev) { - memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); - } - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_ns_shutdown(ns); - } -} - -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns) -{ - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { - case NVME_CSI_NVM: - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - case NVME_CSI_ZONED: - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - } -} - -static void nvme_select_ns_iocs(NvmeCtrl *n) -{ - NvmeNamespace *ns; - int i; - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - __nvme_select_ns_iocs(n, ns); - } -} - -static int nvme_start_ctrl(NvmeCtrl *n) -{ - uint32_t page_bits = 
NVME_CC_MPS(n->bar.cc) + 12; - uint32_t page_size = 1 << page_bits; - - if (unlikely(n->cq[0])) { - trace_pci_nvme_err_startfail_cq(); - return -1; - } - if (unlikely(n->sq[0])) { - trace_pci_nvme_err_startfail_sq(); - return -1; - } - if (unlikely(!n->bar.asq)) { - trace_pci_nvme_err_startfail_nbarasq(); - return -1; - } - if (unlikely(!n->bar.acq)) { - trace_pci_nvme_err_startfail_nbaracq(); - return -1; - } - if (unlikely(n->bar.asq & (page_size - 1))) { - trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq); - return -1; - } - if (unlikely(n->bar.acq & (page_size - 1))) { - trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq); - return -1; - } - if (unlikely(!(NVME_CAP_CSS(n->bar.cap) & (1 << NVME_CC_CSS(n->bar.cc))))) { - trace_pci_nvme_err_startfail_css(NVME_CC_CSS(n->bar.cc)); - return -1; - } - if (unlikely(NVME_CC_MPS(n->bar.cc) < - NVME_CAP_MPSMIN(n->bar.cap))) { - trace_pci_nvme_err_startfail_page_too_small( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMIN(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_MPS(n->bar.cc) > - NVME_CAP_MPSMAX(n->bar.cap))) { - trace_pci_nvme_err_startfail_page_too_large( - NVME_CC_MPS(n->bar.cc), - NVME_CAP_MPSMAX(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) < - NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { - trace_pci_nvme_err_startfail_cqent_too_small( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MIN(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOCQES(n->bar.cc) > - NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { - trace_pci_nvme_err_startfail_cqent_too_large( - NVME_CC_IOCQES(n->bar.cc), - NVME_CTRL_CQES_MAX(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) < - NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { - trace_pci_nvme_err_startfail_sqent_too_small( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MIN(n->bar.cap)); - return -1; - } - if (unlikely(NVME_CC_IOSQES(n->bar.cc) > - NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { - trace_pci_nvme_err_startfail_sqent_too_large( - NVME_CC_IOSQES(n->bar.cc), - NVME_CTRL_SQES_MAX(n->bar.cap)); - return -1; - } - if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { - trace_pci_nvme_err_startfail_asqent_sz_zero(); - return -1; - } - if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { - trace_pci_nvme_err_startfail_acqent_sz_zero(); - return -1; - } - - n->page_bits = page_bits; - n->page_size = page_size; - n->max_prp_ents = n->page_size / sizeof(uint64_t); - n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc); - n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc); - nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0, - NVME_AQA_ACQS(n->bar.aqa) + 1, 1); - nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, - NVME_AQA_ASQS(n->bar.aqa) + 1); - - nvme_set_timestamp(n, 0ULL); - - QTAILQ_INIT(&n->aer_queue); - - nvme_select_ns_iocs(n); - - return 0; -} - -static void nvme_cmb_enable_regs(NvmeCtrl *n) -{ - NVME_CMBLOC_SET_CDPCILS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_CDPMLS(n->bar.cmbloc, 1); - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR); - - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); -} - -static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, - unsigned size) -{ - if (unlikely(offset & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, - "MMIO write not 32-bit aligned," - " offset=0x%"PRIx64"", offset); - /* should 
be ignored, fall through for now */ - } - - if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall, - "MMIO write smaller than 32-bits," - " offset=0x%"PRIx64", size=%u", - offset, size); - /* should be ignored, fall through for now */ - } - - switch (offset) { - case 0xc: /* INTMS */ - if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, - "undefined access to interrupt mask set" - " when MSI-X is enabled"); - /* should be ignored, fall through for now */ - } - n->bar.intms |= data & 0xffffffff; - n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); - nvme_irq_check(n); - break; - case 0x10: /* INTMC */ - if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, - "undefined access to interrupt mask clr" - " when MSI-X is enabled"); - /* should be ignored, fall through for now */ - } - n->bar.intms &= ~(data & 0xffffffff); - n->bar.intmc = n->bar.intms; - trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); - nvme_irq_check(n); - break; - case 0x14: /* CC */ - trace_pci_nvme_mmio_cfg(data & 0xffffffff); - /* Windows first sends data, then sends enable bit */ - if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && - !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) - { - n->bar.cc = data; - } - - if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { - n->bar.cc = data; - if (unlikely(nvme_start_ctrl(n))) { - trace_pci_nvme_err_startfail(); - n->bar.csts = NVME_CSTS_FAILED; - } else { - trace_pci_nvme_mmio_start_success(); - n->bar.csts = NVME_CSTS_READY; - } - } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { - trace_pci_nvme_mmio_stopped(); - nvme_ctrl_reset(n); - n->bar.csts &= ~NVME_CSTS_READY; - } - if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { - trace_pci_nvme_mmio_shutdown_set(); - nvme_ctrl_shutdown(n); - n->bar.cc = data; - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; - } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { - trace_pci_nvme_mmio_shutdown_cleared(); - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; - n->bar.cc = data; - } - break; - case 0x1C: /* CSTS */ - if (data & (1 << 4)) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, - "attempted to W1C CSTS.NSSRO" - " but CAP.NSSRS is zero (not supported)"); - } else if (data != 0) { - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts, - "attempted to set a read only bit" - " of controller status"); - } - break; - case 0x20: /* NSSR */ - if (data == 0x4E564D65) { - trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); - } else { - /* The spec says that writes of other values have no effect */ - return; - } - break; - case 0x24: /* AQA */ - n->bar.aqa = data & 0xffffffff; - trace_pci_nvme_mmio_aqattr(data & 0xffffffff); - break; - case 0x28: /* ASQ */ - n->bar.asq = size == 8 ? data : - (n->bar.asq & ~0xffffffffULL) | (data & 0xffffffff); - trace_pci_nvme_mmio_asqaddr(data); - break; - case 0x2c: /* ASQ hi */ - n->bar.asq = (n->bar.asq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); - break; - case 0x30: /* ACQ */ - trace_pci_nvme_mmio_acqaddr(data); - n->bar.acq = size == 8 ? 
data : - (n->bar.acq & ~0xffffffffULL) | (data & 0xffffffff); - break; - case 0x34: /* ACQ hi */ - n->bar.acq = (n->bar.acq & 0xffffffff) | (data << 32); - trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); - break; - case 0x38: /* CMBLOC */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, - "invalid write to reserved CMBLOC" - " when CMBSZ is zero, ignored"); - return; - case 0x3C: /* CMBSZ */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, - "invalid write to read only CMBSZ, ignored"); - return; - case 0x50: /* CMBMSC */ - if (!NVME_CAP_CMBS(n->bar.cap)) { - return; - } - - n->bar.cmbmsc = size == 8 ? data : - (n->bar.cmbmsc & ~0xffffffff) | (data & 0xffffffff); - n->cmb.cmse = false; - - if (NVME_CMBMSC_CRE(data)) { - nvme_cmb_enable_regs(n); - - if (NVME_CMBMSC_CMSE(data)) { - hwaddr cba = NVME_CMBMSC_CBA(data) << CMBMSC_CBA_SHIFT; - if (cba + int128_get64(n->cmb.mem.size) < cba) { - NVME_CMBSTS_SET_CBAI(n->bar.cmbsts, 1); - return; - } - - n->cmb.cba = cba; - n->cmb.cmse = true; - } - } else { - n->bar.cmbsz = 0; - n->bar.cmbloc = 0; - } - - return; - case 0x54: /* CMBMSC hi */ - n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); - return; - - case 0xE00: /* PMRCAP */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, - "invalid write to PMRCAP register, ignored"); - return; - case 0xE04: /* PMRCTL */ - n->bar.pmrctl = data; - if (NVME_PMRCTL_EN(data)) { - memory_region_set_enabled(&n->pmr.dev->mr, true); - n->bar.pmrsts = 0; - } else { - memory_region_set_enabled(&n->pmr.dev->mr, false); - NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 1); - n->pmr.cmse = false; - } - return; - case 0xE08: /* PMRSTS */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, - "invalid write to PMRSTS register, ignored"); - return; - case 0xE0C: /* PMREBS */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, - "invalid write to PMREBS register, ignored"); - return; - case 0xE10: /* PMRSWTP */ - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, - "invalid write to PMRSWTP register, ignored"); - return; - case 0xE14: /* PMRMSCL */ - if (!NVME_CAP_PMRS(n->bar.cap)) { - return; - } - - n->bar.pmrmsc = (n->bar.pmrmsc & ~0xffffffff) | (data & 0xffffffff); - n->pmr.cmse = false; - - if (NVME_PMRMSC_CMSE(n->bar.pmrmsc)) { - hwaddr cba = NVME_PMRMSC_CBA(n->bar.pmrmsc) << PMRMSC_CBA_SHIFT; - if (cba + int128_get64(n->pmr.dev->mr.size) < cba) { - NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 1); - return; - } - - n->pmr.cmse = true; - n->pmr.cba = cba; - } - - return; - case 0xE18: /* PMRMSCU */ - if (!NVME_CAP_PMRS(n->bar.cap)) { - return; - } - - n->bar.pmrmsc = (n->bar.pmrmsc & 0xffffffff) | (data << 32); - return; - default: - NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, - "invalid MMIO write," - " offset=0x%"PRIx64", data=%"PRIx64"", - offset, data); - break; - } -} - -static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - uint8_t *ptr = (uint8_t *)&n->bar; - uint64_t val = 0; - - trace_pci_nvme_mmio_read(addr, size); - - if (unlikely(addr & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32, - "MMIO read not 32-bit aligned," - " offset=0x%"PRIx64"", addr); - /* should RAZ, fall through for now */ - } else if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall, - "MMIO read smaller than 32-bits," - " offset=0x%"PRIx64"", addr); - /* should RAZ, fall through for now */ - } - - if (addr < sizeof(n->bar)) { - /* - * When PMRWBM bit 1 is set then reads from - PMRSTS should ensure prior writes - 
* made it to persistent media - */ - if (addr == 0xE08 && - (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { - memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); - } - memcpy(&val, ptr + addr, size); - } else { - NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, - "MMIO read beyond last register," - " offset=0x%"PRIx64", returning 0", addr); - } - - return val; -} - -static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) -{ - uint32_t qid; - - if (unlikely(addr & ((1 << 2) - 1))) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, - "doorbell write not 32-bit aligned," - " offset=0x%"PRIx64", ignoring", addr); - return; - } - - if (((addr - 0x1000) >> 2) & 1) { - /* Completion queue doorbell write */ - - uint16_t new_head = val & 0xffff; - int start_sqs; - NvmeCQueue *cq; - - qid = (addr - (0x1000 + (1 << 2))) >> 3; - if (unlikely(nvme_check_cqid(n, qid))) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq, - "completion queue doorbell write" - " for nonexistent queue," - " sqid=%"PRIu32", ignoring", qid); - - /* - * NVM Express v1.3d, Section 4.1 state: "If host software writes - * an invalid value to the Submission Queue Tail Doorbell or - * Completion Queue Head Doorbell regiter and an Asynchronous Event - * Request command is outstanding, then an asynchronous event is - * posted to the Admin Completion Queue with a status code of - * Invalid Doorbell Write Value." - * - * Also note that the spec includes the "Invalid Doorbell Register" - * status code, but nowhere does it specify when to use it. - * However, it seems reasonable to use it here in a similar - * fashion. - */ - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_REGISTER, - NVME_LOG_ERROR_INFO); - } - - return; - } - - cq = n->cq[qid]; - if (unlikely(new_head >= cq->size)) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead, - "completion queue doorbell write value" - " beyond queue size, sqid=%"PRIu32"," - " new_head=%"PRIu16", ignoring", - qid, new_head); - - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_VALUE, - NVME_LOG_ERROR_INFO); - } - - return; - } - - trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head); - - start_sqs = nvme_cq_full(cq) ? 
1 : 0; - cq->head = new_head; - if (start_sqs) { - NvmeSQueue *sq; - QTAILQ_FOREACH(sq, &cq->sq_list, entry) { - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } - - if (cq->tail == cq->head) { - nvme_irq_deassert(n, cq); - } - } else { - /* Submission queue doorbell write */ - - uint16_t new_tail = val & 0xffff; - NvmeSQueue *sq; - - qid = (addr - 0x1000) >> 3; - if (unlikely(nvme_check_sqid(n, qid))) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq, - "submission queue doorbell write" - " for nonexistent queue," - " sqid=%"PRIu32", ignoring", qid); - - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_REGISTER, - NVME_LOG_ERROR_INFO); - } - - return; - } - - sq = n->sq[qid]; - if (unlikely(new_tail >= sq->size)) { - NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail, - "submission queue doorbell write value" - " beyond queue size, sqid=%"PRIu32"," - " new_tail=%"PRIu16", ignoring", - qid, new_tail); - - if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, - NVME_AER_INFO_ERR_INVALID_DB_VALUE, - NVME_LOG_ERROR_INFO); - } - - return; - } - - trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail); - - sq->tail = new_tail; - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } -} - -static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - - trace_pci_nvme_mmio_write(addr, data, size); - - if (addr < sizeof(n->bar)) { - nvme_write_bar(n, addr, data, size); - } else { - nvme_process_db(n, addr, data); - } -} - -static const MemoryRegionOps nvme_mmio_ops = { - .read = nvme_mmio_read, - .write = nvme_mmio_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { - .min_access_size = 2, - .max_access_size = 8, - }, -}; - -static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - stn_le_p(&n->cmb.buf[addr], size, data); -} - -static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size) -{ - NvmeCtrl *n = (NvmeCtrl *)opaque; - return ldn_le_p(&n->cmb.buf[addr], size); -} - -static const MemoryRegionOps nvme_cmb_ops = { - .read = nvme_cmb_read, - .write = nvme_cmb_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - }, -}; - -static void nvme_check_constraints(NvmeCtrl *n, Error **errp) -{ - NvmeParams *params = &n->params; - - if (params->num_queues) { - warn_report("num_queues is deprecated; please use max_ioqpairs " - "instead"); - - params->max_ioqpairs = params->num_queues - 1; - } - - if (n->namespace.blkconf.blk && n->subsys) { - error_setg(errp, "subsystem support is unavailable with legacy " - "namespace ('drive' property)"); - return; - } - - if (params->max_ioqpairs < 1 || - params->max_ioqpairs > NVME_MAX_IOQPAIRS) { - error_setg(errp, "max_ioqpairs must be between 1 and %d", - NVME_MAX_IOQPAIRS); - return; - } - - if (params->msix_qsize < 1 || - params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) { - error_setg(errp, "msix_qsize must be between 1 and %d", - PCI_MSIX_FLAGS_QSIZE + 1); - return; - } - - if (!params->serial) { - error_setg(errp, "serial property not set"); - return; - } - - if (n->pmr.dev) { - if (host_memory_backend_is_mapped(n->pmr.dev)) { - error_setg(errp, "can't use already busy memdev: %s", - object_get_canonical_path_component(OBJECT(n->pmr.dev))); - return; - } - - if 
(!is_power_of_2(n->pmr.dev->size)) { - error_setg(errp, "pmr backend size needs to be power of 2 in size"); - return; - } - - host_memory_backend_set_mapped(n->pmr.dev, true); - } - - if (n->params.zasl > n->params.mdts) { - error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less " - "than or equal to mdts (Maximum Data Transfer Size)"); - return; - } - - if (!n->params.vsl) { - error_setg(errp, "vsl must be non-zero"); - return; - } -} - -static void nvme_init_state(NvmeCtrl *n) -{ - n->num_namespaces = NVME_MAX_NAMESPACES; - /* add one to max_ioqpairs to account for the admin queue pair */ - n->reg_size = pow2ceil(sizeof(NvmeBar) + - 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); - n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); - n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); - n->temperature = NVME_TEMPERATURE; - n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; - n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); - n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); -} - -static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) -{ - uint64_t cmb_size = n->params.cmb_size_mb * MiB; - - n->cmb.buf = g_malloc0(cmb_size); - memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n, - "nvme-cmb", cmb_size); - pci_register_bar(pci_dev, NVME_CMB_BIR, - PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem); - - NVME_CAP_SET_CMBS(n->bar.cap, 1); - - if (n->params.legacy_cmb) { - nvme_cmb_enable_regs(n); - n->cmb.cmse = true; - } -} - -static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) -{ - NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 1); - NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR); - /* Turn on bit 1 support */ - NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); - NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 1); - - pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), - PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr); - - memory_region_set_enabled(&n->pmr.dev->mr, false); -} - -static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) -{ - uint8_t *pci_conf = pci_dev->config; - uint64_t bar_size, msix_table_size, msix_pba_size; - unsigned msix_table_offset, msix_pba_offset; - int ret; - - Error *err = NULL; - - pci_conf[PCI_INTERRUPT_PIN] = 1; - pci_config_set_prog_interface(pci_conf, 0x2); - - if (n->params.use_intel_id) { - pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); - pci_config_set_device_id(pci_conf, 0x5845); - } else { - pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); - pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); - } - - pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); - pcie_endpoint_cap_init(pci_dev, 0x80); - - bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); - msix_table_offset = bar_size; - msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; - - bar_size += msix_table_size; - bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); - msix_pba_offset = bar_size; - msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8; - - bar_size += msix_pba_size; - bar_size = pow2ceil(bar_size); - - memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", - n->reg_size); - memory_region_add_subregion(&n->bar0, 0, &n->iomem); - - pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); - ret = 
msix_init(pci_dev, n->params.msix_qsize, - &n->bar0, 0, msix_table_offset, - &n->bar0, 0, msix_pba_offset, 0, &err); - if (ret < 0) { - if (ret == -ENOTSUP) { - warn_report_err(err); - } else { - error_propagate(errp, err); - return ret; - } - } - - if (n->params.cmb_size_mb) { - nvme_init_cmb(n, pci_dev); - } - - if (n->pmr.dev) { - nvme_init_pmr(n, pci_dev); - } - - return 0; -} - -static void nvme_init_subnqn(NvmeCtrl *n) -{ - NvmeSubsystem *subsys = n->subsys; - NvmeIdCtrl *id = &n->id_ctrl; - - if (!subsys) { - snprintf((char *)id->subnqn, sizeof(id->subnqn), - "nqn.2019-08.org.qemu:%s", n->params.serial); - } else { - pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn); - } -} - -static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) -{ - NvmeIdCtrl *id = &n->id_ctrl; - uint8_t *pci_conf = pci_dev->config; - - id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); - id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); - strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); - strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); - - id->cntlid = cpu_to_le16(n->cntlid); - - id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); - - id->rab = 6; - - if (n->params.use_intel_id) { - id->ieee[0] = 0xb3; - id->ieee[1] = 0x02; - id->ieee[2] = 0x00; - } else { - id->ieee[0] = 0x00; - id->ieee[1] = 0x54; - id->ieee[2] = 0x52; - } - - id->mdts = n->params.mdts; - id->ver = cpu_to_le32(NVME_SPEC_VER); - id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT); - id->cntrltype = 0x1; - - /* - * Because the controller always completes the Abort command immediately, - * there can never be more than one concurrently executing Abort command, - * so this value is never used for anything. Note that there can easily be - * many Abort commands in the queues, but they are not considered - * "executing" until processed by nvme_abort. - * - * The specification recommends a value of 3 for Abort Command Limit (four - * concurrently outstanding Abort commands), so lets use that though it is - * inconsequential. - */ - id->acl = 3; - id->aerl = n->params.aerl; - id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO; - id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED; - - /* recommended default value (~70 C) */ - id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING); - id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL); - - id->sqes = (0x6 << 4) | 0x6; - id->cqes = (0x4 << 4) | 0x4; - id->nn = cpu_to_le32(n->num_namespaces); - id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | - NVME_ONCS_FEATURES | NVME_ONCS_DSM | - NVME_ONCS_COMPARE | NVME_ONCS_COPY); - - /* - * NOTE: If this device ever supports a command set that does NOT use 0x0 - * as a Flush-equivalent operation, support for the broadcast NSID in Flush - * should probably be removed. - * - * See comment in nvme_io_cmd. 
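/*
 * Illustrative sketch, not part of the patch: how nvme_init_pci() above
 * sizes BAR0.  The register/doorbell block (reg_size, itself rounded up
 * in nvme_init_state() to cover NvmeBar plus two 4-byte doorbells per
 * queue pair) is aligned to 4 KiB, the MSI-X table (16 bytes per
 * vector) and the pending-bit array (one bit per vector, padded to 64)
 * are appended, and the total is rounded up to a power of two as PCI
 * BAR sizes require.  Standalone approximation with invented names.
 */
#include <stdint.h>

#define DEMO_KIB 1024ULL
#define DEMO_ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

static uint64_t demo_pow2ceil(uint64_t v)
{
    uint64_t p = 1;

    while (p < v) {
        p <<= 1;
    }
    return p;
}

static uint64_t demo_nvme_bar0_size(uint64_t reg_size, unsigned msix_qsize)
{
    uint64_t bar = DEMO_ALIGN_UP(reg_size, 4 * DEMO_KIB); /* registers + doorbells */

    bar += 16ULL * msix_qsize;                            /* MSI-X table */
    bar = DEMO_ALIGN_UP(bar, 4 * DEMO_KIB);
    bar += DEMO_ALIGN_UP((uint64_t)msix_qsize, 64) / 8;   /* MSI-X PBA   */

    return demo_pow2ceil(bar);
}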
- */ - id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; - - id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); - id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | - NVME_CTRL_SGLS_BITBUCKET); - - nvme_init_subnqn(n); - - id->psd[0].mp = cpu_to_le16(0x9c4); - id->psd[0].enlat = cpu_to_le32(0x10); - id->psd[0].exlat = cpu_to_le32(0x4); - - if (n->subsys) { - id->cmic |= NVME_CMIC_MULTI_CTRL; - } - - NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); - NVME_CAP_SET_CQR(n->bar.cap, 1); - NVME_CAP_SET_TO(n->bar.cap, 0xf); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_NVM); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_CSI_SUPP); - NVME_CAP_SET_CSS(n->bar.cap, NVME_CAP_CSS_ADMIN_ONLY); - NVME_CAP_SET_MPSMAX(n->bar.cap, 4); - NVME_CAP_SET_CMBS(n->bar.cap, n->params.cmb_size_mb ? 1 : 0); - NVME_CAP_SET_PMRS(n->bar.cap, n->pmr.dev ? 1 : 0); - - n->bar.vs = NVME_SPEC_VER; - n->bar.intmc = n->bar.intms = 0; -} - -static int nvme_init_subsys(NvmeCtrl *n, Error **errp) -{ - int cntlid; - - if (!n->subsys) { - return 0; - } - - cntlid = nvme_subsys_register_ctrl(n, errp); - if (cntlid < 0) { - return -1; - } - - n->cntlid = cntlid; - - return 0; -} - -void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) -{ - uint32_t nsid = ns->params.nsid; - assert(nsid && nsid <= NVME_MAX_NAMESPACES); - - n->namespaces[nsid - 1] = ns; - ns->attached++; - - n->dmrsl = MIN_NON_ZERO(n->dmrsl, - BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); -} - -static void nvme_realize(PCIDevice *pci_dev, Error **errp) -{ - NvmeCtrl *n = NVME(pci_dev); - NvmeNamespace *ns; - Error *local_err = NULL; - - nvme_check_constraints(n, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - qbus_create_inplace(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, - &pci_dev->qdev, n->parent_obj.qdev.id); - - nvme_init_state(n); - if (nvme_init_pci(n, pci_dev, errp)) { - return; - } - - if (nvme_init_subsys(n, errp)) { - error_propagate(errp, local_err); - return; - } - nvme_init_ctrl(n, pci_dev); - - /* setup a namespace if the controller drive property was given */ - if (n->namespace.blkconf.blk) { - ns = &n->namespace; - ns->params.nsid = 1; - - if (nvme_ns_setup(n, ns, errp)) { - return; - } - - nvme_attach_ns(n, ns); - } -} - -static void nvme_exit(PCIDevice *pci_dev) -{ - NvmeCtrl *n = NVME(pci_dev); - NvmeNamespace *ns; - int i; - - nvme_ctrl_reset(n); - - for (i = 1; i <= n->num_namespaces; i++) { - ns = nvme_ns(n, i); - if (!ns) { - continue; - } - - nvme_ns_cleanup(ns); - } - - g_free(n->cq); - g_free(n->sq); - g_free(n->aer_reqs); - - if (n->params.cmb_size_mb) { - g_free(n->cmb.buf); - } - - if (n->pmr.dev) { - host_memory_backend_set_mapped(n->pmr.dev, false); - } - msix_uninit(pci_dev, &n->bar0, &n->bar0); - memory_region_del_subregion(&n->bar0, &n->iomem); -} - -static Property nvme_props[] = { - DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf), - DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND, - HostMemoryBackend *), - DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS, - NvmeSubsystem *), - DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial), - DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), - DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), - DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), - DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65), - DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3), - DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64), - DEFINE_PROP_UINT8("mdts", 
NvmeCtrl, params.mdts, 7), - DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7), - DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), - DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), - DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), - DEFINE_PROP_END_OF_LIST(), -}; - -static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - NvmeCtrl *n = NVME(obj); - uint8_t value = n->smart_critical_warning; - - visit_type_uint8(v, name, &value, errp); -} - -static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - NvmeCtrl *n = NVME(obj); - uint8_t value, old_value, cap = 0, index, event; - - if (!visit_type_uint8(v, name, &value, errp)) { - return; - } - - cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY - | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; - if (NVME_CAP_PMRS(n->bar.cap)) { - cap |= NVME_SMART_PMR_UNRELIABLE; - } - - if ((value & cap) != value) { - error_setg(errp, "unsupported smart critical warning bits: 0x%x", - value & ~cap); - return; - } - - old_value = n->smart_critical_warning; - n->smart_critical_warning = value; - - /* only inject new bits of smart critical warning */ - for (index = 0; index < NVME_SMART_WARN_MAX; index++) { - event = 1 << index; - if (value & ~old_value & event) - nvme_smart_event(n, event); - } -} - -static const VMStateDescription nvme_vmstate = { - .name = "nvme", - .unmigratable = 1, -}; - -static void nvme_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); - - pc->realize = nvme_realize; - pc->exit = nvme_exit; - pc->class_id = PCI_CLASS_STORAGE_EXPRESS; - pc->revision = 2; - - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - dc->desc = "Non-Volatile Memory Express"; - device_class_set_props(dc, nvme_props); - dc->vmsd = &nvme_vmstate; -} - -static void nvme_instance_init(Object *obj) -{ - NvmeCtrl *n = NVME(obj); - - device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex, - "bootindex", "/namespace@1,0", - DEVICE(obj)); - - object_property_add(obj, "smart_critical_warning", "uint8", - nvme_get_smart_warning, - nvme_set_smart_warning, NULL, NULL); -} - -static const TypeInfo nvme_info = { - .name = TYPE_NVME, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(NvmeCtrl), - .instance_init = nvme_instance_init, - .class_init = nvme_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { } - }, -}; - -static const TypeInfo nvme_bus_info = { - .name = TYPE_NVME_BUS, - .parent = TYPE_BUS, - .instance_size = sizeof(NvmeBus), -}; - -static void nvme_register_types(void) -{ - type_register_static(&nvme_info); - type_register_static(&nvme_bus_info); -} - -type_init(nvme_register_types) diff --git a/hw/block/nvme.h b/hw/block/nvme.h deleted file mode 100644 index 5d05ec368f7..00000000000 --- a/hw/block/nvme.h +++ /dev/null @@ -1,266 +0,0 @@ -#ifndef HW_NVME_H -#define HW_NVME_H - -#include "block/nvme.h" -#include "hw/pci/pci.h" -#include "nvme-subsys.h" -#include "nvme-ns.h" - -#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) -#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) - -typedef struct NvmeParams { - char *serial; - uint32_t num_queues; /* deprecated since 5.1 */ - uint32_t max_ioqpairs; - uint16_t msix_qsize; - uint32_t cmb_size_mb; - uint8_t aerl; - uint32_t aer_max_queued; - uint8_t mdts; - uint8_t vsl; - bool use_intel_id; - uint8_t zasl; - bool 
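/*
 * Illustrative sketch, not part of the patch: the smart_critical_warning
 * setter above raises an NVMe SMART event only for bits that transition
 * from 0 to 1, i.e. bits set in the new value but not in the old one.
 * The helper name is invented for the example.
 */
#include <stdint.h>

static uint8_t demo_newly_set_bits(uint8_t old_value, uint8_t new_value)
{
    return new_value & (uint8_t)~old_value; /* keeps only 0 -> 1 transitions */
}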
legacy_cmb; -} NvmeParams; - -typedef struct NvmeAsyncEvent { - QTAILQ_ENTRY(NvmeAsyncEvent) entry; - NvmeAerResult result; -} NvmeAsyncEvent; - -enum { - NVME_SG_ALLOC = 1 << 0, - NVME_SG_DMA = 1 << 1, -}; - -typedef struct NvmeSg { - int flags; - - union { - QEMUSGList qsg; - QEMUIOVector iov; - }; -} NvmeSg; - -typedef struct NvmeRequest { - struct NvmeSQueue *sq; - struct NvmeNamespace *ns; - BlockAIOCB *aiocb; - uint16_t status; - void *opaque; - NvmeCqe cqe; - NvmeCmd cmd; - BlockAcctCookie acct; - NvmeSg sg; - QTAILQ_ENTRY(NvmeRequest)entry; -} NvmeRequest; - -typedef struct NvmeBounceContext { - NvmeRequest *req; - - struct { - QEMUIOVector iov; - uint8_t *bounce; - } data, mdata; -} NvmeBounceContext; - -static inline const char *nvme_adm_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; - case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; - case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; - case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; - case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; - case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; - case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; - case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; - case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; - case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; - case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; - case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; - default: return "NVME_ADM_CMD_UNKNOWN"; - } -} - -static inline const char *nvme_io_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; - case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; - case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; - case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; - case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; - case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; - case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; - case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; - case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; - case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; - case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; - default: return "NVME_NVM_CMD_UNKNOWN"; - } -} - -typedef struct NvmeSQueue { - struct NvmeCtrl *ctrl; - uint16_t sqid; - uint16_t cqid; - uint32_t head; - uint32_t tail; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - NvmeRequest *io_req; - QTAILQ_HEAD(, NvmeRequest) req_list; - QTAILQ_HEAD(, NvmeRequest) out_req_list; - QTAILQ_ENTRY(NvmeSQueue) entry; -} NvmeSQueue; - -typedef struct NvmeCQueue { - struct NvmeCtrl *ctrl; - uint8_t phase; - uint16_t cqid; - uint16_t irq_enabled; - uint32_t head; - uint32_t tail; - uint32_t vector; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - QTAILQ_HEAD(, NvmeSQueue) sq_list; - QTAILQ_HEAD(, NvmeRequest) req_list; -} NvmeCQueue; - -#define TYPE_NVME_BUS "nvme-bus" -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) - -typedef struct NvmeBus { - BusState parent_bus; -} NvmeBus; - -#define TYPE_NVME "nvme" -#define NVME(obj) \ - OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) - -typedef struct NvmeFeatureVal { - struct { - uint16_t temp_thresh_hi; - uint16_t temp_thresh_low; - }; - uint32_t async_config; -} NvmeFeatureVal; - -typedef struct NvmeCtrl { - PCIDevice parent_obj; - MemoryRegion bar0; - MemoryRegion iomem; - 
NvmeBar bar; - NvmeParams params; - NvmeBus bus; - - uint16_t cntlid; - bool qs_created; - uint32_t page_size; - uint16_t page_bits; - uint16_t max_prp_ents; - uint16_t cqe_size; - uint16_t sqe_size; - uint32_t reg_size; - uint32_t num_namespaces; - uint32_t max_q_ents; - uint8_t outstanding_aers; - uint32_t irq_status; - uint64_t host_timestamp; /* Timestamp sent by the host */ - uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - uint64_t starttime_ms; - uint16_t temperature; - uint8_t smart_critical_warning; - - struct { - MemoryRegion mem; - uint8_t *buf; - bool cmse; - hwaddr cba; - } cmb; - - struct { - HostMemoryBackend *dev; - bool cmse; - hwaddr cba; - } pmr; - - uint8_t aer_mask; - NvmeRequest **aer_reqs; - QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; - int aer_queued; - - uint32_t dmrsl; - - /* Namespace ID is started with 1 so bitmap should be 1-based */ -#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) - DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); - - NvmeSubsystem *subsys; - - NvmeNamespace namespace; - /* - * Attached namespaces to this controller. If subsys is not given, all - * namespaces in this list will always be attached. - */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES]; - NvmeSQueue **sq; - NvmeCQueue **cq; - NvmeSQueue admin_sq; - NvmeCQueue admin_cq; - NvmeIdCtrl id_ctrl; - NvmeFeatureVal features; -} NvmeCtrl; - -static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) -{ - if (!nsid || nsid > n->num_namespaces) { - return NULL; - } - - return n->namespaces[nsid - 1]; -} - -static inline NvmeCQueue *nvme_cq(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - NvmeCtrl *n = sq->ctrl; - - return n->cq[sq->cqid]; -} - -static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - return sq->ctrl; -} - -static inline uint16_t nvme_cid(NvmeRequest *req) -{ - if (!req) { - return 0xffff; - } - - return le16_to_cpu(req->cqe.cid); -} - -typedef enum NvmeTxDirection { - NVME_TX_DIRECTION_TO_DEVICE = 0, - NVME_TX_DIRECTION_FROM_DEVICE = 1, -} NvmeTxDirection; - -void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -void nvme_rw_complete_cb(void *opaque, int ret); -uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd); - -#endif /* HW_NVME_H */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index 25c053693ce..02c514fb6e0 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -173,7 +173,6 @@ static void pflash_setup_mappings(PFlashCFI02 *pfl) "pflash-alias", &pfl->orig_mem, 0, size); memory_region_add_subregion(&pfl->mem, i * size, &pfl->mem_mappings[i]); } - pfl->rom_mode = true; } static void pflash_reset_state_machine(PFlashCFI02 *pfl) @@ -917,8 +916,13 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) /* Allocate memory for a bitmap for sectors being erased. 
*/ pfl->sector_erase_map = bitmap_new(pfl->total_sectors); - pflash_setup_mappings(pfl); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + pfl->rom_mode = true; + if (pfl->mappings > 1) { + pflash_setup_mappings(pfl); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + } else { + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->orig_mem); + } timer_init_ns(&pfl->timer, QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->status = 0; diff --git a/hw/block/swim.c b/hw/block/swim.c index 509c2f49003..333da08ce09 100644 --- a/hw/block/swim.c +++ b/hw/block/swim.c @@ -421,8 +421,7 @@ static void sysbus_swim_realize(DeviceState *dev, Error **errp) Swim *sys = SWIM(dev); SWIMCtrl *swimctrl = &sys->ctrl; - qbus_create_inplace(&swimctrl->bus, sizeof(SWIMBus), TYPE_SWIM_BUS, dev, - NULL); + qbus_init(&swimctrl->bus, sizeof(SWIMBus), TYPE_SWIM_BUS, dev, NULL); swimctrl->bus.ctrl = swimctrl; } diff --git a/hw/block/trace-events b/hw/block/trace-events index fa12e3a67a7..d86b53520cc 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -1,9 +1,12 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # fdc.c fdc_ioport_read(uint8_t reg, uint8_t value) "read reg 0x%02x val 0x%02x" fdc_ioport_write(uint8_t reg, uint8_t value) "write reg 0x%02x val 0x%02x" +# fdc-sysbus.c +fdctrl_tc_pulse(int level) "TC pulse: %u" + # pflash_cfi01.c # pflash_cfi02.c pflash_chip_erase_invalid(const char *name, uint64_t offset) "%s: chip erase: invalid address 0x%" PRIx64 @@ -49,212 +52,6 @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" -# nvme.c -# nvme traces for successful events -pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" -pci_nvme_irq_pin(void) "pulsing IRQ pin" -pci_nvme_irq_masked(void) "IRQ is masked" -pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" -pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" -pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" -pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" 
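/*
 * Illustrative sketch, not part of the patch: each entry in trace-events
 * (see docs/devel/tracing.rst) declares a trace point, and the build
 * generates a matching trace_<name>() helper taking the declared
 * arguments.  For the fdctrl_tc_pulse event added above, the call site
 * in fdc-sysbus.c would look roughly like this (handler name and body
 * are assumptions for the example):
 */
#include "qemu/osdep.h"
#include "trace.h"                    /* generated per-directory trace header */

static void demo_fdctrl_tc(void *opaque, int irq, int level)
{
    trace_fdctrl_tc_pulse(level);     /* logs "TC pulse: <level>" when enabled */
    /* ... terminal-count handling would follow here ... */
}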
-pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" -pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" -pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" -pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" -pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" -pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" -pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" -pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" -pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" -pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" -pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" -pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" -pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" -pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_create_sq(uint64_t 
addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" -pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" -pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" -pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" -pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" -pci_nvme_identify_ctrl(void) "identify controller" -pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" -pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" -pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" -pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" -pci_nvme_identify_cmd_set(void) "identify i/o command set" -pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" -pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" -pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" -pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" -pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" -pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -pci_nvme_process_aers(int queued) "queued %d" -pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" -pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" -pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" -pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" -pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_enqueue_event_noqueue(int queued) "queued %d" -pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" -pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" -pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" -pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" -pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" -pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" 
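/*
 * Illustrative sketch, not part of the patch: how the doorbell offsets
 * behind the two traces above map to queues.  With a doorbell stride of
 * 4 bytes, each queue pair owns an 8-byte slot starting at offset
 * 0x1000: the submission queue tail doorbell first, the completion
 * queue head doorbell 4 bytes later.  nvme_process_db() decodes this as
 * follows (simplified, helper name invented):
 */
#include <stdbool.h>
#include <stdint.h>

static void demo_nvme_decode_doorbell(uint64_t addr, bool *is_cq, uint32_t *qid)
{
    uint64_t off = addr - 0x1000;   /* doorbells follow the register block */

    *is_cq = (off >> 2) & 1;        /* odd 4-byte slot => CQ head doorbell */
    *qid   = off >> 3;              /* 8 bytes (SQ tail + CQ head) per queue pair */
}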
-pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" -pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" -pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" -pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" -pci_nvme_mmio_stopped(void) "cleared controller enable bit" -pci_nvme_mmio_shutdown_set(void) "shutdown bit set" -pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" -pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" -pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" -pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" - -# nvme traces for error conditions -pci_nvme_err_mdts(size_t len) "len %zu" -pci_nvme_err_zasl(size_t len) "len %zu" -pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" -pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_cfs(void) "controller fatal status" -pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" -pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" -pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" -pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" -pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 
0x%"PRIx8"" -pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" -pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" -pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" -pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" -pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" -pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" -pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64"" -pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" -pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" -pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" -pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" -pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" -pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" -pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" -pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" -pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid 
cns=0x%"PRIx16"" -pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" -pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" -pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" -pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" -pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" -pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" -pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" -pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" -pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" -pci_nvme_err_startfail(void) "setting controller enable bit failed" -pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" - -# Traces for undefined behavior -pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" -pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" -pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" -pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" -pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" 
-pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" -pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" -pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" -pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" -pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" -pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" -pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" -pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" - # xen-block.c xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" xen_block_connect(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 0b5b9d44cdb..ba13cb87e52 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -31,6 +31,8 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" +#define REALIZE_CONNECTION_RETRIES 3 + static const int user_feature_bits[] = { VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_SEG_MAX, @@ -47,9 +49,13 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_RING_PACKED, + VIRTIO_F_IOMMU_PLATFORM, VHOST_INVALID_FEATURE_BIT }; +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); + static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) { VHostUserBlk *s = VHOST_USER_BLK(vdev); @@ -87,11 +93,13 @@ static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) int ret; struct virtio_blk_config blkcfg; VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); + Error *local_err = NULL; ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, - sizeof(struct virtio_blk_config)); + sizeof(struct virtio_blk_config), + &local_err); if (ret < 0) { - error_report("get config space failed"); + error_report_err(local_err); return -1; } @@ -109,7 +117,7 @@ const VhostDevConfigOps blk_ops = { .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, }; -static int vhost_user_blk_start(VirtIODevice *vdev) +static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp) { VHostUserBlk *s = VHOST_USER_BLK(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -117,19 +125,19 @@ static int vhost_user_blk_start(VirtIODevice *vdev) int i, ret; if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); + error_setg(errp, "binding does not support guest notifiers"); 
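/*
 * Illustrative sketch, not part of the patch: the error-handling pattern
 * this hunk converts vhost-user-blk to.  The callee fills an Error
 * through error_setg()/error_setg_errno() and returns a negative errno;
 * the caller either propagates the Error or reports it with added
 * context.  Function names below are invented for the example.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"

static int demo_start(Error **errp)
{
    int ret = -ENOSYS;      /* stand-in for a failed backend call */

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Error starting vhost");
        return ret;
    }
    return 0;
}

static void demo_caller(void)
{
    Error *local_err = NULL;

    if (demo_start(&local_err) < 0) {
        /* prepends the context string, prints the error and frees it */
        error_reportf_err(local_err, "vhost-user-blk: vhost start failed: ");
    }
}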
return -ENOSYS; } ret = vhost_dev_enable_notifiers(&s->dev, vdev); if (ret < 0) { - error_report("Error enabling host notifiers: %d", -ret); + error_setg_errno(errp, -ret, "Error enabling host notifiers"); return ret; } ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); if (ret < 0) { - error_report("Error binding guest notifier: %d", -ret); + error_setg_errno(errp, -ret, "Error binding guest notifier"); goto err_host_notifiers; } @@ -137,27 +145,27 @@ static int vhost_user_blk_start(VirtIODevice *vdev) ret = vhost_dev_prepare_inflight(&s->dev, vdev); if (ret < 0) { - error_report("Error set inflight format: %d", -ret); + error_setg_errno(errp, -ret, "Error setting inflight format"); goto err_guest_notifiers; } if (!s->inflight->addr) { ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight); if (ret < 0) { - error_report("Error get inflight: %d", -ret); + error_setg_errno(errp, -ret, "Error getting inflight"); goto err_guest_notifiers; } } ret = vhost_dev_set_inflight(&s->dev, s->inflight); if (ret < 0) { - error_report("Error set inflight: %d", -ret); + error_setg_errno(errp, -ret, "Error setting inflight"); goto err_guest_notifiers; } ret = vhost_dev_start(&s->dev, vdev); if (ret < 0) { - error_report("Error starting vhost: %d", -ret); + error_setg_errno(errp, -ret, "Error starting vhost"); goto err_guest_notifiers; } s->started_vu = true; @@ -210,6 +218,7 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; int ret; if (!vdev->vm_running) { @@ -225,10 +234,9 @@ static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) } if (should_start) { - ret = vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev, &local_err); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); + error_reportf_err(local_err, "vhost-user-blk: vhost start failed: "); qemu_chr_fe_disconnect(&s->chardev); } } else { @@ -266,6 +274,7 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VHostUserBlk *s = VHOST_USER_BLK(vdev); + Error *local_err = NULL; int i, ret; if (!vdev->start_on_kick) { @@ -283,10 +292,9 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * vhost here instead of waiting for .set_status(). 
*/ - ret = vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev, &local_err); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); + error_reportf_err(local_err, "vhost-user-blk: vhost start failed: "); qemu_chr_fe_disconnect(&s->chardev); return; } @@ -309,7 +317,7 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) vhost_dev_free_inflight(s->inflight); } -static int vhost_user_blk_connect(DeviceState *dev) +static int vhost_user_blk_connect(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); @@ -320,6 +328,7 @@ static int vhost_user_blk_connect(DeviceState *dev) } s->connected = true; + s->dev.num_queues = s->num_queues; s->dev.nvqs = s->num_queues; s->dev.vqs = s->vhost_vqs; s->dev.vq_index = 0; @@ -327,19 +336,16 @@ static int vhost_user_blk_connect(DeviceState *dev) vhost_dev_set_config_notifier(&s->dev, &blk_ops); - ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0, + errp); if (ret < 0) { - error_report("vhost-user-blk: vhost initialization failed: %s", - strerror(-ret)); return ret; } /* restore vhost state */ if (virtio_device_started(vdev, vdev->status)) { - ret = vhost_user_blk_start(vdev); + ret = vhost_user_blk_start(vdev, errp); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); return ret; } } @@ -362,19 +368,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - bool realized); - -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event) -{ - vhost_user_blk_event(opaque, event, false); -} - -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event) -{ - vhost_user_blk_event(opaque, event, true); -} - static void vhost_user_blk_chr_closed_bh(void *opaque) { DeviceState *dev = opaque; @@ -382,36 +375,27 @@ static void vhost_user_blk_chr_closed_bh(void *opaque) VHostUserBlk *s = VHOST_USER_BLK(vdev); vhost_user_blk_disconnect(dev); - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_oper, NULL, opaque, NULL, true); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, opaque, NULL, true); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - bool realized) +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) { DeviceState *dev = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); + Error *local_err = NULL; switch (event) { case CHR_EVENT_OPENED: - if (vhost_user_blk_connect(dev) < 0) { + if (vhost_user_blk_connect(dev, &local_err) < 0) { + error_report_err(local_err); qemu_chr_fe_disconnect(&s->chardev); return; } break; case CHR_EVENT_CLOSED: - /* - * Closing the connection should happen differently on device - * initialization and operation stages. - * On initalization, we want to re-start vhost_dev initialization - * from the very beginning right away when the connection is closed, - * so we clean up vhost_dev on each connection closing. - * On operation, we want to postpone vhost_dev cleanup to let the - * other code perform its own cleanup sequence using vhost_dev data - * (e.g. vhost_dev_set_log). 
- */ - if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) { + if (!runstate_check(RUN_STATE_SHUTDOWN)) { /* * A close event may happen during a read/write, but vhost * code assumes the vhost_dev remains setup, so delay the @@ -431,8 +415,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, * knowing its type (in this case vhost-user). */ s->dev.started = false; - } else { - vhost_user_blk_disconnect(dev); } break; case CHR_EVENT_BREAK: @@ -443,15 +425,46 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, } } +static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp) +{ + DeviceState *dev = &s->parent_obj.parent_obj; + int ret; + + s->connected = false; + + ret = qemu_chr_fe_wait_connected(&s->chardev, errp); + if (ret < 0) { + return ret; + } + + ret = vhost_user_blk_connect(dev, errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&s->chardev); + return ret; + } + assert(s->connected); + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + sizeof(struct virtio_blk_config), errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&s->chardev); + vhost_dev_cleanup(&s->dev); + return ret; + } + + return 0; +} + static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); - Error *err = NULL; + int retries; int i, ret; if (!s->chardev.chr) { - error_setg(errp, "vhost-user-blk: chardev is mandatory"); + error_setg(errp, "chardev is mandatory"); return; } @@ -459,12 +472,17 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->num_queues = 1; } if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { - error_setg(errp, "vhost-user-blk: invalid number of IO queues"); + error_setg(errp, "invalid number of IO queues"); return; } if (!s->queue_size) { - error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + error_setg(errp, "queue size must be non-zero"); + return; + } + if (s->queue_size > VIRTQUEUE_MAX_SIZE) { + error_setg(errp, "queue size must not exceed %d", + VIRTQUEUE_MAX_SIZE); return; } @@ -483,33 +501,25 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->inflight = g_new0(struct vhost_inflight, 1); s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); - s->connected = false; - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_realize, NULL, (void *)dev, - NULL, true); - -reconnect: - if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { - error_report_err(err); - goto virtio_err; - } - - /* check whether vhost_user_blk_connect() failed or not */ - if (!s->connected) { - goto reconnect; - } + retries = REALIZE_CONNECTION_RETRIES; + assert(!*errp); + do { + if (*errp) { + error_prepend(errp, "Reconnecting after error: "); + error_report_err(*errp); + *errp = NULL; + } + ret = vhost_user_blk_realize_connect(s, errp); + } while (ret == -EPROTO && retries--); - ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, - sizeof(struct virtio_blk_config)); if (ret < 0) { - error_report("vhost-user-blk: get block config failed"); - goto reconnect; + goto virtio_err; } - /* we're fully initialized, now we can operate, so change the handler */ + /* we're fully initialized, now we can operate, so add the handler */ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_oper, NULL, (void *)dev, + vhost_user_blk_event, NULL, (void *)dev, NULL, true); return; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c 
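/*
 * Illustrative sketch, not part of the patch: the bounded reconnect loop
 * that vhost_user_blk_device_realize() gains above.  Only protocol-level
 * failures (-EPROTO) are retried; the Error from a failed attempt is
 * reported and cleared before the next try, so only the final failure
 * reaches the caller.  In the real code ERRP_GUARD() guarantees *errp
 * may be inspected; names below are invented for the example.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"

#define DEMO_CONNECTION_RETRIES 3

static int demo_connect(Error **errp); /* stand-in for vhost_user_blk_realize_connect() */

static int demo_realize_connect_loop(Error **errp)
{
    int retries = DEMO_CONNECTION_RETRIES;
    int ret;

    do {
        if (*errp) {
            /* report the previous attempt's failure, then retry cleanly */
            error_prepend(errp, "Reconnecting after error: ");
            error_report_err(*errp);
            *errp = NULL;
        }
        ret = demo_connect(errp);
    } while (ret == -EPROTO && retries--);

    return ret;
}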
index d28979efb8d..f139cd7cc9c 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -40,7 +40,7 @@ * Starting from the discard feature, we can use this array to properly * set the config size depending on the features enabled. */ -static VirtIOFeature feature_sizes[] = { +static const VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_BLK_F_DISCARD, .end = endof(struct virtio_blk_config, discard_sector_alignment)}, {.flags = 1ULL << VIRTIO_BLK_F_WRITE_ZEROES, diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 83754a43448..674953f1ade 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -728,6 +728,8 @@ static XenBlockDrive *xen_block_drive_create(const char *id, XenBlockDrive *drive = NULL; QDict *file_layer; QDict *driver_layer; + struct stat st; + int rc; if (params) { char **v = g_strsplit(params, ":", 2); @@ -761,7 +763,17 @@ static XenBlockDrive *xen_block_drive_create(const char *id, file_layer = qdict_new(); driver_layer = qdict_new(); - qdict_put_str(file_layer, "driver", "file"); + rc = stat(filename, &st); + if (rc) { + error_setg_errno(errp, errno, "Could not stat file '%s'", filename); + goto done; + } + if (S_ISBLK(st.st_mode)) { + qdict_put_str(file_layer, "driver", "host_device"); + } else { + qdict_put_str(file_layer, "driver", "file"); + } + qdict_put_str(file_layer, "filename", filename); g_free(filename); diff --git a/hw/char/Kconfig b/hw/char/Kconfig index 4cf36ac637b..6b6cf2fc1df 100644 --- a/hw/char/Kconfig +++ b/hw/char/Kconfig @@ -61,9 +61,13 @@ config AVR_USART config MCHP_PFSOC_MMUART bool + select SERIAL config SIFIVE_UART bool config GOLDFISH_TTY bool + +config SHAKTI_UART + bool diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c index ceb677bc5a8..c069a30842e 100644 --- a/hw/char/cadence_uart.c +++ b/hw/char/cadence_uart.c @@ -235,8 +235,18 @@ static void uart_parameters_setup(CadenceUARTState *s) static int uart_can_receive(void *opaque) { CadenceUARTState *s = opaque; - int ret = MAX(CADENCE_UART_RX_FIFO_SIZE, CADENCE_UART_TX_FIFO_SIZE); - uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE; + int ret; + uint32_t ch_mode; + + /* ignore characters when unclocked or in reset */ + if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n", + __func__); + return 0; + } + + ret = MAX(CADENCE_UART_RX_FIFO_SIZE, CADENCE_UART_TX_FIFO_SIZE); + ch_mode = s->r[R_MR] & UART_MR_CHMODE; if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) { ret = MIN(ret, CADENCE_UART_RX_FIFO_SIZE - s->rx_count); @@ -288,7 +298,7 @@ static void uart_write_rx_fifo(void *opaque, const uint8_t *buf, int size) uart_update_status(s); } -static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond, +static gboolean cadence_uart_xmit(void *do_not_use, GIOCondition cond, void *opaque) { CadenceUARTState *s = opaque; @@ -353,11 +363,6 @@ static void uart_receive(void *opaque, const uint8_t *buf, int size) CadenceUARTState *s = opaque; uint32_t ch_mode = s->r[R_MR] & UART_MR_CHMODE; - /* ignore characters when unclocked or in reset */ - if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { - return; - } - if (ch_mode == NORMAL_MODE || ch_mode == ECHO_MODE) { uart_write_rx_fifo(opaque, buf, size); } @@ -373,6 +378,8 @@ static void uart_event(void *opaque, QEMUChrEvent event) /* ignore characters when unclocked or in reset */ if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or 
in reset\n", + __func__); return; } @@ -403,15 +410,22 @@ static void uart_read_rx_fifo(CadenceUARTState *s, uint32_t *c) uart_update_status(s); } -static void uart_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) +static MemTxResult uart_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size, MemTxAttrs attrs) { CadenceUARTState *s = opaque; + /* ignore access when unclocked or in reset */ + if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n", + __func__); + return MEMTX_ERROR; + } + DB_PRINT(" offset:%x data:%08x\n", (unsigned)offset, (unsigned)value); offset >>= 2; if (offset >= CADENCE_UART_R_MAX) { - return; + return MEMTX_DECODE_ERROR; } switch (offset) { case R_IER: /* ier (wts imr) */ @@ -458,30 +472,41 @@ static void uart_write(void *opaque, hwaddr offset, break; } uart_update_status(s); + + return MEMTX_OK; } -static uint64_t uart_read(void *opaque, hwaddr offset, - unsigned size) +static MemTxResult uart_read(void *opaque, hwaddr offset, + uint64_t *value, unsigned size, MemTxAttrs attrs) { CadenceUARTState *s = opaque; uint32_t c = 0; + /* ignore access when unclocked or in reset */ + if (!clock_is_enabled(s->refclk) || device_is_in_reset(DEVICE(s))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: uart is unclocked or in reset\n", + __func__); + return MEMTX_ERROR; + } + offset >>= 2; if (offset >= CADENCE_UART_R_MAX) { - c = 0; - } else if (offset == R_TX_RX) { + return MEMTX_DECODE_ERROR; + } + if (offset == R_TX_RX) { uart_read_rx_fifo(s, &c); } else { - c = s->r[offset]; + c = s->r[offset]; } DB_PRINT(" offset:%x data:%08x\n", (unsigned)(offset << 2), (unsigned)c); - return c; + *value = c; + return MEMTX_OK; } static const MemoryRegionOps uart_ops = { - .read = uart_read, - .write = uart_write, + .read_with_attrs = uart_read, + .write_with_attrs = uart_write, .endianness = DEVICE_NATIVE_ENDIAN, }; diff --git a/hw/char/cmsdk-apb-uart.c b/hw/char/cmsdk-apb-uart.c index ba2cbbee3d8..f8dc89ee3dc 100644 --- a/hw/char/cmsdk-apb-uart.c +++ b/hw/char/cmsdk-apb-uart.c @@ -191,7 +191,7 @@ static uint64_t uart_read(void *opaque, hwaddr offset, unsigned size) /* Try to send tx data, and arrange to be called back later if * we can't (ie the char backend is busy/blocking). 
*/ -static gboolean uart_transmit(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque) { CMSDKAPBUART *s = CMSDK_APB_UART(opaque); int ret; diff --git a/hw/char/escc.c b/hw/char/escc.c index 52e79782872..8755d8d34f3 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -86,12 +86,15 @@ #define W_INTR 1 #define INTR_INTALL 0x01 #define INTR_TXINT 0x02 +#define INTR_PAR_SPEC 0x04 #define INTR_RXMODEMSK 0x18 #define INTR_RXINT1ST 0x08 #define INTR_RXINTALL 0x10 +#define INTR_WTRQ_TXRX 0x20 #define W_IVEC 2 #define W_RXCTRL 3 #define RXCTRL_RXEN 0x01 +#define RXCTRL_HUNT 0x10 #define W_TXCTRL1 4 #define TXCTRL1_PAREN 0x01 #define TXCTRL1_PAREV 0x02 @@ -105,6 +108,7 @@ #define TXCTRL1_CLK64X 0xc0 #define TXCTRL1_CLKMSK 0xc0 #define W_TXCTRL2 5 +#define TXCTRL2_TXCRC 0x01 #define TXCTRL2_TXEN 0x08 #define TXCTRL2_BITMSK 0x60 #define TXCTRL2_5BITS 0x00 @@ -115,18 +119,27 @@ #define W_SYNC2 7 #define W_TXBUF 8 #define W_MINTR 9 +#define MINTR_VIS 0x01 +#define MINTR_NV 0x02 #define MINTR_STATUSHI 0x10 +#define MINTR_SOFTIACK 0x20 #define MINTR_RST_MASK 0xc0 #define MINTR_RST_B 0x40 #define MINTR_RST_A 0x80 #define MINTR_RST_ALL 0xc0 #define W_MISC1 10 +#define MISC1_ENC_MASK 0x60 #define W_CLOCK 11 #define CLOCK_TRXC 0x08 #define W_BRGLO 12 #define W_BRGHI 13 #define W_MISC2 14 -#define MISC2_PLLDIS 0x30 +#define MISC2_BRG_EN 0x01 +#define MISC2_BRG_SRC 0x02 +#define MISC2_LCL_LOOP 0x10 +#define MISC2_PLLCMD0 0x20 +#define MISC2_PLLCMD1 0x40 +#define MISC2_PLLCMD2 0x80 #define W_EXTINT 15 #define EXTINT_DCD 0x08 #define EXTINT_SYNCINT 0x10 @@ -170,6 +183,7 @@ #define R_RXBUF 8 #define R_RXCTRL 9 #define R_MISC 10 +#define MISC_2CLKMISS 0x40 #define R_MISC1 11 #define R_BRGLO 12 #define R_BRGHI 13 @@ -230,20 +244,23 @@ static uint32_t get_queue(void *opaque) q->count--; } trace_escc_get_queue(CHN_C(s), val); - if (q->count > 0) + if (q->count > 0) { serial_receive_byte(s, 0); + } return val; } static int escc_update_irq_chn(ESCCChannelState *s) { if ((((s->wregs[W_INTR] & INTR_TXINT) && (s->txint == 1)) || - // tx ints enabled, pending - ((((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINT1ST) || - ((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINTALL)) && - s->rxint == 1) || // rx ints enabled, pending - ((s->wregs[W_EXTINT] & EXTINT_BRKINT) && - (s->rregs[R_STATUS] & STATUS_BRK)))) { // break int e&p + /* tx ints enabled, pending */ + ((((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINT1ST) || + ((s->wregs[W_INTR] & INTR_RXMODEMSK) == INTR_RXINTALL)) && + s->rxint == 1) || + /* rx ints enabled, pending */ + ((s->wregs[W_EXTINT] & EXTINT_BRKINT) && + (s->rregs[R_STATUS] & STATUS_BRK)))) { + /* break int e&p */ return 1; } return 0; @@ -262,26 +279,7 @@ static void escc_update_irq(ESCCChannelState *s) static void escc_reset_chn(ESCCChannelState *s) { - int i; - s->reg = 0; - for (i = 0; i < ESCC_SERIAL_REGS; i++) { - s->rregs[i] = 0; - s->wregs[i] = 0; - } - s->wregs[W_TXCTRL1] = TXCTRL1_1STOP; // 1X divisor, 1 stop bit, no parity - s->wregs[W_MINTR] = MINTR_RST_ALL; - s->wregs[W_CLOCK] = CLOCK_TRXC; // Synch mode tx clock = TRxC - s->wregs[W_MISC2] = MISC2_PLLDIS; // PLL disabled - s->wregs[W_EXTINT] = EXTINT_DCD | EXTINT_SYNCINT | EXTINT_CTSINT | - EXTINT_TXUNDRN | EXTINT_BRKINT; // Enable most interrupts - if (s->disabled) - s->rregs[R_STATUS] = STATUS_TXEMPTY | STATUS_DCD | STATUS_SYNC | - STATUS_CTS | STATUS_TXUNDRN; - else - s->rregs[R_STATUS] = STATUS_TXEMPTY | STATUS_TXUNDRN; - s->rregs[R_SPEC] = SPEC_BITS8 | 
SPEC_ALLSENT; - s->rx = s->tx = 0; s->rxint = s->txint = 0; s->rxint_under_svc = s->txint_under_svc = 0; @@ -289,32 +287,110 @@ static void escc_reset_chn(ESCCChannelState *s) clear_queue(s); } +static void escc_soft_reset_chn(ESCCChannelState *s) +{ + escc_reset_chn(s); + + s->wregs[W_CMD] = 0; + s->wregs[W_INTR] &= INTR_PAR_SPEC | INTR_WTRQ_TXRX; + s->wregs[W_RXCTRL] &= ~RXCTRL_RXEN; + /* 1 stop bit */ + s->wregs[W_TXCTRL1] |= TXCTRL1_1STOP; + s->wregs[W_TXCTRL2] &= TXCTRL2_TXCRC | TXCTRL2_8BITS; + s->wregs[W_MINTR] &= ~MINTR_SOFTIACK; + s->wregs[W_MISC1] &= MISC1_ENC_MASK; + /* PLL disabled */ + s->wregs[W_MISC2] &= MISC2_BRG_EN | MISC2_BRG_SRC | + MISC2_PLLCMD1 | MISC2_PLLCMD2; + s->wregs[W_MISC2] |= MISC2_PLLCMD0; + /* Enable most interrupts */ + s->wregs[W_EXTINT] = EXTINT_DCD | EXTINT_SYNCINT | EXTINT_CTSINT | + EXTINT_TXUNDRN | EXTINT_BRKINT; + + s->rregs[R_STATUS] &= STATUS_DCD | STATUS_SYNC | STATUS_CTS | STATUS_BRK; + s->rregs[R_STATUS] |= STATUS_TXEMPTY | STATUS_TXUNDRN; + if (s->disabled) { + s->rregs[R_STATUS] |= STATUS_DCD | STATUS_SYNC | STATUS_CTS; + } + s->rregs[R_SPEC] &= SPEC_ALLSENT; + s->rregs[R_SPEC] |= SPEC_BITS8; + s->rregs[R_INTR] = 0; + s->rregs[R_MISC] &= MISC_2CLKMISS; +} + +static void escc_hard_reset_chn(ESCCChannelState *s) +{ + escc_soft_reset_chn(s); + + /* + * Hard reset is almost identical to soft reset above, except that the + * values of WR9 (W_MINTR), WR10 (W_MISC1), WR11 (W_CLOCK) and WR14 + * (W_MISC2) have extra bits forced to 0/1 + */ + s->wregs[W_MINTR] &= MINTR_VIS | MINTR_NV; + s->wregs[W_MINTR] |= MINTR_RST_B | MINTR_RST_A; + s->wregs[W_MISC1] = 0; + s->wregs[W_CLOCK] = CLOCK_TRXC; + s->wregs[W_MISC2] &= MISC2_PLLCMD1 | MISC2_PLLCMD2; + s->wregs[W_MISC2] |= MISC2_LCL_LOOP | MISC2_PLLCMD0; +} + static void escc_reset(DeviceState *d) { ESCCState *s = ESCC(d); + int i, j; + + for (i = 0; i < 2; i++) { + ESCCChannelState *cs = &s->chn[i]; - escc_reset_chn(&s->chn[0]); - escc_reset_chn(&s->chn[1]); + /* + * According to the ESCC datasheet "Miscellaneous Questions" section + * on page 384, the values of the ESCC registers are not guaranteed on + * power-on until an explicit hardware or software reset has been + * issued. For now we zero the registers so that a device reset always + * returns the emulated device to a fixed state. + */ + for (j = 0; j < ESCC_SERIAL_REGS; j++) { + cs->rregs[j] = 0; + cs->wregs[j] = 0; + } + + /* + * ...but there is an exception. The "Transmit Interrupts and Transmit + * Buffer Empty Bit" section on page 50 of the ESCC datasheet says of + * the STATUS_TXEMPTY bit in R_STATUS: "After a hardware reset + * (including a hardware reset by software), or a channel reset, this + * bit is set to 1". The Sun PROM checks this bit early on startup and + * gets stuck in an infinite loop if it is not set. 
+ */ + cs->rregs[R_STATUS] |= STATUS_TXEMPTY; + + escc_reset_chn(cs); + } } static inline void set_rxint(ESCCChannelState *s) { s->rxint = 1; - /* XXX: missing daisy chainnig: escc_chn_b rx should have a lower priority - than chn_a rx/tx/special_condition service*/ + /* + * XXX: missing daisy chaining: escc_chn_b rx should have a lower priority + * than chn_a rx/tx/special_condition service + */ s->rxint_under_svc = 1; if (s->chn == escc_chn_a) { s->rregs[R_INTR] |= INTR_RXINTA; - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HIRXINTA; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LORXINTA; + } } else { s->otherchn->rregs[R_INTR] |= INTR_RXINTB; - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->rregs[R_IVEC] = IVEC_HIRXINTB; - else + } else { s->rregs[R_IVEC] = IVEC_LORXINTB; + } } escc_update_irq(s); } @@ -328,17 +404,18 @@ static inline void set_txint(ESCCChannelState *s) if (s->wregs[W_INTR] & INTR_TXINT) { s->rregs[R_INTR] |= INTR_TXINTA; } - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HITXINTA; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LOTXINTA; + } } else { s->rregs[R_IVEC] = IVEC_TXINTB; if (s->wregs[W_INTR] & INTR_TXINT) { s->otherchn->rregs[R_INTR] |= INTR_TXINTB; } } - escc_update_irq(s); + escc_update_irq(s); } } @@ -347,20 +424,23 @@ static inline void clr_rxint(ESCCChannelState *s) s->rxint = 0; s->rxint_under_svc = 0; if (s->chn == escc_chn_a) { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LONOINT; + } s->rregs[R_INTR] &= ~INTR_RXINTA; } else { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->rregs[R_IVEC] = IVEC_LONOINT; + } s->otherchn->rregs[R_INTR] &= ~INTR_RXINTB; } - if (s->txint) + if (s->txint) { set_txint(s); + } escc_update_irq(s); } @@ -369,21 +449,24 @@ static inline void clr_txint(ESCCChannelState *s) s->txint = 0; s->txint_under_svc = 0; if (s->chn == escc_chn_a) { - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->otherchn->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->otherchn->rregs[R_IVEC] = IVEC_LONOINT; + } s->rregs[R_INTR] &= ~INTR_TXINTA; } else { s->otherchn->rregs[R_INTR] &= ~INTR_TXINTB; - if (s->wregs[W_MINTR] & MINTR_STATUSHI) + if (s->wregs[W_MINTR] & MINTR_STATUSHI) { s->rregs[R_IVEC] = IVEC_HINOINT; - else + } else { s->rregs[R_IVEC] = IVEC_LONOINT; + } s->otherchn->rregs[R_INTR] &= ~INTR_TXINTB; } - if (s->rxint) + if (s->rxint) { set_rxint(s); + } escc_update_irq(s); } @@ -392,21 +475,24 @@ static void escc_update_parameters(ESCCChannelState *s) int speed, parity, data_bits, stop_bits; QEMUSerialSetParams ssp; - if (!qemu_chr_fe_backend_connected(&s->chr) || s->type != escc_serial) + if (!qemu_chr_fe_backend_connected(&s->chr) || s->type != escc_serial) { return; + } if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREN) { - if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREV) + if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREV) { parity = 'E'; - else + } else { parity = 'O'; + } } else { parity = 'N'; } - if ((s->wregs[W_TXCTRL1] & TXCTRL1_STPMSK) == TXCTRL1_2STOP) + if ((s->wregs[W_TXCTRL1] & TXCTRL1_STPMSK) == TXCTRL1_2STOP) { stop_bits = 2; - else + } else { stop_bits = 1; + } switch (s->wregs[W_TXCTRL2] & 
TXCTRL2_BITMSK) { case TXCTRL2_5BITS: data_bits = 5; @@ -487,13 +573,33 @@ static void escc_mem_write(void *opaque, hwaddr addr, break; } break; - case W_INTR ... W_RXCTRL: + case W_RXCTRL: + s->wregs[s->reg] = val; + if (val & RXCTRL_HUNT) { + s->rregs[R_STATUS] |= STATUS_SYNC; + } + break; + case W_INTR ... W_IVEC: case W_SYNC1 ... W_TXBUF: case W_MISC1 ... W_CLOCK: case W_MISC2 ... W_EXTINT: s->wregs[s->reg] = val; break; case W_TXCTRL1: + s->wregs[s->reg] = val; + /* + * The ESCC datasheet states that SPEC_ALLSENT is always set in + * sync mode, and set in async mode when all characters have + * cleared the transmitter. Since writes to SERIAL_DATA use the + * blocking qemu_chr_fe_write_all() function to write each + * character, the guest can never see the state when async data + * is in the process of being transmitted so we can set this bit + * unconditionally regardless of the state of the W_TXCTRL1 mode + * bits. + */ + s->rregs[R_SPEC] |= SPEC_ALLSENT; + escc_update_parameters(s); + break; case W_TXCTRL2: s->wregs[s->reg] = val; escc_update_parameters(s); @@ -510,23 +616,28 @@ static void escc_mem_write(void *opaque, hwaddr addr, default: break; case MINTR_RST_B: - escc_reset_chn(&serial->chn[0]); + trace_escc_soft_reset_chn(CHN_C(&serial->chn[0])); + escc_soft_reset_chn(&serial->chn[0]); return; case MINTR_RST_A: - escc_reset_chn(&serial->chn[1]); + trace_escc_soft_reset_chn(CHN_C(&serial->chn[1])); + escc_soft_reset_chn(&serial->chn[1]); return; case MINTR_RST_ALL: - escc_reset(DEVICE(serial)); + trace_escc_hard_reset(); + escc_hard_reset_chn(&serial->chn[0]); + escc_hard_reset_chn(&serial->chn[1]); return; } break; default: break; } - if (s->reg == 0) + if (s->reg == 0) { s->reg = newreg; - else + } else { s->reg = 0; + } break; case SERIAL_DATA: trace_escc_mem_writeb_data(CHN_C(s), val); @@ -538,17 +649,19 @@ static void escc_mem_write(void *opaque, hwaddr addr, s->txint = 0; escc_update_irq(s); s->tx = val; - if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { // tx enabled + if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { /* tx enabled */ if (qemu_chr_fe_backend_connected(&s->chr)) { - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ qemu_chr_fe_write_all(&s->chr, &s->tx, 1); } else if (s->type == escc_kbd && !s->disabled) { handle_kbd_command(s, val); } } - s->rregs[R_STATUS] |= STATUS_TXEMPTY; // Tx buffer empty - s->rregs[R_SPEC] |= SPEC_ALLSENT; // All sent + s->rregs[R_STATUS] |= STATUS_TXEMPTY; /* Tx buffer empty */ + s->rregs[R_SPEC] |= SPEC_ALLSENT; /* All sent */ set_txint(s); break; default: @@ -606,12 +719,13 @@ static int serial_can_receive(void *opaque) ESCCChannelState *s = opaque; int ret; - if (((s->wregs[W_RXCTRL] & RXCTRL_RXEN) == 0) // Rx not enabled - || ((s->rregs[R_STATUS] & STATUS_RXAV) == STATUS_RXAV)) - // char already available + if (((s->wregs[W_RXCTRL] & RXCTRL_RXEN) == 0) /* Rx not enabled */ + || ((s->rregs[R_STATUS] & STATUS_RXAV) == STATUS_RXAV)) { + /* char already available */ ret = 0; - else + } else { ret = 1; + } return ret; } @@ -638,12 +752,13 @@ static void serial_receive1(void *opaque, const uint8_t *buf, int size) static void serial_event(void *opaque, QEMUChrEvent event) { ESCCChannelState *s = opaque; - if (event == CHR_EVENT_BREAK) + if (event == CHR_EVENT_BREAK) { serial_receive_break(s); + } } static const VMStateDescription vmstate_escc_chn = { - .name ="escc_chn", + .name = "escc_chn", .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { @@ -662,7 +777,7 @@ static const VMStateDescription vmstate_escc_chn = { }; static const VMStateDescription vmstate_escc = { - .name ="escc", + .name = "escc", .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { @@ -734,21 +849,21 @@ static QemuInputHandler sunkbd_handler = { static void handle_kbd_command(ESCCChannelState *s, int val) { trace_escc_kbd_command(val); - if (s->led_mode) { // Ignore led byte + if (s->led_mode) { /* Ignore led byte */ s->led_mode = 0; return; } switch (val) { - case 1: // Reset, return type code + case 1: /* Reset, return type code */ clear_queue(s); put_queue(s, 0xff); - put_queue(s, 4); // Type 4 + put_queue(s, 4); /* Type 4 */ put_queue(s, 0x7f); break; - case 0xe: // Set leds + case 0xe: /* Set leds */ s->led_mode = 1; break; - case 7: // Query layout + case 7: /* Query layout */ case 0xf: clear_queue(s); put_queue(s, 0xfe); @@ -768,34 +883,39 @@ static void sunmouse_event(void *opaque, trace_escc_sunmouse_event(dx, dy, buttons_state); ch = 0x80 | 0x7; /* protocol start byte, no buttons pressed */ - if (buttons_state & MOUSE_EVENT_LBUTTON) + if (buttons_state & MOUSE_EVENT_LBUTTON) { ch ^= 0x4; - if (buttons_state & MOUSE_EVENT_MBUTTON) + } + if (buttons_state & MOUSE_EVENT_MBUTTON) { ch ^= 0x2; - if (buttons_state & MOUSE_EVENT_RBUTTON) + } + if (buttons_state & MOUSE_EVENT_RBUTTON) { ch ^= 0x1; + } put_queue(s, ch); ch = dx; - if (ch > 127) + if (ch > 127) { ch = 127; - else if (ch < -127) + } else if (ch < -127) { ch = -127; + } put_queue(s, ch & 0xff); ch = -dy; - if (ch > 127) + if (ch > 127) { ch = 127; - else if (ch < -127) + } else if (ch < -127) { ch = -127; + } put_queue(s, ch & 0xff); - // MSC protocol specify two extra motion bytes + /* MSC protocol specifies two extra motion bytes */ put_queue(s, 0); put_queue(s, 0); diff --git a/hw/char/goldfish_tty.c b/hw/char/goldfish_tty.c index 8365a187614..20b77885c18 100644 --- a/hw/char/goldfish_tty.c +++ b/hw/char/goldfish_tty.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish TTY * diff --git a/hw/char/ibex_uart.c b/hw/char/ibex_uart.c index 73b8f2e45be..e58181fcf42 
100644 --- a/hw/char/ibex_uart.c +++ b/hw/char/ibex_uart.c @@ -35,6 +35,44 @@ #include "qemu/log.h" #include "qemu/module.h" +REG32(INTR_STATE, 0x00) + FIELD(INTR_STATE, TX_WATERMARK, 0, 1) + FIELD(INTR_STATE, RX_WATERMARK, 1, 1) + FIELD(INTR_STATE, TX_EMPTY, 2, 1) + FIELD(INTR_STATE, RX_OVERFLOW, 3, 1) +REG32(INTR_ENABLE, 0x04) +REG32(INTR_TEST, 0x08) +REG32(ALERT_TEST, 0x0C) +REG32(CTRL, 0x10) + FIELD(CTRL, TX_ENABLE, 0, 1) + FIELD(CTRL, RX_ENABLE, 1, 1) + FIELD(CTRL, NF, 2, 1) + FIELD(CTRL, SLPBK, 4, 1) + FIELD(CTRL, LLPBK, 5, 1) + FIELD(CTRL, PARITY_EN, 6, 1) + FIELD(CTRL, PARITY_ODD, 7, 1) + FIELD(CTRL, RXBLVL, 8, 2) + FIELD(CTRL, NCO, 16, 16) +REG32(STATUS, 0x14) + FIELD(STATUS, TXFULL, 0, 1) + FIELD(STATUS, RXFULL, 1, 1) + FIELD(STATUS, TXEMPTY, 2, 1) + FIELD(STATUS, RXIDLE, 4, 1) + FIELD(STATUS, RXEMPTY, 5, 1) +REG32(RDATA, 0x18) +REG32(WDATA, 0x1C) +REG32(FIFO_CTRL, 0x20) + FIELD(FIFO_CTRL, RXRST, 0, 1) + FIELD(FIFO_CTRL, TXRST, 1, 1) + FIELD(FIFO_CTRL, RXILVL, 2, 3) + FIELD(FIFO_CTRL, TXILVL, 5, 2) +REG32(FIFO_STATUS, 0x24) + FIELD(FIFO_STATUS, TXLVL, 0, 5) + FIELD(FIFO_STATUS, RXLVL, 16, 5) +REG32(OVRD, 0x28) +REG32(VAL, 0x2C) +REG32(TIMEOUT_CTRL, 0x30) + static void ibex_uart_update_irqs(IbexUartState *s) { if (s->uart_intr_state & s->uart_intr_enable & R_INTR_STATE_TX_WATERMARK_MASK) { @@ -97,7 +135,7 @@ static void ibex_uart_receive(void *opaque, const uint8_t *buf, int size) ibex_uart_update_irqs(s); } -static gboolean ibex_uart_xmit(GIOChannel *chan, GIOCondition cond, +static gboolean ibex_uart_xmit(void *do_not_use, GIOCondition cond, void *opaque) { IbexUartState *s = opaque; @@ -512,6 +550,7 @@ static void ibex_uart_class_init(ObjectClass *klass, void *data) dc->realize = ibex_uart_realize; dc->vmsd = &vmstate_ibex_uart; device_class_set_props(dc, ibex_uart_properties); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); } static const TypeInfo ibex_uart_info = { diff --git a/hw/char/lm32_juart.c b/hw/char/lm32_juart.c deleted file mode 100644 index ce302796506..00000000000 --- a/hw/char/lm32_juart.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * LatticeMico32 JTAG UART model. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "qemu/module.h" -#include "trace.h" -#include "chardev/char-fe.h" - -#include "hw/char/lm32_juart.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "qom/object.h" - -enum { - LM32_JUART_MIN_SAVE_VERSION = 0, - LM32_JUART_CURRENT_SAVE_VERSION = 0, - LM32_JUART_MAX_SAVE_VERSION = 0, -}; - -enum { - JTX_FULL = (1<<8), -}; - -enum { - JRX_FULL = (1<<8), -}; - -OBJECT_DECLARE_SIMPLE_TYPE(LM32JuartState, LM32_JUART) - -struct LM32JuartState { - SysBusDevice parent_obj; - - CharBackend chr; - - uint32_t jtx; - uint32_t jrx; -}; - -uint32_t lm32_juart_get_jtx(DeviceState *d) -{ - LM32JuartState *s = LM32_JUART(d); - - trace_lm32_juart_get_jtx(s->jtx); - return s->jtx; -} - -uint32_t lm32_juart_get_jrx(DeviceState *d) -{ - LM32JuartState *s = LM32_JUART(d); - - trace_lm32_juart_get_jrx(s->jrx); - return s->jrx; -} - -void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx) -{ - LM32JuartState *s = LM32_JUART(d); - unsigned char ch = jtx & 0xff; - - trace_lm32_juart_set_jtx(s->jtx); - - s->jtx = jtx; - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ - qemu_chr_fe_write_all(&s->chr, &ch, 1); -} - -void lm32_juart_set_jrx(DeviceState *d, uint32_t jtx) -{ - LM32JuartState *s = LM32_JUART(d); - - trace_lm32_juart_set_jrx(s->jrx); - s->jrx &= ~JRX_FULL; -} - -static void juart_rx(void *opaque, const uint8_t *buf, int size) -{ - LM32JuartState *s = opaque; - - s->jrx = *buf | JRX_FULL; -} - -static int juart_can_rx(void *opaque) -{ - LM32JuartState *s = opaque; - - return !(s->jrx & JRX_FULL); -} - -static void juart_event(void *opaque, QEMUChrEvent event) -{ -} - -static void juart_reset(DeviceState *d) -{ - LM32JuartState *s = LM32_JUART(d); - - s->jtx = 0; - s->jrx = 0; -} - -static void lm32_juart_realize(DeviceState *dev, Error **errp) -{ - LM32JuartState *s = LM32_JUART(dev); - - qemu_chr_fe_set_handlers(&s->chr, juart_can_rx, juart_rx, - juart_event, NULL, s, NULL, true); -} - -static const VMStateDescription vmstate_lm32_juart = { - .name = "lm32-juart", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32(jtx, LM32JuartState), - VMSTATE_UINT32(jrx, LM32JuartState), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_juart_properties[] = { - DEFINE_PROP_CHR("chardev", LM32JuartState, chr), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_juart_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = juart_reset; - dc->vmsd = &vmstate_lm32_juart; - device_class_set_props(dc, lm32_juart_properties); - dc->realize = lm32_juart_realize; -} - -static const TypeInfo lm32_juart_info = { - .name = TYPE_LM32_JUART, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32JuartState), - .class_init = lm32_juart_class_init, -}; - -static void lm32_juart_register_types(void) -{ - type_register_static(&lm32_juart_info); -} - -type_init(lm32_juart_register_types) diff --git a/hw/char/lm32_uart.c b/hw/char/lm32_uart.c deleted file mode 100644 index d8e03313111..00000000000 --- a/hw/char/lm32_uart.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - * QEMU model of the LatticeMico32 UART block. 
- * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://www.latticesemi.com/documents/mico32uart.pdf - */ - - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "chardev/char-fe.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_RXTX = 0, - R_IER, - R_IIR, - R_LCR, - R_MCR, - R_LSR, - R_MSR, - R_DIV, - R_MAX -}; - -enum { - IER_RBRI = (1<<0), - IER_THRI = (1<<1), - IER_RLSI = (1<<2), - IER_MSI = (1<<3), -}; - -enum { - IIR_STAT = (1<<0), - IIR_ID0 = (1<<1), - IIR_ID1 = (1<<2), -}; - -enum { - LCR_WLS0 = (1<<0), - LCR_WLS1 = (1<<1), - LCR_STB = (1<<2), - LCR_PEN = (1<<3), - LCR_EPS = (1<<4), - LCR_SP = (1<<5), - LCR_SB = (1<<6), -}; - -enum { - MCR_DTR = (1<<0), - MCR_RTS = (1<<1), -}; - -enum { - LSR_DR = (1<<0), - LSR_OE = (1<<1), - LSR_PE = (1<<2), - LSR_FE = (1<<3), - LSR_BI = (1<<4), - LSR_THRE = (1<<5), - LSR_TEMT = (1<<6), -}; - -enum { - MSR_DCTS = (1<<0), - MSR_DDSR = (1<<1), - MSR_TERI = (1<<2), - MSR_DDCD = (1<<3), - MSR_CTS = (1<<4), - MSR_DSR = (1<<5), - MSR_RI = (1<<6), - MSR_DCD = (1<<7), -}; - -#define TYPE_LM32_UART "lm32-uart" -OBJECT_DECLARE_SIMPLE_TYPE(LM32UartState, LM32_UART) - -struct LM32UartState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - CharBackend chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; -}; - -static void uart_update_irq(LM32UartState *s) -{ - unsigned int irq; - - if ((s->regs[R_LSR] & (LSR_OE | LSR_PE | LSR_FE | LSR_BI)) - && (s->regs[R_IER] & IER_RLSI)) { - irq = 1; - s->regs[R_IIR] = IIR_ID1 | IIR_ID0; - } else if ((s->regs[R_LSR] & LSR_DR) && (s->regs[R_IER] & IER_RBRI)) { - irq = 1; - s->regs[R_IIR] = IIR_ID1; - } else if ((s->regs[R_LSR] & LSR_THRE) && (s->regs[R_IER] & IER_THRI)) { - irq = 1; - s->regs[R_IIR] = IIR_ID0; - } else if ((s->regs[R_MSR] & 0x0f) && (s->regs[R_IER] & IER_MSI)) { - irq = 1; - s->regs[R_IIR] = 0; - } else { - irq = 0; - s->regs[R_IIR] = IIR_STAT; - } - - trace_lm32_uart_irq_state(irq); - qemu_set_irq(s->irq, irq); -} - -static uint64_t uart_read(void *opaque, hwaddr addr, - unsigned size) -{ - LM32UartState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_RXTX: - r = s->regs[R_RXTX]; - s->regs[R_LSR] &= ~LSR_DR; - uart_update_irq(s); - qemu_chr_fe_accept_input(&s->chr); - break; - case R_IIR: - case R_LSR: - case R_MSR: - r = s->regs[addr]; - break; - case R_IER: - case R_LCR: - case R_MCR: - case R_DIV: - error_report("lm32_uart: read access to write only register 0x" - TARGET_FMT_plx, addr << 2); - break; - default: - error_report("lm32_uart: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_lm32_uart_memory_read(addr << 2, r); - return r; -} - -static void uart_write(void 
*opaque, hwaddr addr, - uint64_t value, unsigned size) -{ - LM32UartState *s = opaque; - unsigned char ch = value; - - trace_lm32_uart_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_RXTX: - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ - qemu_chr_fe_write_all(&s->chr, &ch, 1); - break; - case R_IER: - case R_LCR: - case R_MCR: - case R_DIV: - s->regs[addr] = value; - break; - case R_IIR: - case R_LSR: - case R_MSR: - error_report("lm32_uart: write access to read only register 0x" - TARGET_FMT_plx, addr << 2); - break; - default: - error_report("lm32_uart: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - uart_update_irq(s); -} - -static const MemoryRegionOps uart_ops = { - .read = uart_read, - .write = uart_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, -}; - -static void uart_rx(void *opaque, const uint8_t *buf, int size) -{ - LM32UartState *s = opaque; - - if (s->regs[R_LSR] & LSR_DR) { - s->regs[R_LSR] |= LSR_OE; - } - - s->regs[R_LSR] |= LSR_DR; - s->regs[R_RXTX] = *buf; - - uart_update_irq(s); -} - -static int uart_can_rx(void *opaque) -{ - LM32UartState *s = opaque; - - return !(s->regs[R_LSR] & LSR_DR); -} - -static void uart_event(void *opaque, QEMUChrEvent event) -{ -} - -static void uart_reset(DeviceState *d) -{ - LM32UartState *s = LM32_UART(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* defaults */ - s->regs[R_LSR] = LSR_THRE | LSR_TEMT; -} - -static void lm32_uart_init(Object *obj) -{ - LM32UartState *s = LM32_UART(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->irq); - - memory_region_init_io(&s->iomem, obj, &uart_ops, s, - "uart", R_MAX * 4); - sysbus_init_mmio(dev, &s->iomem); -} - -static void lm32_uart_realize(DeviceState *dev, Error **errp) -{ - LM32UartState *s = LM32_UART(dev); - - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, - uart_event, NULL, s, NULL, true); -} - -static const VMStateDescription vmstate_lm32_uart = { - .name = "lm32-uart", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, LM32UartState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_uart_properties[] = { - DEFINE_PROP_CHR("chardev", LM32UartState, chr), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_uart_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = uart_reset; - dc->vmsd = &vmstate_lm32_uart; - device_class_set_props(dc, lm32_uart_properties); - dc->realize = lm32_uart_realize; -} - -static const TypeInfo lm32_uart_info = { - .name = TYPE_LM32_UART, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32UartState), - .instance_init = lm32_uart_init, - .class_init = lm32_uart_class_init, -}; - -static void lm32_uart_register_types(void) -{ - type_register_static(&lm32_uart_info); -} - -type_init(lm32_uart_register_types) diff --git a/hw/char/mchp_pfsoc_mmuart.c b/hw/char/mchp_pfsoc_mmuart.c index 8a002b0a19f..22f3e78eb9e 100644 --- a/hw/char/mchp_pfsoc_mmuart.c +++ b/hw/char/mchp_pfsoc_mmuart.c @@ -22,21 +22,25 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "chardev/char.h" -#include "exec/address-spaces.h" +#include "qapi/error.h" +#include "migration/vmstate.h" #include "hw/char/mchp_pfsoc_mmuart.h" +#include "hw/qdev-properties.h" + +#define REGS_OFFSET 0x20 static uint64_t mchp_pfsoc_mmuart_read(void 
*opaque, hwaddr addr, unsigned size) { MchpPfSoCMMUartState *s = opaque; - if (addr >= MCHP_PFSOC_MMUART_REG_SIZE) { + addr >>= 2; + if (addr >= MCHP_PFSOC_MMUART_REG_COUNT) { qemu_log_mask(LOG_GUEST_ERROR, "%s: read: addr=0x%" HWADDR_PRIx "\n", - __func__, addr); + __func__, addr << 2); return 0; } - return s->reg[addr / sizeof(uint32_t)]; + return s->reg[addr]; } static void mchp_pfsoc_mmuart_write(void *opaque, hwaddr addr, @@ -45,13 +49,14 @@ static void mchp_pfsoc_mmuart_write(void *opaque, hwaddr addr, MchpPfSoCMMUartState *s = opaque; uint32_t val32 = (uint32_t)value; - if (addr >= MCHP_PFSOC_MMUART_REG_SIZE) { + addr >>= 2; + if (addr >= MCHP_PFSOC_MMUART_REG_COUNT) { qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write: addr=0x%" HWADDR_PRIx - " v=0x%x\n", __func__, addr, val32); + " v=0x%x\n", __func__, addr << 2, val32); return; } - s->reg[addr / sizeof(uint32_t)] = val32; + s->reg[addr] = val32; } static const MemoryRegionOps mchp_pfsoc_mmuart_ops = { @@ -64,23 +69,95 @@ static const MemoryRegionOps mchp_pfsoc_mmuart_ops = { }, }; -MchpPfSoCMMUartState *mchp_pfsoc_mmuart_create(MemoryRegion *sysmem, - hwaddr base, qemu_irq irq, Chardev *chr) +static void mchp_pfsoc_mmuart_reset(DeviceState *dev) +{ + MchpPfSoCMMUartState *s = MCHP_PFSOC_UART(dev); + + memset(s->reg, 0, sizeof(s->reg)); + device_cold_reset(DEVICE(&s->serial_mm)); +} + +static void mchp_pfsoc_mmuart_init(Object *obj) +{ + MchpPfSoCMMUartState *s = MCHP_PFSOC_UART(obj); + + object_initialize_child(obj, "serial-mm", &s->serial_mm, TYPE_SERIAL_MM); + object_property_add_alias(obj, "chardev", OBJECT(&s->serial_mm), "chardev"); +} + +static void mchp_pfsoc_mmuart_realize(DeviceState *dev, Error **errp) { - MchpPfSoCMMUartState *s; + MchpPfSoCMMUartState *s = MCHP_PFSOC_UART(dev); + + qdev_prop_set_uint8(DEVICE(&s->serial_mm), "regshift", 2); + qdev_prop_set_uint32(DEVICE(&s->serial_mm), "baudbase", 399193); + qdev_prop_set_uint8(DEVICE(&s->serial_mm), "endianness", + DEVICE_LITTLE_ENDIAN); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->serial_mm), errp)) { + return; + } + + sysbus_pass_irq(SYS_BUS_DEVICE(dev), SYS_BUS_DEVICE(&s->serial_mm)); - s = g_new0(MchpPfSoCMMUartState, 1); + memory_region_init(&s->container, OBJECT(s), "mchp.pfsoc.mmuart", 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); - memory_region_init_io(&s->iomem, NULL, &mchp_pfsoc_mmuart_ops, s, - "mchp.pfsoc.mmuart", 0x1000); + memory_region_add_subregion(&s->container, 0, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->serial_mm), 0)); + + memory_region_init_io(&s->iomem, OBJECT(s), &mchp_pfsoc_mmuart_ops, s, + "mchp.pfsoc.mmuart.regs", 0x1000 - REGS_OFFSET); + memory_region_add_subregion(&s->container, REGS_OFFSET, &s->iomem); +} - s->base = base; - s->irq = irq; +static const VMStateDescription mchp_pfsoc_mmuart_vmstate = { + .name = "mchp.pfsoc.uart", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(reg, MchpPfSoCMMUartState, + MCHP_PFSOC_MMUART_REG_COUNT), + VMSTATE_END_OF_LIST() + } +}; + +static void mchp_pfsoc_mmuart_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = mchp_pfsoc_mmuart_realize; + dc->reset = mchp_pfsoc_mmuart_reset; + dc->vmsd = &mchp_pfsoc_mmuart_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo mchp_pfsoc_mmuart_info = { + .name = TYPE_MCHP_PFSOC_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MchpPfSoCMMUartState), + .instance_init = mchp_pfsoc_mmuart_init, + .class_init = 
mchp_pfsoc_mmuart_class_init, +}; + +static void mchp_pfsoc_mmuart_register_types(void) +{ + type_register_static(&mchp_pfsoc_mmuart_info); +} + +type_init(mchp_pfsoc_mmuart_register_types) + +MchpPfSoCMMUartState *mchp_pfsoc_mmuart_create(MemoryRegion *sysmem, + hwaddr base, + qemu_irq irq, Chardev *chr) +{ + DeviceState *dev = qdev_new(TYPE_MCHP_PFSOC_UART); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - s->serial = serial_mm_init(sysmem, base, 2, irq, 399193, chr, - DEVICE_LITTLE_ENDIAN); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize(sbd, &error_fatal); - memory_region_add_subregion(sysmem, base + 0x20, &s->iomem); + memory_region_add_subregion(sysmem, base, sysbus_mmio_get_region(sbd, 0)); + sysbus_connect_irq(sbd, 0, irq); - return s; + return MCHP_PFSOC_UART(dev); } diff --git a/hw/char/meson.build b/hw/char/meson.build index da5bb8b762e..7b594f51b86 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -8,9 +8,6 @@ softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_serial.c')) softmmu_ss.add(when: 'CONFIG_IPACK', if_true: files('ipoctal232.c')) softmmu_ss.add(when: 'CONFIG_ISA_BUS', if_true: files('parallel-isa.c')) softmmu_ss.add(when: 'CONFIG_ISA_DEBUG', if_true: files('debugcon.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_juart.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_uart.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-uart.c')) softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_uart.c')) softmmu_ss.add(when: 'CONFIG_PARALLEL', if_true: files('parallel.c')) softmmu_ss.add(when: 'CONFIG_PL011', if_true: files('pl011.c')) @@ -19,6 +16,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL', if_true: files('serial.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_ISA', if_true: files('serial-isa.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c')) +softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c')) softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c')) diff --git a/hw/char/milkymist-uart.c b/hw/char/milkymist-uart.c deleted file mode 100644 index cb1b3470ad5..00000000000 --- a/hw/char/milkymist-uart.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * QEMU model of the Milkymist UART block. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/uart.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "chardev/char-fe.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_RXTX = 0, - R_DIV, - R_STAT, - R_CTRL, - R_DBG, - R_MAX -}; - -enum { - STAT_THRE = (1<<0), - STAT_RX_EVT = (1<<1), - STAT_TX_EVT = (1<<2), -}; - -enum { - CTRL_RX_IRQ_EN = (1<<0), - CTRL_TX_IRQ_EN = (1<<1), - CTRL_THRU_EN = (1<<2), -}; - -enum { - DBG_BREAK_EN = (1<<0), -}; - -#define TYPE_MILKYMIST_UART "milkymist-uart" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistUartState, MILKYMIST_UART) - -struct MilkymistUartState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - CharBackend chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; -}; - -static void uart_update_irq(MilkymistUartState *s) -{ - int rx_event = s->regs[R_STAT] & STAT_RX_EVT; - int tx_event = s->regs[R_STAT] & STAT_TX_EVT; - int rx_irq_en = s->regs[R_CTRL] & CTRL_RX_IRQ_EN; - int tx_irq_en = s->regs[R_CTRL] & CTRL_TX_IRQ_EN; - - if ((rx_irq_en && rx_event) || (tx_irq_en && tx_event)) { - trace_milkymist_uart_raise_irq(); - qemu_irq_raise(s->irq); - } else { - trace_milkymist_uart_lower_irq(); - qemu_irq_lower(s->irq); - } -} - -static uint64_t uart_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistUartState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_RXTX: - r = s->regs[addr]; - break; - case R_DIV: - case R_STAT: - case R_CTRL: - case R_DBG: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_uart: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_uart_memory_read(addr << 2, r); - - return r; -} - -static void uart_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistUartState *s = opaque; - unsigned char ch = value; - - trace_milkymist_uart_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_RXTX: - qemu_chr_fe_write_all(&s->chr, &ch, 1); - s->regs[R_STAT] |= STAT_TX_EVT; - break; - case R_DIV: - case R_CTRL: - case R_DBG: - s->regs[addr] = value; - break; - - case R_STAT: - /* write one to clear bits */ - s->regs[addr] &= ~(value & (STAT_RX_EVT | STAT_TX_EVT)); - qemu_chr_fe_accept_input(&s->chr); - break; - - default: - error_report("milkymist_uart: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - uart_update_irq(s); -} - -static const MemoryRegionOps uart_mmio_ops = { - .read = uart_read, - .write = uart_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void uart_rx(void *opaque, const uint8_t *buf, int size) -{ - MilkymistUartState *s = opaque; - - assert(!(s->regs[R_STAT] & STAT_RX_EVT)); - - s->regs[R_STAT] |= STAT_RX_EVT; - s->regs[R_RXTX] = *buf; - - uart_update_irq(s); -} - -static int uart_can_rx(void *opaque) -{ - MilkymistUartState *s = opaque; - - return !(s->regs[R_STAT] & STAT_RX_EVT); -} - -static void uart_event(void *opaque, QEMUChrEvent event) -{ -} - -static void milkymist_uart_reset(DeviceState *d) -{ - MilkymistUartState *s = MILKYMIST_UART(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* THRE is always set */ - s->regs[R_STAT] = STAT_THRE; -} - -static void milkymist_uart_realize(DeviceState *dev, Error **errp) 
-{ - MilkymistUartState *s = MILKYMIST_UART(dev); - - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, - uart_event, NULL, s, NULL, true); -} - -static void milkymist_uart_init(Object *obj) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - MilkymistUartState *s = MILKYMIST_UART(obj); - - sysbus_init_irq(sbd, &s->irq); - - memory_region_init_io(&s->regs_region, OBJECT(s), &uart_mmio_ops, s, - "milkymist-uart", R_MAX * 4); - sysbus_init_mmio(sbd, &s->regs_region); -} - -static const VMStateDescription vmstate_milkymist_uart = { - .name = "milkymist-uart", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistUartState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_uart_properties[] = { - DEFINE_PROP_CHR("chardev", MilkymistUartState, chr), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_uart_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_uart_realize; - dc->reset = milkymist_uart_reset; - dc->vmsd = &vmstate_milkymist_uart; - device_class_set_props(dc, milkymist_uart_properties); -} - -static const TypeInfo milkymist_uart_info = { - .name = TYPE_MILKYMIST_UART, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistUartState), - .instance_init = milkymist_uart_init, - .class_init = milkymist_uart_class_init, -}; - -static void milkymist_uart_register_types(void) -{ - type_register_static(&milkymist_uart_info); -} - -type_init(milkymist_uart_register_types) diff --git a/hw/char/nrf51_uart.c b/hw/char/nrf51_uart.c index 045ca5fa40d..3c6f982de97 100644 --- a/hw/char/nrf51_uart.c +++ b/hw/char/nrf51_uart.c @@ -75,7 +75,7 @@ static uint64_t uart_read(void *opaque, hwaddr addr, unsigned int size) return r; } -static gboolean uart_transmit(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean uart_transmit(void *do_not_use, GIOCondition cond, void *opaque) { NRF51UARTState *s = NRF51_UART(opaque); int r; diff --git a/hw/char/pl011.c b/hw/char/pl011.c index dc85527a5f9..6e2d7f75095 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -26,6 +26,7 @@ #include "hw/qdev-properties-system.h" #include "migration/vmstate.h" #include "chardev/char-fe.h" +#include "chardev/char-serial.h" #include "qemu/log.h" #include "qemu/module.h" #include "trace.h" @@ -231,6 +232,11 @@ static void pl011_write(void *opaque, hwaddr offset, s->read_count = 0; s->read_pos = 0; } + if ((s->lcr ^ value) & 0x1) { + int break_enable = value & 0x1; + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_BREAK, + &break_enable); + } s->lcr = value; pl011_set_read_trigger(s); break; diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index ba1af1cfc45..ddae738d563 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -23,7 +23,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/log.h" -#include "hw/sysbus.h" #include "hw/char/riscv_htif.h" #include "hw/char/serial.h" #include "chardev/char.h" diff --git a/hw/char/serial.c b/hw/char/serial.c index bc2e3229704..7061aacbce9 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -220,7 +220,7 @@ static void serial_update_msl(SerialState *s) } } -static gboolean serial_watch_cb(GIOChannel *chan, GIOCondition cond, +static gboolean serial_watch_cb(void *do_not_use, GIOCondition cond, void *opaque) { SerialState *s = opaque; diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c index 167f4d8cb90..355886ee3a1 100644 --- a/hw/char/sh_serial.c +++ b/hw/char/sh_serial.c @@ 
-26,13 +26,17 @@ */ #include "qemu/osdep.h" +#include "hw/sysbus.h" #include "hw/irq.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "hw/sh4/sh.h" #include "chardev/char-fe.h" #include "qapi/error.h" #include "qemu/timer.h" - -//#define DEBUG_SERIAL +#include "qemu/log.h" +#include "trace.h" #define SH_SERIAL_FLAG_TEND (1 << 0) #define SH_SERIAL_FLAG_TDE (1 << 1) @@ -42,10 +46,10 @@ #define SH_RX_FIFO_LENGTH (16) -typedef struct { - MemoryRegion iomem; - MemoryRegion iomem_p4; - MemoryRegion iomem_a7; +OBJECT_DECLARE_SIMPLE_TYPE(SHSerialState, SH_SERIAL) + +struct SHSerialState { + SysBusDevice parent; uint8_t smr; uint8_t brr; uint8_t scr; @@ -59,13 +63,12 @@ typedef struct { uint8_t rx_tail; uint8_t rx_head; - int freq; - int feat; + uint8_t feat; int flags; int rtrg; CharBackend chr; - QEMUTimer *fifo_timeout_timer; + QEMUTimer fifo_timeout_timer; uint64_t etu; /* Elementary Time Unit (ns) */ qemu_irq eri; @@ -73,9 +76,13 @@ typedef struct { qemu_irq txi; qemu_irq tei; qemu_irq bri; -} sh_serial_state; +}; + +typedef struct {} SHSerialStateClass; -static void sh_serial_clear_fifo(sh_serial_state * s) +OBJECT_DEFINE_TYPE(SHSerialState, sh_serial, SH_SERIAL, SYS_BUS_DEVICE) + +static void sh_serial_clear_fifo(SHSerialState *s) { memset(s->rx_fifo, 0, SH_RX_FIFO_LENGTH); s->rx_cnt = 0; @@ -86,14 +93,12 @@ static void sh_serial_clear_fifo(sh_serial_state * s) static void sh_serial_write(void *opaque, hwaddr offs, uint64_t val, unsigned size) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; + DeviceState *d = DEVICE(s); unsigned char ch; -#ifdef DEBUG_SERIAL - printf("sh_serial: write offs=0x%02x val=0x%02x\n", - offs, val); -#endif - switch(offs) { + trace_sh_serial_write(d->id, size, offs, val); + switch (offs) { case 0x00: /* SMR */ s->smr = val & ((s->feat & SH_SERIAL_FEAT_SCIF) ? 0x7b : 0xff); return; @@ -103,8 +108,9 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x08: /* SCR */ /* TODO : For SH7751, SCIF mask should be 0xfb. */ s->scr = val & ((s->feat & SH_SERIAL_FEAT_SCIF) ? 0xfa : 0xff); - if (!(val & (1 << 5))) + if (!(val & (1 << 5))) { s->flags |= SH_SERIAL_FLAG_TEND; + } if ((s->feat & SH_SERIAL_FEAT_SCIF) && s->txi) { qemu_set_irq(s->txi, val & (1 << 7)); } @@ -115,8 +121,10 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x0c: /* FTDR / TDR */ if (qemu_chr_fe_backend_connected(&s->chr)) { ch = val; - /* XXX this blocks entire thread. Rewrite to use - * qemu_chr_fe_write and background I/O callbacks */ + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ qemu_chr_fe_write_all(&s->chr, &ch, 1); } s->dr = val; @@ -129,18 +137,23 @@ static void sh_serial_write(void *opaque, hwaddr offs, #endif } if (s->feat & SH_SERIAL_FEAT_SCIF) { - switch(offs) { + switch (offs) { case 0x10: /* FSR */ - if (!(val & (1 << 6))) + if (!(val & (1 << 6))) { s->flags &= ~SH_SERIAL_FLAG_TEND; - if (!(val & (1 << 5))) + } + if (!(val & (1 << 5))) { s->flags &= ~SH_SERIAL_FLAG_TDE; - if (!(val & (1 << 4))) + } + if (!(val & (1 << 4))) { s->flags &= ~SH_SERIAL_FLAG_BRK; - if (!(val & (1 << 1))) + } + if (!(val & (1 << 1))) { s->flags &= ~SH_SERIAL_FLAG_RDF; - if (!(val & (1 << 0))) + } + if (!(val & (1 << 0))) { s->flags &= ~SH_SERIAL_FLAG_DR; + } if (!(val & (1 << 1)) || !(val & (1 << 0))) { if (s->rxi) { @@ -176,9 +189,8 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x24: /* LSR */ return; } - } - else { - switch(offs) { + } else { + switch (offs) { #if 0 case 0x0c: ret = s->dr; @@ -192,20 +204,20 @@ static void sh_serial_write(void *opaque, hwaddr offs, return; } } - - fprintf(stderr, "sh_serial: unsupported write to 0x%02" - HWADDR_PRIx "\n", offs); - abort(); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: unsupported write to 0x%02" HWADDR_PRIx "\n", + __func__, offs); } static uint64_t sh_serial_read(void *opaque, hwaddr offs, unsigned size) { - sh_serial_state *s = opaque; - uint32_t ret = ~0; + SHSerialState *s = opaque; + DeviceState *d = DEVICE(s); + uint32_t ret = UINT32_MAX; #if 0 - switch(offs) { + switch (offs) { case 0x00: ret = s->smr; break; @@ -221,7 +233,7 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, } #endif if (s->feat & SH_SERIAL_FEAT_SCIF) { - switch(offs) { + switch (offs) { case 0x00: /* SMR */ ret = s->smr; break; @@ -230,29 +242,37 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, break; case 0x10: /* FSR */ ret = 0; - if (s->flags & SH_SERIAL_FLAG_TEND) + if (s->flags & SH_SERIAL_FLAG_TEND) { ret |= (1 << 6); - if (s->flags & SH_SERIAL_FLAG_TDE) + } + if (s->flags & SH_SERIAL_FLAG_TDE) { ret |= (1 << 5); - if (s->flags & SH_SERIAL_FLAG_BRK) + } + if (s->flags & SH_SERIAL_FLAG_BRK) { ret |= (1 << 4); - if (s->flags & SH_SERIAL_FLAG_RDF) + } + if (s->flags & SH_SERIAL_FLAG_RDF) { ret |= (1 << 1); - if (s->flags & SH_SERIAL_FLAG_DR) + } + if (s->flags & SH_SERIAL_FLAG_DR) { ret |= (1 << 0); + } - if (s->scr & (1 << 5)) + if (s->scr & (1 << 5)) { s->flags |= SH_SERIAL_FLAG_TDE | SH_SERIAL_FLAG_TEND; + } break; case 0x14: if (s->rx_cnt > 0) { ret = s->rx_fifo[s->rx_tail++]; s->rx_cnt--; - if (s->rx_tail == SH_RX_FIFO_LENGTH) + if (s->rx_tail == SH_RX_FIFO_LENGTH) { s->rx_tail = 0; - if (s->rx_cnt < s->rtrg) + } + if (s->rx_cnt < s->rtrg) { s->flags &= ~SH_SERIAL_FLAG_RDF; + } } break; case 0x18: @@ -268,9 +288,8 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, ret = 0; break; } - } - else { - switch(offs) { + } else { + switch (offs) { #if 0 case 0x0c: ret = s->dr; @@ -287,40 +306,39 @@ static uint64_t sh_serial_read(void *opaque, hwaddr offs, break; } } -#ifdef DEBUG_SERIAL - printf("sh_serial: read offs=0x%02x val=0x%x\n", - offs, ret); -#endif + trace_sh_serial_read(d->id, size, offs, ret); - if (ret & ~((1 << 16) - 1)) { - fprintf(stderr, "sh_serial: unsupported read from 0x%02" - HWADDR_PRIx "\n", offs); - abort(); + if (ret > UINT16_MAX) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: unsupported read from 0x%02" HWADDR_PRIx "\n", + __func__, offs); + ret = 0; } return ret; } -static int sh_serial_can_receive(sh_serial_state 
*s) +static int sh_serial_can_receive(SHSerialState *s) { return s->scr & (1 << 4); } -static void sh_serial_receive_break(sh_serial_state *s) +static void sh_serial_receive_break(SHSerialState *s) { - if (s->feat & SH_SERIAL_FEAT_SCIF) + if (s->feat & SH_SERIAL_FEAT_SCIF) { s->sr |= (1 << 4); + } } static int sh_serial_can_receive1(void *opaque) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; return sh_serial_can_receive(s); } static void sh_serial_timeout_int(void *opaque) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; s->flags |= SH_SERIAL_FLAG_RDF; if (s->scr & (1 << 6) && s->rxi) { @@ -330,7 +348,7 @@ static void sh_serial_timeout_int(void *opaque) static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size) { - sh_serial_state *s = opaque; + SHSerialState *s = opaque; if (s->feat & SH_SERIAL_FEAT_SCIF) { int i; @@ -344,11 +362,11 @@ static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size) if (s->rx_cnt >= s->rtrg) { s->flags |= SH_SERIAL_FLAG_RDF; if (s->scr & (1 << 6) && s->rxi) { - timer_del(s->fifo_timeout_timer); + timer_del(&s->fifo_timeout_timer); qemu_set_irq(s->rxi, 1); } } else { - timer_mod(s->fifo_timeout_timer, + timer_mod(&s->fifo_timeout_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 15 * s->etu); } } @@ -360,9 +378,10 @@ static void sh_serial_receive1(void *opaque, const uint8_t *buf, int size) static void sh_serial_event(void *opaque, QEMUChrEvent event) { - sh_serial_state *s = opaque; - if (event == CHR_EVENT_BREAK) + SHSerialState *s = opaque; + if (event == CHR_EVENT_BREAK) { sh_serial_receive_break(s); + } } static const MemoryRegionOps sh_serial_ops = { @@ -371,20 +390,10 @@ static const MemoryRegionOps sh_serial_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -void sh_serial_init(MemoryRegion *sysmem, - hwaddr base, int feat, - uint32_t freq, Chardev *chr, - qemu_irq eri_source, - qemu_irq rxi_source, - qemu_irq txi_source, - qemu_irq tei_source, - qemu_irq bri_source) +static void sh_serial_reset(DeviceState *dev) { - sh_serial_state *s; + SHSerialState *s = SH_SERIAL(dev); - s = g_malloc0(sizeof(sh_serial_state)); - - s->feat = feat; s->flags = SH_SERIAL_FLAG_TEND | SH_SERIAL_FLAG_TDE; s->rtrg = 1; @@ -393,39 +402,64 @@ void sh_serial_init(MemoryRegion *sysmem, s->scr = 1 << 5; /* pretend that TX is enabled so early printk works */ s->sptr = 0; - if (feat & SH_SERIAL_FEAT_SCIF) { + if (s->feat & SH_SERIAL_FEAT_SCIF) { s->fcr = 0; - } - else { + } else { s->dr = 0xff; } sh_serial_clear_fifo(s); +} - memory_region_init_io(&s->iomem, NULL, &sh_serial_ops, s, - "serial", 0x100000000ULL); - - memory_region_init_alias(&s->iomem_p4, NULL, "serial-p4", &s->iomem, - 0, 0x28); - memory_region_add_subregion(sysmem, P4ADDR(base), &s->iomem_p4); - - memory_region_init_alias(&s->iomem_a7, NULL, "serial-a7", &s->iomem, - 0, 0x28); - memory_region_add_subregion(sysmem, A7ADDR(base), &s->iomem_a7); - - if (chr) { - qemu_chr_fe_init(&s->chr, chr, &error_abort); +static void sh_serial_realize(DeviceState *d, Error **errp) +{ + SHSerialState *s = SH_SERIAL(d); + MemoryRegion *iomem = g_malloc(sizeof(*iomem)); + + assert(d->id); + memory_region_init_io(iomem, OBJECT(d), &sh_serial_ops, s, d->id, 0x28); + sysbus_init_mmio(SYS_BUS_DEVICE(d), iomem); + qdev_init_gpio_out_named(d, &s->eri, "eri", 1); + qdev_init_gpio_out_named(d, &s->rxi, "rxi", 1); + qdev_init_gpio_out_named(d, &s->txi, "txi", 1); + qdev_init_gpio_out_named(d, &s->tei, "tei", 1); + qdev_init_gpio_out_named(d, &s->bri, "bri", 1); + + if 
(qemu_chr_fe_backend_connected(&s->chr)) { qemu_chr_fe_set_handlers(&s->chr, sh_serial_can_receive1, sh_serial_receive1, sh_serial_event, NULL, s, NULL, true); } - s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - sh_serial_timeout_int, s); + timer_init_ns(&s->fifo_timeout_timer, QEMU_CLOCK_VIRTUAL, + sh_serial_timeout_int, s); s->etu = NANOSECONDS_PER_SECOND / 9600; - s->eri = eri_source; - s->rxi = rxi_source; - s->txi = txi_source; - s->tei = tei_source; - s->bri = bri_source; +} + +static void sh_serial_finalize(Object *obj) +{ + SHSerialState *s = SH_SERIAL(obj); + + timer_del(&s->fifo_timeout_timer); +} + +static void sh_serial_init(Object *obj) +{ +} + +static Property sh_serial_properties[] = { + DEFINE_PROP_CHR("chardev", SHSerialState, chr), + DEFINE_PROP_UINT8("features", SHSerialState, feat, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void sh_serial_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + device_class_set_props(dc, sh_serial_properties); + dc->realize = sh_serial_realize; + dc->reset = sh_serial_reset; + /* Reason: part of SuperH CPU/SoC, needs to be wired up */ + dc->user_creatable = false; } diff --git a/hw/char/shakti_uart.c b/hw/char/shakti_uart.c new file mode 100644 index 00000000000..98b142c7df8 --- /dev/null +++ b/hw/char/shakti_uart.c @@ -0,0 +1,186 @@ +/* + * SHAKTI UART + * + * Copyright (c) 2021 Vijai Kumar K + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/char/shakti_uart.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "qemu/log.h" + +static uint64_t shakti_uart_read(void *opaque, hwaddr addr, unsigned size) +{ + ShaktiUartState *s = opaque; + + switch (addr) { + case SHAKTI_UART_BAUD: + return s->uart_baud; + case SHAKTI_UART_RX: + qemu_chr_fe_accept_input(&s->chr); + s->uart_status &= ~SHAKTI_UART_STATUS_RX_NOT_EMPTY; + return s->uart_rx; + case SHAKTI_UART_STATUS: + return s->uart_status; + case SHAKTI_UART_DELAY: + return s->uart_delay; + case SHAKTI_UART_CONTROL: + return s->uart_control; + case SHAKTI_UART_INT_EN: + return s->uart_interrupt; + case SHAKTI_UART_IQ_CYCLES: + return s->uart_iq_cycles; + case SHAKTI_UART_RX_THRES: + return s->uart_rx_threshold; + default: + /* Also handles TX REG which is write only */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + } + + return 0; +} + +static void shakti_uart_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + ShaktiUartState *s = opaque; + uint32_t value = data; + uint8_t ch; + + switch (addr) { + case SHAKTI_UART_BAUD: + s->uart_baud = value; + break; + case SHAKTI_UART_TX: + ch = value; + qemu_chr_fe_write_all(&s->chr, &ch, 1); + s->uart_status &= ~SHAKTI_UART_STATUS_TX_FULL; + break; + case SHAKTI_UART_STATUS: + s->uart_status = value; + break; + case SHAKTI_UART_DELAY: + s->uart_delay = value; + break; + case SHAKTI_UART_CONTROL: + s->uart_control = value; + break; + case SHAKTI_UART_INT_EN: + s->uart_interrupt = value; + break; + case SHAKTI_UART_IQ_CYCLES: + s->uart_iq_cycles = value; + break; + case SHAKTI_UART_RX_THRES: + s->uart_rx_threshold = value; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + } +} + +static const MemoryRegionOps shakti_uart_ops = { + .read = shakti_uart_read, + .write = shakti_uart_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = {.min_access_size = 1, .max_access_size = 4}, + .valid = {.min_access_size = 1, .max_access_size = 4}, +}; + +static void shakti_uart_reset(DeviceState *dev) +{ + ShaktiUartState *s = SHAKTI_UART(dev); + + s->uart_baud = SHAKTI_UART_BAUD_DEFAULT; + s->uart_tx = 0x0; + s->uart_rx = 0x0; + s->uart_status = 0x0000; + s->uart_delay = 0x0000; + s->uart_control = SHAKTI_UART_CONTROL_DEFAULT; + s->uart_interrupt = 0x0000; + s->uart_iq_cycles = 0x00; + s->uart_rx_threshold = 0x00; +} + +static int shakti_uart_can_receive(void *opaque) +{ + ShaktiUartState *s = opaque; + + return !(s->uart_status & SHAKTI_UART_STATUS_RX_NOT_EMPTY); +} + +static void shakti_uart_receive(void *opaque, const uint8_t *buf, int size) +{ + ShaktiUartState *s = opaque; + + s->uart_rx = *buf; + s->uart_status |= SHAKTI_UART_STATUS_RX_NOT_EMPTY; +} + +static void shakti_uart_realize(DeviceState *dev, Error **errp) +{ + ShaktiUartState *sus = SHAKTI_UART(dev); + qemu_chr_fe_set_handlers(&sus->chr, shakti_uart_can_receive, + shakti_uart_receive, NULL, NULL, sus, NULL, true); +} + +static void shakti_uart_instance_init(Object *obj) +{ + ShaktiUartState *sus = SHAKTI_UART(obj); + memory_region_init_io(&sus->mmio, + obj, + &shakti_uart_ops, + sus, + TYPE_SHAKTI_UART, + 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &sus->mmio); +} + +static Property shakti_uart_properties[] = { + DEFINE_PROP_CHR("chardev", ShaktiUartState, chr), + DEFINE_PROP_END_OF_LIST(), +}; + +static void shakti_uart_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = 
DEVICE_CLASS(klass); + dc->reset = shakti_uart_reset; + dc->realize = shakti_uart_realize; + device_class_set_props(dc, shakti_uart_properties); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo shakti_uart_info = { + .name = TYPE_SHAKTI_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ShaktiUartState), + .class_init = shakti_uart_class_init, + .instance_init = shakti_uart_instance_init, +}; + +static void shakti_uart_register_types(void) +{ + type_register_static(&shakti_uart_info); +} +type_init(shakti_uart_register_types) diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index 3a00ba7f006..1c75f792b35 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -19,12 +19,12 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/log.h" -#include "hw/sysbus.h" +#include "migration/vmstate.h" #include "chardev/char.h" #include "chardev/char-fe.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/char/sifive_uart.h" +#include "hw/qdev-properties-system.h" /* * Not yet implemented: @@ -33,7 +33,7 @@ */ /* Returns the state of the IP (interrupt pending) register */ -static uint64_t uart_ip(SiFiveUARTState *s) +static uint64_t sifive_uart_ip(SiFiveUARTState *s) { uint64_t ret = 0; @@ -50,7 +50,7 @@ static uint64_t uart_ip(SiFiveUARTState *s) return ret; } -static void update_irq(SiFiveUARTState *s) +static void sifive_uart_update_irq(SiFiveUARTState *s) { int cond = 0; if ((s->ie & SIFIVE_UART_IE_TXWM) || @@ -65,7 +65,7 @@ static void update_irq(SiFiveUARTState *s) } static uint64_t -uart_read(void *opaque, hwaddr addr, unsigned int size) +sifive_uart_read(void *opaque, hwaddr addr, unsigned int size) { SiFiveUARTState *s = opaque; unsigned char r; @@ -76,7 +76,7 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) memmove(s->rx_fifo, s->rx_fifo + 1, s->rx_fifo_len - 1); s->rx_fifo_len--; qemu_chr_fe_accept_input(&s->chr); - update_irq(s); + sifive_uart_update_irq(s); return r; } return 0x80000000; @@ -86,7 +86,7 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) case SIFIVE_UART_IE: return s->ie; case SIFIVE_UART_IP: - return uart_ip(s); + return sifive_uart_ip(s); case SIFIVE_UART_TXCTRL: return s->txctrl; case SIFIVE_UART_RXCTRL: @@ -101,8 +101,8 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) } static void -uart_write(void *opaque, hwaddr addr, - uint64_t val64, unsigned int size) +sifive_uart_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) { SiFiveUARTState *s = opaque; uint32_t value = val64; @@ -111,11 +111,11 @@ uart_write(void *opaque, hwaddr addr, switch (addr) { case SIFIVE_UART_TXFIFO: qemu_chr_fe_write(&s->chr, &ch, 1); - update_irq(s); + sifive_uart_update_irq(s); return; case SIFIVE_UART_IE: s->ie = val64; - update_irq(s); + sifive_uart_update_irq(s); return; case SIFIVE_UART_TXCTRL: s->txctrl = val64; @@ -131,9 +131,9 @@ uart_write(void *opaque, hwaddr addr, __func__, (int)addr, (int)value); } -static const MemoryRegionOps uart_ops = { - .read = uart_read, - .write = uart_write, +static const MemoryRegionOps sifive_uart_ops = { + .read = sifive_uart_read, + .write = sifive_uart_write, .endianness = DEVICE_NATIVE_ENDIAN, .valid = { .min_access_size = 4, @@ -141,7 +141,7 @@ static const MemoryRegionOps uart_ops = { } }; -static void uart_rx(void *opaque, const uint8_t *buf, int size) +static void sifive_uart_rx(void *opaque, const uint8_t *buf, int size) { SiFiveUARTState *s = opaque; @@ -152,43 +152,138 @@ static void uart_rx(void *opaque, const uint8_t *buf, 
int size) } s->rx_fifo[s->rx_fifo_len++] = *buf; - update_irq(s); + sifive_uart_update_irq(s); } -static int uart_can_rx(void *opaque) +static int sifive_uart_can_rx(void *opaque) { SiFiveUARTState *s = opaque; return s->rx_fifo_len < sizeof(s->rx_fifo); } -static void uart_event(void *opaque, QEMUChrEvent event) +static void sifive_uart_event(void *opaque, QEMUChrEvent event) { } -static int uart_be_change(void *opaque) +static int sifive_uart_be_change(void *opaque) { SiFiveUARTState *s = opaque; - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event, - uart_be_change, s, NULL, true); + qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx, + sifive_uart_event, sifive_uart_be_change, s, + NULL, true); return 0; } +static Property sifive_uart_properties[] = { + DEFINE_PROP_CHR("chardev", SiFiveUARTState, chr), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sifive_uart_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + SiFiveUARTState *s = SIFIVE_UART(obj); + + memory_region_init_io(&s->mmio, OBJECT(s), &sifive_uart_ops, s, + TYPE_SIFIVE_UART, SIFIVE_UART_MAX); + sysbus_init_mmio(sbd, &s->mmio); + sysbus_init_irq(sbd, &s->irq); +} + +static void sifive_uart_realize(DeviceState *dev, Error **errp) +{ + SiFiveUARTState *s = SIFIVE_UART(dev); + + qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx, + sifive_uart_event, sifive_uart_be_change, s, + NULL, true); + +} + +static void sifive_uart_reset_enter(Object *obj, ResetType type) +{ + SiFiveUARTState *s = SIFIVE_UART(obj); + s->ie = 0; + s->ip = 0; + s->txctrl = 0; + s->rxctrl = 0; + s->div = 0; + s->rx_fifo_len = 0; +} + +static void sifive_uart_reset_hold(Object *obj) +{ + SiFiveUARTState *s = SIFIVE_UART(obj); + qemu_irq_lower(s->irq); +} + +static const VMStateDescription vmstate_sifive_uart = { + .name = TYPE_SIFIVE_UART, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(rx_fifo, SiFiveUARTState, + SIFIVE_UART_RX_FIFO_SIZE), + VMSTATE_UINT8(rx_fifo_len, SiFiveUARTState), + VMSTATE_UINT32(ie, SiFiveUARTState), + VMSTATE_UINT32(ip, SiFiveUARTState), + VMSTATE_UINT32(txctrl, SiFiveUARTState), + VMSTATE_UINT32(rxctrl, SiFiveUARTState), + VMSTATE_UINT32(div, SiFiveUARTState), + VMSTATE_END_OF_LIST() + }, +}; + + +static void sifive_uart_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + ResettableClass *rc = RESETTABLE_CLASS(oc); + + dc->realize = sifive_uart_realize; + dc->vmsd = &vmstate_sifive_uart; + rc->phases.enter = sifive_uart_reset_enter; + rc->phases.hold = sifive_uart_reset_hold; + device_class_set_props(dc, sifive_uart_properties); + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); +} + +static const TypeInfo sifive_uart_info = { + .name = TYPE_SIFIVE_UART, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SiFiveUARTState), + .instance_init = sifive_uart_init, + .class_init = sifive_uart_class_init, +}; + +static void sifive_uart_register_types(void) +{ + type_register_static(&sifive_uart_info); +} + +type_init(sifive_uart_register_types) + /* * Create UART device. 
*/ SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base, Chardev *chr, qemu_irq irq) { - SiFiveUARTState *s = g_malloc0(sizeof(SiFiveUARTState)); - s->irq = irq; - qemu_chr_fe_init(&s->chr, chr, &error_abort); - qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, uart_event, - uart_be_change, s, NULL, true); - memory_region_init_io(&s->mmio, NULL, &uart_ops, s, - TYPE_SIFIVE_UART, SIFIVE_UART_MAX); - memory_region_add_subregion(address_space, base, &s->mmio); - return s; + DeviceState *dev; + SysBusDevice *s; + SiFiveUARTState *r; + + dev = qdev_new("riscv.sifive.uart"); + s = SYS_BUS_DEVICE(dev); + qdev_prop_set_chr(dev, "chardev", chr); + sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(address_space, base, + sysbus_mmio_get_region(s, 0)); + sysbus_connect_irq(s, 0, irq); + + r = SIFIVE_UART(dev); + return r; } diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 79eaa2fa523..91eae1a5988 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -2,7 +2,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "cpu.h" #include "migration/vmstate.h" #include "chardev/char-fe.h" #include "hw/ppc/spapr.h" diff --git a/hw/char/trace-events b/hw/char/trace-events index 76d52938ead..2ecb36232e9 100644 --- a/hw/char/trace-events +++ b/hw/char/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # parallel.c parallel_ioport_read(const char *desc, uint16_t addr, uint8_t value) "read [%s] addr 0x%02x val 0x%02x" @@ -35,24 +35,9 @@ grlib_apbuart_event(int event) "event:%d" grlib_apbuart_writel_unknown(uint64_t addr, uint32_t value) "addr 0x%"PRIx64" value 0x%x" grlib_apbuart_readl_unknown(uint64_t addr) "addr 0x%"PRIx64 -# lm32_juart.c -lm32_juart_get_jtx(uint32_t value) "jtx 0x%08x" -lm32_juart_set_jtx(uint32_t value) "jtx 0x%08x" -lm32_juart_get_jrx(uint32_t value) "jrx 0x%08x" -lm32_juart_set_jrx(uint32_t value) "jrx 0x%08x" - -# lm32_uart.c -lm32_uart_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_uart_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_uart_irq_state(int level) "irq state %d" - -# milkymist-uart.c -milkymist_uart_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_uart_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_uart_raise_irq(void) "Raise IRQ" -milkymist_uart_lower_irq(void) "Lower IRQ" - # escc.c +escc_hard_reset(void) "hard reset" +escc_soft_reset_chn(char channel) "soft reset channel %c" escc_put_queue(char channel, int b) "channel %c put: 0x%02x" escc_get_queue(char channel, int val) "channel %c get 0x%02x" escc_update_irq(int irq) "IRQ = %d" @@ -90,6 +75,10 @@ cmsdk_apb_uart_set_params(int speed) "CMSDK APB UART: params set to %d 8N1" nrf51_uart_read(uint64_t addr, uint64_t r, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" nrf51_uart_write(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" +# shakti_uart.c +shakti_uart_read(uint64_t addr, uint16_t r, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx16 " size %u" +shakti_uart_write(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%" PRIx64 " value 0x%" PRIx64 " size %u" + # exynos4210_uart.c exynos_uart_dmabusy(uint32_t channel) "UART%d: DMA busy (Rx buffer empty)" exynos_uart_dmaready(uint32_t channel) "UART%d: DMA ready" @@ -112,3 
+101,7 @@ exynos_uart_rx_timeout(uint32_t channel, uint32_t stat, uint32_t intsp) "UART%d: # cadence_uart.c cadence_uart_baudrate(unsigned baudrate) "baudrate %u" + +# sh_serial.c +sh_serial_read(char *id, unsigned size, uint64_t offs, uint64_t val) " %s size %d offs 0x%02" PRIx64 " -> 0x%02" PRIx64 +sh_serial_write(char *id, unsigned size, uint64_t offs, uint64_t val) "%s size %d offs 0x%02" PRIx64 " <- 0x%02" PRIx64 diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c index 6b132caa29d..dd5a02e3392 100644 --- a/hw/char/virtio-console.c +++ b/hw/char/virtio-console.c @@ -38,7 +38,7 @@ struct VirtConsole { * Callback function that's called from chardevs when backend becomes * writable. */ -static gboolean chr_write_unblocked(GIOChannel *chan, GIOCondition cond, +static gboolean chr_write_unblocked(void *do_not_use, GIOCondition cond, void *opaque) { VirtConsole *vcon = opaque; diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index b20038991a6..f01ec2137c9 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -28,7 +28,6 @@ #include "qemu/error-report.h" #include "qemu/queue.h" #include "hw/qdev-properties.h" -#include "hw/sysbus.h" #include "trace.h" #include "hw/virtio/virtio-serial.h" #include "hw/virtio/virtio-access.h" @@ -1049,8 +1048,8 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) config_size); /* Spawn a new virtio-serial bus on which the ports will ride as devices */ - qbus_create_inplace(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, - dev, vdev->bus_name); + qbus_init(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, + dev, vdev->bus_name); qbus_set_hotplug_handler(BUS(&vser->bus), OBJECT(vser)); vser->bus.vser = vser; QTAILQ_INIT(&vser->ports); diff --git a/hw/core/bus.c b/hw/core/bus.c index 9cfbc3a6877..c7831b5293b 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -99,7 +99,8 @@ static void bus_reset_child_foreach(Object *obj, ResettableChildCallback cb, } } -static void qbus_init(BusState *bus, DeviceState *parent, const char *name) +static void qbus_init_internal(BusState *bus, DeviceState *parent, + const char *name) { const char *typename = object_get_typename(OBJECT(bus)); BusClass *bc; @@ -151,19 +152,19 @@ static void bus_unparent(Object *obj) bus->parent = NULL; } -void qbus_create_inplace(void *bus, size_t size, const char *typename, - DeviceState *parent, const char *name) +void qbus_init(void *bus, size_t size, const char *typename, + DeviceState *parent, const char *name) { object_initialize(bus, size, typename); - qbus_init(bus, parent, name); + qbus_init_internal(bus, parent, name); } -BusState *qbus_create(const char *typename, DeviceState *parent, const char *name) +BusState *qbus_new(const char *typename, DeviceState *parent, const char *name) { BusState *bus; bus = BUS(object_new(typename)); - qbus_init(bus, parent, name); + qbus_init_internal(bus, parent, name); return bus; } diff --git a/hw/core/clock-vmstate.c b/hw/core/clock-vmstate.c index 260b13fc2c8..9d9174ffbd7 100644 --- a/hw/core/clock-vmstate.c +++ b/hw/core/clock-vmstate.c @@ -14,12 +14,50 @@ #include "migration/vmstate.h" #include "hw/clock.h" +static bool muldiv_needed(void *opaque) +{ + Clock *clk = opaque; + + return clk->multiplier != 1 || clk->divider != 1; +} + +static int clock_pre_load(void *opaque) +{ + Clock *clk = opaque; + /* + * The initial out-of-reset settings of the Clock might have been + * configured by the device to be different from what we set + * in clock_initfn(), so we must 
here set the default values to
+     * be used if they are not in the inbound migration state.
+     */
+    clk->multiplier = 1;
+    clk->divider = 1;
+
+    return 0;
+}
+
+const VMStateDescription vmstate_muldiv = {
+    .name = "clock/muldiv",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = muldiv_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(multiplier, Clock),
+        VMSTATE_UINT32(divider, Clock),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 const VMStateDescription vmstate_clock = {
     .name = "clock",
     .version_id = 0,
     .minimum_version_id = 0,
+    .pre_load = clock_pre_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(period, Clock),
         VMSTATE_END_OF_LIST()
-    }
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_muldiv,
+        NULL
+    },
 };
diff --git a/hw/core/clock.c b/hw/core/clock.c
index fc5a99683f8..916875e07a2 100644
--- a/hw/core/clock.c
+++ b/hw/core/clock.c
@@ -64,6 +64,15 @@ bool clock_set(Clock *clk, uint64_t period)
     return true;
 }
 
+static uint64_t clock_get_child_period(Clock *clk)
+{
+    /*
+     * Return the period to be used for child clocks, which is the parent
+     * clock period adjusted for multiplier and divider effects.
+     */
+    return muldiv64(clk->period, clk->multiplier, clk->divider);
+}
+
 static void clock_call_callback(Clock *clk, ClockEvent event)
 {
     /*
@@ -78,15 +87,16 @@ static void clock_call_callback(Clock *clk, ClockEvent event)
 static void clock_propagate_period(Clock *clk, bool call_callbacks)
 {
     Clock *child;
+    uint64_t child_period = clock_get_child_period(clk);
 
     QLIST_FOREACH(child, &clk->children, sibling) {
-        if (child->period != clk->period) {
+        if (child->period != child_period) {
             if (call_callbacks) {
                 clock_call_callback(child, ClockPreUpdate);
             }
-            child->period = clk->period;
+            child->period = child_period;
             trace_clock_update(CLOCK_PATH(child), CLOCK_PATH(clk),
-                               CLOCK_PERIOD_TO_HZ(clk->period),
+                               CLOCK_PERIOD_TO_HZ(child->period),
                                call_callbacks);
             if (call_callbacks) {
                 clock_call_callback(child, ClockUpdate);
@@ -110,7 +120,7 @@ void clock_set_source(Clock *clk, Clock *src)
 
     trace_clock_set_source(CLOCK_PATH(clk), CLOCK_PATH(src));
 
-    clk->period = src->period;
+    clk->period = clock_get_child_period(src);
     QLIST_INSERT_HEAD(&src->children, clk, sibling);
     clk->source = src;
     clock_propagate_period(clk, false);
@@ -133,10 +143,23 @@ char *clock_display_freq(Clock *clk)
     return freq_to_str(clock_get_hz(clk));
 }
 
+void clock_set_mul_div(Clock *clk, uint32_t multiplier, uint32_t divider)
+{
+    assert(divider != 0);
+
+    trace_clock_set_mul_div(CLOCK_PATH(clk), clk->multiplier, multiplier,
+                            clk->divider, divider);
+    clk->multiplier = multiplier;
+    clk->divider = divider;
+}
+
 static void clock_initfn(Object *obj)
 {
     Clock *clk = CLOCK(obj);
 
+    clk->multiplier = 1;
+    clk->divider = 1;
+
     QLIST_INIT(&clk->children);
 }
 
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
new file mode 100644
index 00000000000..fbea86d3303
--- /dev/null
+++ b/hw/core/cpu-common.c
@@ -0,0 +1,307 @@
+/*
+ * QEMU CPU model
+ *
+ * Copyright (c) 2012-2014 SUSE LINUX Products GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/core/cpu.h" +#include "sysemu/hw_accel.h" +#include "qemu/notify.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "exec/log.h" +#include "exec/cpu-common.h" +#include "qemu/error-report.h" +#include "qemu/qemu-print.h" +#include "sysemu/tcg.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "trace/trace-root.h" +#include "qemu/plugin.h" + +CPUState *cpu_by_arch_id(int64_t id) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->get_arch_id(cpu) == id) { + return cpu; + } + } + return NULL; +} + +bool cpu_exists(int64_t id) +{ + return !!cpu_by_arch_id(id); +} + +CPUState *cpu_create(const char *typename) +{ + Error *err = NULL; + CPUState *cpu = CPU(object_new(typename)); + if (!qdev_realize(DEVICE(cpu), NULL, &err)) { + error_report_err(err); + object_unref(OBJECT(cpu)); + exit(EXIT_FAILURE); + } + return cpu; +} + +/* Resetting the IRQ comes from across the code base so we take the + * BQL here if we need to. cpu_interrupt assumes it is held.*/ +void cpu_reset_interrupt(CPUState *cpu, int mask) +{ + bool need_lock = !qemu_mutex_iothread_locked(); + + if (need_lock) { + qemu_mutex_lock_iothread(); + } + cpu->interrupt_request &= ~mask; + if (need_lock) { + qemu_mutex_unlock_iothread(); + } +} + +void cpu_exit(CPUState *cpu) +{ + qatomic_set(&cpu->exit_request, 1); + /* Ensure cpu_exec will see the exit request after TCG has exited. */ + smp_wmb(); + qatomic_set(&cpu->icount_decr_ptr->u16.high, -1); +} + +static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg) +{ + return 0; +} + +static int cpu_common_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg) +{ + return 0; +} + +void cpu_dump_state(CPUState *cpu, FILE *f, int flags) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->dump_state) { + cpu_synchronize_state(cpu); + cc->dump_state(cpu, f, flags); + } +} + +void cpu_reset(CPUState *cpu) +{ + device_cold_reset(DEVICE(cpu)); + + trace_guest_cpu_reset(cpu); +} + +static void cpu_common_reset(DeviceState *dev) +{ + CPUState *cpu = CPU(dev); + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (qemu_loglevel_mask(CPU_LOG_RESET)) { + qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index); + log_cpu_state(cpu, cc->reset_dump_flags); + } + + cpu->interrupt_request = 0; + cpu->halted = cpu->start_powered_off; + cpu->mem_io_pc = 0; + cpu->icount_extra = 0; + qatomic_set(&cpu->icount_decr_ptr->u32, 0); + cpu->can_do_io = 1; + cpu->exception_index = -1; + cpu->crash_occurred = false; + cpu->cflags_next_tb = -1; + + if (tcg_enabled()) { + cpu_tb_jmp_cache_clear(cpu); + + tcg_flush_softmmu_tlb(cpu); + } +} + +static bool cpu_common_has_work(CPUState *cs) +{ + return false; +} + +ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model) +{ + CPUClass *cc = CPU_CLASS(object_class_by_name(typename)); + + assert(cpu_model && cc->class_by_name); + return cc->class_by_name(cpu_model); +} + +static void cpu_common_parse_features(const char *typename, char *features, + Error **errp) +{ + char *val; + static bool cpu_globals_initialized; + /* Single "key=value" string being parsed */ + char *featurestr = features ? 
strtok(features, ",") : NULL; + + /* should be called only once, catch invalid users */ + assert(!cpu_globals_initialized); + cpu_globals_initialized = true; + + while (featurestr) { + val = strchr(featurestr, '='); + if (val) { + GlobalProperty *prop = g_new0(typeof(*prop), 1); + *val = 0; + val++; + prop->driver = typename; + prop->property = g_strdup(featurestr); + prop->value = g_strdup(val); + qdev_prop_register_global(prop); + } else { + error_setg(errp, "Expected key=value format, found %s.", + featurestr); + return; + } + featurestr = strtok(NULL, ","); + } +} + +static void cpu_common_realizefn(DeviceState *dev, Error **errp) +{ + CPUState *cpu = CPU(dev); + Object *machine = qdev_get_machine(); + + /* qdev_get_machine() can return something that's not TYPE_MACHINE + * if this is one of the user-only emulators; in that case there's + * no need to check the ignore_memory_transaction_failures board flag. + */ + if (object_dynamic_cast(machine, TYPE_MACHINE)) { + ObjectClass *oc = object_get_class(machine); + MachineClass *mc = MACHINE_CLASS(oc); + + if (mc) { + cpu->ignore_memory_transaction_failures = + mc->ignore_memory_transaction_failures; + } + } + + if (dev->hotplugged) { + cpu_synchronize_post_init(cpu); + cpu_resume(cpu); + } + + /* NOTE: latest generic point where the cpu is fully realized */ + trace_init_vcpu(cpu); +} + +static void cpu_common_unrealizefn(DeviceState *dev) +{ + CPUState *cpu = CPU(dev); + + /* NOTE: latest generic point before the cpu is fully unrealized */ + trace_fini_vcpu(cpu); + cpu_exec_unrealizefn(cpu); +} + +static void cpu_common_initfn(Object *obj) +{ + CPUState *cpu = CPU(obj); + CPUClass *cc = CPU_GET_CLASS(obj); + + cpu->cpu_index = UNASSIGNED_CPU_INDEX; + cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX; + cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs; + /* *-user doesn't have configurable SMP topology */ + /* the default value is changed by qemu_init_vcpu() for softmmu */ + cpu->nr_cores = 1; + cpu->nr_threads = 1; + + qemu_mutex_init(&cpu->work_mutex); + QSIMPLEQ_INIT(&cpu->work_list); + QTAILQ_INIT(&cpu->breakpoints); + QTAILQ_INIT(&cpu->watchpoints); + + cpu_exec_initfn(cpu); +} + +static void cpu_common_post_initfn(Object *obj) +{ + /* Now that cpu->env_ptr has been initialized set up instruction logging. */ +#ifdef CONFIG_TCG_LOG_INSTR + qemu_log_instr_init(CPU(obj)); +#endif +} + +static void cpu_common_finalize(Object *obj) +{ + CPUState *cpu = CPU(obj); + + qemu_mutex_destroy(&cpu->work_mutex); +} + +static int64_t cpu_common_get_arch_id(CPUState *cpu) +{ + return cpu->cpu_index; +} + +static void cpu_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + CPUClass *k = CPU_CLASS(klass); + + k->parse_features = cpu_common_parse_features; + k->get_arch_id = cpu_common_get_arch_id; + k->has_work = cpu_common_has_work; + k->gdb_read_register = cpu_common_gdb_read_register; + k->gdb_write_register = cpu_common_gdb_write_register; + set_bit(DEVICE_CATEGORY_CPU, dc->categories); + dc->realize = cpu_common_realizefn; + dc->unrealize = cpu_common_unrealizefn; + dc->reset = cpu_common_reset; + cpu_class_init_props(dc); + /* + * Reason: CPUs still need special care by board code: wiring up + * IRQs, adding reset handlers, halting non-first CPUs, ... 
+ */ + dc->user_creatable = false; +} + +static const TypeInfo cpu_type_info = { + .name = TYPE_CPU, + .parent = TYPE_DEVICE, + .instance_size = sizeof(CPUState), + .instance_init = cpu_common_initfn, + .instance_post_init = cpu_common_post_initfn, + .instance_finalize = cpu_common_finalize, + .abstract = true, + .class_size = sizeof(CPUClass), + .class_init = cpu_class_init, +}; + +static void cpu_register_types(void) +{ + type_register_static(&cpu_type_info); +} + +type_init(cpu_register_types) diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c new file mode 100644 index 00000000000..00253f89293 --- /dev/null +++ b/hw/core/cpu-sysemu.c @@ -0,0 +1,145 @@ +/* + * QEMU CPU model (system emulation specific) + * + * Copyright (c) 2012-2014 SUSE LINUX Products GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/core/cpu.h" +#include "hw/core/sysemu-cpu-ops.h" + +bool cpu_paging_enabled(const CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->get_paging_enabled) { + return cc->sysemu_ops->get_paging_enabled(cpu); + } + + return false; +} + +void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, + Error **errp) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->get_memory_mapping) { + cc->sysemu_ops->get_memory_mapping(cpu, list, errp); + return; + } + + error_setg(errp, "Obtaining memory mappings is unsupported on this CPU."); +} + +hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->get_phys_page_attrs_debug) { + return cc->sysemu_ops->get_phys_page_attrs_debug(cpu, addr, attrs); + } + /* Fallback for CPUs which don't implement the _attrs_ hook */ + *attrs = MEMTXATTRS_UNSPECIFIED; + return cc->sysemu_ops->get_phys_page_debug(cpu, addr); +} + +hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) +{ + MemTxAttrs attrs = {}; + + return cpu_get_phys_page_attrs_debug(cpu, addr, &attrs); +} + +int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + int ret = 0; + + if (cc->sysemu_ops->asidx_from_attrs) { + ret = cc->sysemu_ops->asidx_from_attrs(cpu, attrs); + assert(ret < cpu->num_ases && ret >= 0); + } + return ret; +} + +int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf32_qemunote) { + return 0; + } + return (*cc->sysemu_ops->write_elf32_qemunote)(f, cpu, opaque); +} + +int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf32_note) { + return -1; + } + return (*cc->sysemu_ops->write_elf32_note)(f, cpu, cpuid, opaque); +} + +int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque) +{ + CPUClass *cc = 
CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf64_qemunote) { + return 0; + } + return (*cc->sysemu_ops->write_elf64_qemunote)(f, cpu, opaque); +} + +int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (!cc->sysemu_ops->write_elf64_note) { + return -1; + } + return (*cc->sysemu_ops->write_elf64_note)(f, cpu, cpuid, opaque); +} + +bool cpu_virtio_is_big_endian(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + + if (cc->sysemu_ops->virtio_is_big_endian) { + return cc->sysemu_ops->virtio_is_big_endian(cpu); + } + return target_words_bigendian(); +} + +GuestPanicInformation *cpu_get_crash_info(CPUState *cpu) +{ + CPUClass *cc = CPU_GET_CLASS(cpu); + GuestPanicInformation *res = NULL; + + if (cc->sysemu_ops->get_crash_info) { + res = cc->sysemu_ops->get_crash_info(cpu); + } + return res; +} diff --git a/hw/core/cpu.c b/hw/core/cpu.c deleted file mode 100644 index 9d46cea876a..00000000000 --- a/hw/core/cpu.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * QEMU CPU model - * - * Copyright (c) 2012-2014 SUSE LINUX Products GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/core/cpu.h" -#include "sysemu/hw_accel.h" -#include "qemu/notify.h" -#include "qemu/log.h" -#include "qemu/main-loop.h" -#include "exec/log.h" -#include "exec/cpu-common.h" -#include "qemu/error-report.h" -#include "qemu/qemu-print.h" -#include "sysemu/tcg.h" -#include "hw/boards.h" -#include "hw/qdev-properties.h" -#include "trace/trace-root.h" -#include "qemu/plugin.h" -#include "sysemu/hw_accel.h" - -CPUState *cpu_by_arch_id(int64_t id) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->get_arch_id(cpu) == id) { - return cpu; - } - } - return NULL; -} - -bool cpu_exists(int64_t id) -{ - return !!cpu_by_arch_id(id); -} - -CPUState *cpu_create(const char *typename) -{ - Error *err = NULL; - CPUState *cpu = CPU(object_new(typename)); - if (!qdev_realize(DEVICE(cpu), NULL, &err)) { - error_report_err(err); - object_unref(OBJECT(cpu)); - exit(EXIT_FAILURE); - } - return cpu; -} - -bool cpu_paging_enabled(const CPUState *cpu) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return cc->get_paging_enabled(cpu); -} - -static bool cpu_common_get_paging_enabled(const CPUState *cpu) -{ - return false; -} - -void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, - Error **errp) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - cc->get_memory_mapping(cpu, list, errp); -} - -static void cpu_common_get_memory_mapping(CPUState *cpu, - MemoryMappingList *list, - Error **errp) -{ - error_setg(errp, "Obtaining memory mappings is unsupported on this CPU."); -} - -/* Resetting the IRQ comes from across the code base so we take the - * BQL here if we need to. 
cpu_interrupt assumes it is held.*/ -void cpu_reset_interrupt(CPUState *cpu, int mask) -{ - bool need_lock = !qemu_mutex_iothread_locked(); - - if (need_lock) { - qemu_mutex_lock_iothread(); - } - cpu->interrupt_request &= ~mask; - if (need_lock) { - qemu_mutex_unlock_iothread(); - } -} - -void cpu_exit(CPUState *cpu) -{ - qatomic_set(&cpu->exit_request, 1); - /* Ensure cpu_exec will see the exit request after TCG has exited. */ - smp_wmb(); - qatomic_set(&cpu->icount_decr_ptr->u16.high, -1); -} - -int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf32_qemunote)(f, cpu, opaque); -} - -static int cpu_common_write_elf32_qemunote(WriteCoreDumpFunction f, - CPUState *cpu, void *opaque) -{ - return 0; -} - -int cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf32_note)(f, cpu, cpuid, opaque); -} - -static int cpu_common_write_elf32_note(WriteCoreDumpFunction f, - CPUState *cpu, int cpuid, - void *opaque) -{ - return -1; -} - -int cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf64_qemunote)(f, cpu, opaque); -} - -static int cpu_common_write_elf64_qemunote(WriteCoreDumpFunction f, - CPUState *cpu, void *opaque) -{ - return 0; -} - -int cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - return (*cc->write_elf64_note)(f, cpu, cpuid, opaque); -} - -static int cpu_common_write_elf64_note(WriteCoreDumpFunction f, - CPUState *cpu, int cpuid, - void *opaque) -{ - return -1; -} - - -static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg) -{ - return 0; -} - -static int cpu_common_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg) -{ - return 0; -} - -static bool cpu_common_virtio_is_big_endian(CPUState *cpu) -{ - return target_words_bigendian(); -} - -/* - * XXX the following #if is always true because this is a common_ss - * module, so target CONFIG_* is never defined. 
- */ -#if !defined(CONFIG_USER_ONLY) -GuestPanicInformation *cpu_get_crash_info(CPUState *cpu) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - GuestPanicInformation *res = NULL; - - if (cc->get_crash_info) { - res = cc->get_crash_info(cpu); - } - return res; -} -#endif - -void cpu_dump_state(CPUState *cpu, FILE *f, int flags) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->dump_state) { - cpu_synchronize_state(cpu); - cc->dump_state(cpu, f, flags); - } -} - -void cpu_dump_statistics(CPUState *cpu, int flags) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->dump_statistics) { - cc->dump_statistics(cpu, flags); - } -} - -void cpu_reset(CPUState *cpu) -{ - device_cold_reset(DEVICE(cpu)); - - trace_guest_cpu_reset(cpu); -} - -static void cpu_common_reset(DeviceState *dev) -{ - CPUState *cpu = CPU(dev); - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (qemu_loglevel_mask(CPU_LOG_RESET)) { - qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index); - log_cpu_state(cpu, cc->reset_dump_flags); - } - - cpu->interrupt_request = 0; - cpu->halted = cpu->start_powered_off; - cpu->mem_io_pc = 0; - cpu->icount_extra = 0; - qatomic_set(&cpu->icount_decr_ptr->u32, 0); - cpu->can_do_io = 1; - cpu->exception_index = -1; - cpu->crash_occurred = false; - cpu->cflags_next_tb = -1; - - if (tcg_enabled()) { - cpu_tb_jmp_cache_clear(cpu); - - tcg_flush_softmmu_tlb(cpu); - } -} - -static bool cpu_common_has_work(CPUState *cs) -{ - return false; -} - -ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model) -{ - CPUClass *cc = CPU_CLASS(object_class_by_name(typename)); - - assert(cpu_model && cc->class_by_name); - return cc->class_by_name(cpu_model); -} - -static void cpu_common_parse_features(const char *typename, char *features, - Error **errp) -{ - char *val; - static bool cpu_globals_initialized; - /* Single "key=value" string being parsed */ - char *featurestr = features ? strtok(features, ",") : NULL; - - /* should be called only once, catch invalid users */ - assert(!cpu_globals_initialized); - cpu_globals_initialized = true; - - while (featurestr) { - val = strchr(featurestr, '='); - if (val) { - GlobalProperty *prop = g_new0(typeof(*prop), 1); - *val = 0; - val++; - prop->driver = typename; - prop->property = g_strdup(featurestr); - prop->value = g_strdup(val); - qdev_prop_register_global(prop); - } else { - error_setg(errp, "Expected key=value format, found %s.", - featurestr); - return; - } - featurestr = strtok(NULL, ","); - } -} - -static void cpu_common_realizefn(DeviceState *dev, Error **errp) -{ - CPUState *cpu = CPU(dev); - Object *machine = qdev_get_machine(); - - /* qdev_get_machine() can return something that's not TYPE_MACHINE - * if this is one of the user-only emulators; in that case there's - * no need to check the ignore_memory_transaction_failures board flag. 
- */ - if (object_dynamic_cast(machine, TYPE_MACHINE)) { - ObjectClass *oc = object_get_class(machine); - MachineClass *mc = MACHINE_CLASS(oc); - - if (mc) { - cpu->ignore_memory_transaction_failures = - mc->ignore_memory_transaction_failures; - } - } - - if (dev->hotplugged) { - cpu_synchronize_post_init(cpu); - cpu_resume(cpu); - } - - /* NOTE: latest generic point where the cpu is fully realized */ - trace_init_vcpu(cpu); -} - -static void cpu_common_unrealizefn(DeviceState *dev) -{ - CPUState *cpu = CPU(dev); - - /* NOTE: latest generic point before the cpu is fully unrealized */ - trace_fini_vcpu(cpu); - cpu_exec_unrealizefn(cpu); -} - -static void cpu_common_initfn(Object *obj) -{ - CPUState *cpu = CPU(obj); - CPUClass *cc = CPU_GET_CLASS(obj); - - cpu->cpu_index = UNASSIGNED_CPU_INDEX; - cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX; - cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs; - /* *-user doesn't have configurable SMP topology */ - /* the default value is changed by qemu_init_vcpu() for softmmu */ - cpu->nr_cores = 1; - cpu->nr_threads = 1; - - qemu_mutex_init(&cpu->work_mutex); - QSIMPLEQ_INIT(&cpu->work_list); - QTAILQ_INIT(&cpu->breakpoints); - QTAILQ_INIT(&cpu->watchpoints); - - cpu_exec_initfn(cpu); -} - -static void cpu_common_post_initfn(Object *obj) -{ - /* Now that cpu->env_ptr has been initialized set up instruction logging. */ -#ifdef CONFIG_TCG_LOG_INSTR - qemu_log_instr_init(CPU(obj)); -#endif -} - -static void cpu_common_finalize(Object *obj) -{ - CPUState *cpu = CPU(obj); - - qemu_mutex_destroy(&cpu->work_mutex); -} - -static int64_t cpu_common_get_arch_id(CPUState *cpu) -{ - return cpu->cpu_index; -} - -static Property cpu_common_props[] = { -#ifndef CONFIG_USER_ONLY - /* Create a memory property for softmmu CPU object, - * so users can wire up its memory. (This can't go in hw/core/cpu.c - * because that file is compiled only once for both user-mode - * and system builds.) The default if no link is set up is to use - * the system address space. - */ - DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION, - MemoryRegion *), -#endif - DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false), - DEFINE_PROP_END_OF_LIST(), -}; - -static void cpu_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - CPUClass *k = CPU_CLASS(klass); - - k->parse_features = cpu_common_parse_features; - k->get_arch_id = cpu_common_get_arch_id; - k->has_work = cpu_common_has_work; - k->get_paging_enabled = cpu_common_get_paging_enabled; - k->get_memory_mapping = cpu_common_get_memory_mapping; - k->write_elf32_qemunote = cpu_common_write_elf32_qemunote; - k->write_elf32_note = cpu_common_write_elf32_note; - k->write_elf64_qemunote = cpu_common_write_elf64_qemunote; - k->write_elf64_note = cpu_common_write_elf64_note; - k->gdb_read_register = cpu_common_gdb_read_register; - k->gdb_write_register = cpu_common_gdb_write_register; - k->virtio_is_big_endian = cpu_common_virtio_is_big_endian; - set_bit(DEVICE_CATEGORY_CPU, dc->categories); - dc->realize = cpu_common_realizefn; - dc->unrealize = cpu_common_unrealizefn; - dc->reset = cpu_common_reset; - device_class_set_props(dc, cpu_common_props); - /* - * Reason: CPUs still need special care by board code: wiring up - * IRQs, adding reset handlers, halting non-first CPUs, ... 
- */ - dc->user_creatable = false; -} - -static const TypeInfo cpu_type_info = { - .name = TYPE_CPU, - .parent = TYPE_DEVICE, - .instance_size = sizeof(CPUState), - .instance_init = cpu_common_initfn, - .instance_post_init = cpu_common_post_initfn, - .instance_finalize = cpu_common_finalize, - .abstract = true, - .class_size = sizeof(CPUClass), - .class_init = cpu_class_init, -}; - -static void cpu_register_types(void) -{ - type_register_static(&cpu_type_info); -} - -type_init(cpu_register_types) diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c index 2b2a7b5e9aa..d14f932eea2 100644 --- a/hw/core/generic-loader.c +++ b/hw/core/generic-loader.c @@ -32,7 +32,6 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" -#include "hw/sysbus.h" #include "sysemu/dma.h" #include "sysemu/reset.h" #include "hw/boards.h" diff --git a/hw/core/gpio.c b/hw/core/gpio.c new file mode 100644 index 00000000000..8e6b4f5edf3 --- /dev/null +++ b/hw/core/gpio.c @@ -0,0 +1,197 @@ +/* + * qdev GPIO helpers + * + * Copyright (c) 2009 CodeSourcery + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/irq.h" +#include "qapi/error.h" + +static NamedGPIOList *qdev_get_named_gpio_list(DeviceState *dev, + const char *name) +{ + NamedGPIOList *ngl; + + QLIST_FOREACH(ngl, &dev->gpios, node) { + /* NULL is a valid and matchable name. 
*/ + if (g_strcmp0(name, ngl->name) == 0) { + return ngl; + } + } + + ngl = g_malloc0(sizeof(*ngl)); + ngl->name = g_strdup(name); + QLIST_INSERT_HEAD(&dev->gpios, ngl, node); + return ngl; +} + +void qdev_init_gpio_in_named_with_opaque(DeviceState *dev, + qemu_irq_handler handler, + void *opaque, + const char *name, int n) +{ + int i; + NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); + + assert(gpio_list->num_out == 0 || !name); + gpio_list->in = qemu_extend_irqs(gpio_list->in, gpio_list->num_in, handler, + opaque, n); + + if (!name) { + name = "unnamed-gpio-in"; + } + for (i = gpio_list->num_in; i < gpio_list->num_in + n; i++) { + gchar *propname = g_strdup_printf("%s[%u]", name, i); + + object_property_add_child(OBJECT(dev), propname, + OBJECT(gpio_list->in[i])); + g_free(propname); + } + + gpio_list->num_in += n; +} + +void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n) +{ + qdev_init_gpio_in_named(dev, handler, NULL, n); +} + +void qdev_init_gpio_out_named(DeviceState *dev, qemu_irq *pins, + const char *name, int n) +{ + int i; + NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); + + assert(gpio_list->num_in == 0 || !name); + + if (!name) { + name = "unnamed-gpio-out"; + } + memset(pins, 0, sizeof(*pins) * n); + for (i = 0; i < n; ++i) { + gchar *propname = g_strdup_printf("%s[%u]", name, + gpio_list->num_out + i); + + object_property_add_link(OBJECT(dev), propname, TYPE_IRQ, + (Object **)&pins[i], + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + g_free(propname); + } + gpio_list->num_out += n; +} + +void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n) +{ + qdev_init_gpio_out_named(dev, pins, NULL, n); +} + +qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n) +{ + NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); + + assert(n >= 0 && n < gpio_list->num_in); + return gpio_list->in[n]; +} + +qemu_irq qdev_get_gpio_in(DeviceState *dev, int n) +{ + return qdev_get_gpio_in_named(dev, NULL, n); +} + +void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n, + qemu_irq pin) +{ + char *propname = g_strdup_printf("%s[%d]", + name ? name : "unnamed-gpio-out", n); + if (pin && !OBJECT(pin)->parent) { + /* We need a name for object_property_set_link to work */ + object_property_add_child(container_get(qdev_get_machine(), + "/unattached"), + "non-qdev-gpio[*]", OBJECT(pin)); + } + object_property_set_link(OBJECT(dev), propname, OBJECT(pin), &error_abort); + g_free(propname); +} + +qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) +{ + g_autofree char *propname = g_strdup_printf("%s[%d]", + name ? name : "unnamed-gpio-out", n); + + qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, + NULL); + + return ret; +} + +/* disconnect a GPIO output, returning the disconnected input (if any) */ + +static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, + const char *name, int n) +{ + char *propname = g_strdup_printf("%s[%d]", + name ? 
name : "unnamed-gpio-out", n); + + qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, + NULL); + if (ret) { + object_property_set_link(OBJECT(dev), propname, NULL, NULL); + } + g_free(propname); + return ret; +} + +qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n) +{ + qemu_irq disconnected = qdev_disconnect_gpio_out_named(dev, name, n); + qdev_connect_gpio_out_named(dev, name, n, icpt); + return disconnected; +} + +void qdev_connect_gpio_out(DeviceState *dev, int n, qemu_irq pin) +{ + qdev_connect_gpio_out_named(dev, NULL, n, pin); +} + +void qdev_pass_gpios(DeviceState *dev, DeviceState *container, + const char *name) +{ + int i; + NamedGPIOList *ngl = qdev_get_named_gpio_list(dev, name); + + for (i = 0; i < ngl->num_in; i++) { + const char *nm = ngl->name ? ngl->name : "unnamed-gpio-in"; + char *propname = g_strdup_printf("%s[%d]", nm, i); + + object_property_add_alias(OBJECT(container), propname, + OBJECT(dev), propname); + g_free(propname); + } + for (i = 0; i < ngl->num_out; i++) { + const char *nm = ngl->name ? ngl->name : "unnamed-gpio-out"; + char *propname = g_strdup_printf("%s[%d]", nm, i); + + object_property_add_alias(OBJECT(container), propname, + OBJECT(dev), propname); + g_free(propname); + } + QLIST_REMOVE(ngl, node); + QLIST_INSERT_HEAD(&container->gpios, ngl, node); +} diff --git a/hw/core/guest-loader.c b/hw/core/guest-loader.c index bde44e27b43..d3f9d1a06eb 100644 --- a/hw/core/guest-loader.c +++ b/hw/core/guest-loader.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include "hw/core/cpu.h" -#include "hw/sysbus.h" #include "sysemu/dma.h" #include "hw/loader.h" #include "hw/qdev-properties.h" diff --git a/hw/core/hotplug-stubs.c b/hw/core/hotplug-stubs.c new file mode 100644 index 00000000000..7aadaa29bd5 --- /dev/null +++ b/hw/core/hotplug-stubs.c @@ -0,0 +1,34 @@ +/* + * Hotplug handler stubs + * + * Copyright (c) Red Hat + * + * Authors: + * Philippe Mathieu-Daudé , + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "hw/qdev-core.h" + +HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) +{ + return NULL; +} + +void hotplug_handler_pre_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp) +{ + g_assert_not_reached(); +} + +void hotplug_handler_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, + Error **errp) +{ + g_assert_not_reached(); +} diff --git a/hw/core/loader.c b/hw/core/loader.c index d3e5f3b423f..052a0fd7198 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -46,6 +46,8 @@ #include "qemu-common.h" #include "qemu/datadir.h" #include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/type-helpers.h" #include "trace.h" #include "hw/hw.h" #include "disas/disas.h" @@ -57,7 +59,6 @@ #include "hw/loader.h" #include "hw/nvram/fw_cfg.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "hw/boards.h" #include "qemu/cutils.h" #include "sysemu/runstate.h" @@ -327,7 +328,7 @@ static void *load_at(int fd, off_t offset, size_t size) #define SZ 64 #include "hw/elf_ops.h" -const char *load_elf_strerror(int error) +const char *load_elf_strerror(ssize_t error) { switch (error) { case 0: @@ -403,12 +404,12 @@ void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp) } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab) +ssize_t load_elf(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab) { return load_elf_as(filename, elf_note_fn, translate_fn, translate_opaque, pentry, lowaddr, highaddr, pflags, big_endian, @@ -416,12 +417,13 @@ int load_elf(const char *filename, } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf_as(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) +ssize_t load_elf_as(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as) { return load_elf_ram(filename, elf_note_fn, translate_fn, translate_opaque, pentry, lowaddr, highaddr, pflags, big_endian, @@ -429,13 +431,13 @@ int load_elf_as(const char *filename, } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf_ram(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom) +ssize_t load_elf_ram(const char *filename, + uint64_t 
(*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, + int big_endian, int elf_machine, int clear_lsb, + int data_swab, AddressSpace *as, bool load_rom) { return load_elf_ram_sym(filename, elf_note_fn, translate_fn, translate_opaque, @@ -445,16 +447,17 @@ int load_elf_ram(const char *filename, } /* return < 0 if error, otherwise the number of bytes loaded in memory */ -int load_elf_ram_sym(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, - uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, - int big_endian, int elf_machine, - int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) +ssize_t load_elf_ram_sym(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int big_endian, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, symbol_fn_t sym_cb) { - int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; + int fd, data_order, target_data_order, must_swab; + ssize_t ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; fd = open(filename, O_RDONLY | O_BINARY); @@ -556,24 +559,35 @@ ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen) /* skip header */ i = 10; + if (srclen < 4) { + goto toosmall; + } flags = src[3]; if (src[2] != DEFLATED || (flags & RESERVED) != 0) { puts ("Error: Bad gzipped data\n"); return -1; } - if ((flags & EXTRA_FIELD) != 0) + if ((flags & EXTRA_FIELD) != 0) { + if (srclen < 12) { + goto toosmall; + } i = 12 + src[10] + (src[11] << 8); - if ((flags & ORIG_NAME) != 0) - while (src[i++] != 0) - ; - if ((flags & COMMENT) != 0) - while (src[i++] != 0) - ; - if ((flags & HEAD_CRC) != 0) + } + if ((flags & ORIG_NAME) != 0) { + while (i < srclen && src[i++] != 0) { + /* do nothing */ + } + } + if ((flags & COMMENT) != 0) { + while (i < srclen && src[i++] != 0) { + /* do nothing */ + } + } + if ((flags & HEAD_CRC) != 0) { i += 2; + } if (i >= srclen) { - puts ("Error: gunzip out of data in header\n"); - return -1; + goto toosmall; } s.zalloc = zalloc; @@ -597,6 +611,10 @@ ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen) inflateEnd(&s); return dstbytes; + +toosmall: + puts("Error: gunzip out of data in header\n"); + return -1; } /* Load a U-Boot image. */ @@ -1458,32 +1476,35 @@ void *rom_ptr_for_as(AddressSpace *as, hwaddr addr, size_t size) return cbdata.rom; } -void hmp_info_roms(Monitor *mon, const QDict *qdict) +HumanReadableText *qmp_x_query_roms(Error **errp) { Rom *rom; + g_autoptr(GString) buf = g_string_new(""); QTAILQ_FOREACH(rom, &roms, next) { if (rom->mr) { - monitor_printf(mon, "%s" - " size=0x%06zx name=\"%s\"\n", - memory_region_name(rom->mr), - rom->romsize, - rom->name); + g_string_append_printf(buf, "%s" + " size=0x%06zx name=\"%s\"\n", + memory_region_name(rom->mr), + rom->romsize, + rom->name); } else if (!rom->fw_file) { - monitor_printf(mon, "addr=" TARGET_FMT_plx - " size=0x%06zx mem=%s name=\"%s\"\n", - rom->addr, rom->romsize, - rom->isrom ? "rom" : "ram", - rom->name); + g_string_append_printf(buf, "addr=" TARGET_FMT_plx + " size=0x%06zx mem=%s name=\"%s\"\n", + rom->addr, rom->romsize, + rom->isrom ? 
"rom" : "ram", + rom->name); } else { - monitor_printf(mon, "fw=%s/%s" - " size=0x%06zx name=\"%s\"\n", - rom->fw_dir, - rom->fw_file, - rom->romsize, - rom->name); + g_string_append_printf(buf, "fw=%s/%s" + " size=0x%06zx name=\"%s\"\n", + rom->fw_dir, + rom->fw_file, + rom->romsize, + rom->name); } } + + return human_readable_text_from_str(buf); } typedef enum HexRecord HexRecord; diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index 58248cffa37..4e2f319aebd 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -53,8 +53,7 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) HotpluggableCPUList *saved = l; CpuInstanceProperties *c; - if (err != NULL) { - hmp_handle_error(mon, err); + if (hmp_handle_error(mon, err)) { return; } @@ -110,6 +109,12 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict) m->value->dump ? "true" : "false"); monitor_printf(mon, " prealloc: %s\n", m->value->prealloc ? "true" : "false"); + monitor_printf(mon, " share: %s\n", + m->value->share ? "true" : "false"); + if (m->value->has_reserve) { + monitor_printf(mon, " reserve: %s\n", + m->value->reserve ? "true" : "false"); + } monitor_printf(mon, " policy: %s\n", HostMemPolicy_str(m->value->policy)); visit_complete(v, &str); @@ -125,38 +130,3 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict) qapi_free_MemdevList(memdev_list); hmp_handle_error(mon, err); } - -void hmp_info_numa(Monitor *mon, const QDict *qdict) -{ - int i, nb_numa_nodes; - NumaNodeMem *node_mem; - CpuInfoFastList *cpu_list, *cpu; - MachineState *ms = MACHINE(qdev_get_machine()); - - nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; - monitor_printf(mon, "%d nodes\n", nb_numa_nodes); - if (!nb_numa_nodes) { - return; - } - - cpu_list = qmp_query_cpus_fast(&error_abort); - node_mem = g_new0(NumaNodeMem, nb_numa_nodes); - - query_numa_node_mem(node_mem, ms); - for (i = 0; i < nb_numa_nodes; i++) { - monitor_printf(mon, "node %d cpus:", i); - for (cpu = cpu_list; cpu; cpu = cpu->next) { - if (cpu->value->has_props && cpu->value->props->has_node_id && - cpu->value->props->node_id == i) { - monitor_printf(mon, " %" PRIi64, cpu->value->cpu_index); - } - } - monitor_printf(mon, "\n"); - monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, - node_mem[i].node_mem >> 20); - monitor_printf(mon, "node %d plugged: %" PRId64 " MB\n", i, - node_mem[i].node_plugged_mem >> 20); - } - qapi_free_CpuInfoFastList(cpu_list); - g_free(node_mem); -} diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index 68a942595a2..4f4ab30f8c3 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -8,7 +8,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/boards.h" #include "qapi/error.h" #include "qapi/qapi-builtin-visit.h" @@ -16,13 +15,13 @@ #include "qapi/qmp/qerror.h" #include "qapi/qmp/qobject.h" #include "qapi/qobject-input-visitor.h" +#include "qapi/type-helpers.h" #include "qemu/main-loop.h" #include "qom/qom-qobject.h" #include "sysemu/hostmem.h" #include "sysemu/hw_accel.h" #include "sysemu/numa.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu) { @@ -159,6 +158,7 @@ void qmp_set_numa_node(NumaOptions *cmd, Error **errp) static int query_memdev(Object *obj, void *opaque) { + Error *err = NULL; MemdevList **list = opaque; Memdev *m; QObject *host_nodes; @@ -174,6 +174,13 @@ static int query_memdev(Object *obj, void *opaque) m->merge = 
object_property_get_bool(obj, "merge", &error_abort); m->dump = object_property_get_bool(obj, "dump", &error_abort); m->prealloc = object_property_get_bool(obj, "prealloc", &error_abort); + m->share = object_property_get_bool(obj, "share", &error_abort); + m->reserve = object_property_get_bool(obj, "reserve", &err); + if (err) { + error_free_or_abort(&err); + } else { + m->has_reserve = true; + } m->policy = object_property_get_enum(obj, "policy", "HostMemPolicy", &error_abort); host_nodes = object_property_get_qobject(obj, @@ -198,3 +205,42 @@ MemdevList *qmp_query_memdev(Error **errp) object_child_foreach(obj, query_memdev, &list); return list; } + +HumanReadableText *qmp_x_query_numa(Error **errp) +{ + g_autoptr(GString) buf = g_string_new(""); + int i, nb_numa_nodes; + NumaNodeMem *node_mem; + CpuInfoFastList *cpu_list, *cpu; + MachineState *ms = MACHINE(qdev_get_machine()); + + nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0; + g_string_append_printf(buf, "%d nodes\n", nb_numa_nodes); + if (!nb_numa_nodes) { + goto done; + } + + cpu_list = qmp_query_cpus_fast(&error_abort); + node_mem = g_new0(NumaNodeMem, nb_numa_nodes); + + query_numa_node_mem(node_mem, ms); + for (i = 0; i < nb_numa_nodes; i++) { + g_string_append_printf(buf, "node %d cpus:", i); + for (cpu = cpu_list; cpu; cpu = cpu->next) { + if (cpu->value->has_props && cpu->value->props->has_node_id && + cpu->value->props->node_id == i) { + g_string_append_printf(buf, " %" PRIi64, cpu->value->cpu_index); + } + } + g_string_append_printf(buf, "\n"); + g_string_append_printf(buf, "node %d size: %" PRId64 " MB\n", i, + node_mem[i].node_mem >> 20); + g_string_append_printf(buf, "node %d plugged: %" PRId64 " MB\n", i, + node_mem[i].node_plugged_mem >> 20); + } + qapi_free_CpuInfoFastList(cpu_list); + g_free(node_mem); + + done: + return human_readable_text_from_str(buf); +} diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c new file mode 100644 index 00000000000..116a0cbbfab --- /dev/null +++ b/hw/core/machine-smp.c @@ -0,0 +1,181 @@ +/* + * QEMU Machine core (related to -smp parsing) + * + * Copyright (c) 2021 Huawei Technologies Co., Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "qapi/error.h" + +/* + * Report information of a machine's supported CPU topology hierarchy. + * Topology members will be ordered from the largest to the smallest + * in the string. 
+ */ +static char *cpu_hierarchy_to_string(MachineState *ms) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + GString *s = g_string_new(NULL); + + g_string_append_printf(s, "sockets (%u)", ms->smp.sockets); + + if (mc->smp_props.dies_supported) { + g_string_append_printf(s, " * dies (%u)", ms->smp.dies); + } + + g_string_append_printf(s, " * cores (%u)", ms->smp.cores); + g_string_append_printf(s, " * threads (%u)", ms->smp.threads); + + return g_string_free(s, false); +} + +/* + * smp_parse - Generic function used to parse the given SMP configuration + * + * Any missing parameter in "cpus/maxcpus/sockets/cores/threads" will be + * automatically computed based on the provided ones. + * + * In the calculation of omitted sockets/cores/threads: we prefer sockets + * over cores over threads before 6.2, while preferring cores over sockets + * over threads since 6.2. + * + * In the calculation of cpus/maxcpus: When both maxcpus and cpus are omitted, + * maxcpus will be computed from the given parameters and cpus will be set + * equal to maxcpus. When only one of maxcpus and cpus is given then the + * omitted one will be set to its given counterpart's value. Both maxcpus and + * cpus may be specified, but maxcpus must be equal to or greater than cpus. + * + * For compatibility, apart from the parameters that will be computed, newly + * introduced topology members which are likely to be target specific should + * be directly set as 1 if they are omitted (e.g. dies for PC since 4.1). + */ +void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + unsigned cpus = config->has_cpus ? config->cpus : 0; + unsigned sockets = config->has_sockets ? config->sockets : 0; + unsigned dies = config->has_dies ? config->dies : 0; + unsigned cores = config->has_cores ? config->cores : 0; + unsigned threads = config->has_threads ? config->threads : 0; + unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0; + + /* + * Specified CPU topology parameters must be greater than zero, + * explicit configuration like "cpus=0" is not allowed. + */ + if ((config->has_cpus && config->cpus == 0) || + (config->has_sockets && config->sockets == 0) || + (config->has_dies && config->dies == 0) || + (config->has_cores && config->cores == 0) || + (config->has_threads && config->threads == 0) || + (config->has_maxcpus && config->maxcpus == 0)) { + warn_report("Deprecated CPU topology (considered invalid): " + "CPU topology parameters must be greater than zero"); + } + + /* + * If not supported by the machine, a topology parameter must be + * omitted or specified equal to 1. + */ + if (!mc->smp_props.dies_supported && dies > 1) { + error_setg(errp, "dies not supported by this machine's CPU topology"); + return; + } + + dies = dies > 0 ? dies : 1; + + /* compute missing values based on the provided ones */ + if (cpus == 0 && maxcpus == 0) { + sockets = sockets > 0 ? sockets : 1; + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + } else { + maxcpus = maxcpus > 0 ? maxcpus : cpus; + + if (mc->smp_props.prefer_sockets) { + /* prefer sockets over cores before 6.2 */ + if (sockets == 0) { + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + sockets = maxcpus / (dies * cores * threads); + } else if (cores == 0) { + threads = threads > 0 ? threads : 1; + cores = maxcpus / (sockets * dies * threads); + } + } else { + /* prefer cores over sockets since 6.2 */ + if (cores == 0) { + sockets = sockets > 0 ? 
sockets : 1; + threads = threads > 0 ? threads : 1; + cores = maxcpus / (sockets * dies * threads); + } else if (sockets == 0) { + threads = threads > 0 ? threads : 1; + sockets = maxcpus / (dies * cores * threads); + } + } + + /* try to calculate omitted threads at last */ + if (threads == 0) { + threads = maxcpus / (sockets * dies * cores); + } + } + + maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * cores * threads; + cpus = cpus > 0 ? cpus : maxcpus; + + ms->smp.cpus = cpus; + ms->smp.sockets = sockets; + ms->smp.dies = dies; + ms->smp.cores = cores; + ms->smp.threads = threads; + ms->smp.max_cpus = maxcpus; + + /* sanity-check of the computed topology */ + if (sockets * dies * cores * threads != maxcpus) { + g_autofree char *topo_msg = cpu_hierarchy_to_string(ms); + error_setg(errp, "Invalid CPU topology: " + "product of the hierarchy must match maxcpus: " + "%s != maxcpus (%u)", + topo_msg, maxcpus); + return; + } + + if (maxcpus < cpus) { + g_autofree char *topo_msg = cpu_hierarchy_to_string(ms); + error_setg(errp, "Invalid CPU topology: " + "maxcpus must be equal to or greater than smp: " + "%s == maxcpus (%u) < smp_cpus (%u)", + topo_msg, maxcpus, cpus); + return; + } + + if (ms->smp.cpus < mc->min_cpus) { + error_setg(errp, "Invalid SMP CPUs %d. The min CPUs " + "supported by machine '%s' is %d", + ms->smp.cpus, + mc->name, mc->min_cpus); + return; + } + + if (ms->smp.max_cpus > mc->max_cpus) { + error_setg(errp, "Invalid SMP CPUs %d. The max CPUs " + "supported by machine '%s' is %d", + ms->smp.max_cpus, + mc->name, mc->max_cpus); + return; + } +} diff --git a/hw/core/machine.c b/hw/core/machine.c index 40def78183a..53a99abc560 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -19,6 +19,7 @@ #include "hw/loader.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" +#include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" #include "hw/sysbus.h" #include "sysemu/cpus.h" @@ -36,11 +37,27 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" +GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, +}; +const size_t hw_compat_6_1_len = G_N_ELEMENTS(hw_compat_6_1); + +GlobalProperty hw_compat_6_0[] = { + { "gpex-pcihost", "allow-unmapped-accesses", "false" }, + { "i8042", "extended-state", "false"}, + { "nvme-ns", "eui64-default", "off"}, + { "e1000", "init-vet", "off" }, + { "e1000e", "init-vet", "off" }, + { "vhost-vsock-device", "seqpacket", "off" }, +}; +const size_t hw_compat_6_0_len = G_N_ELEMENTS(hw_compat_6_0); + GlobalProperty hw_compat_5_2[] = { { "ICH9-LPC", "smm-compat", "on"}, { "PIIX4_PM", "smm-compat", "on"}, { "virtio-blk-device", "report-discard-granularity", "off" }, - { "virtio-net-pci", "vectors", "3"}, + { "virtio-net-pci-base", "vectors", "3"}, }; const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2); @@ -532,35 +549,30 @@ void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type) bool device_is_dynamic_sysbus(MachineClass *mc, DeviceState *dev) { - bool allowed = false; - strList *wl; Object *obj = OBJECT(dev); if (!object_dynamic_cast(obj, TYPE_SYS_BUS_DEVICE)) { return false; } + return device_type_is_dynamic_sysbus(mc, object_get_typename(obj)); +} + +bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type) +{ + bool allowed = false; + strList *wl; + ObjectClass *klass = object_class_by_name(type); + for (wl = mc->allowed_dynamic_sysbus_devices; !allowed && wl; wl = wl->next) { - allowed |= 
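As a worked example of the defaulting rules documented in smp_parse() above, the standalone program below (plain C, not QEMU code; the "-smp 8,maxcpus=16" scenario is illustrative) reproduces the post-6.2 branch that prefers cores over sockets on a machine without dies support:

#include <stdio.h>

int main(void)
{
    /* user gave -smp 8,maxcpus=16; everything else is left unspecified */
    unsigned cpus = 8, maxcpus = 16;
    unsigned sockets = 0, dies = 1, cores = 0, threads = 0;

    maxcpus = maxcpus ? maxcpus : cpus;
    if (cores == 0) {                      /* prefer cores over sockets */
        sockets = sockets ? sockets : 1;
        threads = threads ? threads : 1;
        cores = maxcpus / (sockets * dies * threads);
    }
    if (threads == 0) {                    /* derive threads last */
        threads = maxcpus / (sockets * dies * cores);
    }

    /* prints: sockets=1 dies=1 cores=16 threads=1 cpus=8 maxcpus=16 */
    printf("sockets=%u dies=%u cores=%u threads=%u cpus=%u maxcpus=%u\n",
           sockets, dies, cores, threads, cpus, maxcpus);
    return 0;
}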
!!object_dynamic_cast(obj, wl->value); + allowed |= !!object_class_dynamic_cast(klass, wl->value); } return allowed; } -static void validate_sysbus_device(SysBusDevice *sbdev, void *opaque) -{ - MachineState *machine = opaque; - MachineClass *mc = MACHINE_GET_CLASS(machine); - - if (!device_is_dynamic_sysbus(mc, DEVICE(sbdev))) { - error_report("Option '-device %s' cannot be handled by this machine", - object_class_get_name(object_get_class(OBJECT(sbdev)))); - exit(1); - } -} - static char *machine_get_memdev(Object *obj, Error **errp) { MachineState *ms = MACHINE(obj); @@ -576,18 +588,6 @@ static void machine_set_memdev(Object *obj, const char *value, Error **errp) ms->ram_memdev_id = g_strdup(value); } - -static void machine_init_notify(Notifier *notifier, void *data) -{ - MachineState *machine = MACHINE(qdev_get_machine()); - - /* - * Loop through all dynamically created sysbus devices and check if they are - * all allowed. If a device is not allowed, error out. - */ - foreach_dynamic_sysbus_device(validate_sysbus_device, machine); -} - HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) { int i; @@ -720,7 +720,8 @@ void machine_set_cpu_numa_node(MachineState *machine, if ((numa_info[props->node_id].initiator < MAX_NODES) && (props->node_id != numa_info[props->node_id].initiator)) { error_setg(errp, "The initiator of CPU NUMA node %" PRId64 - " should be itself", props->node_id); + " should be itself (got %" PRIu16 ")", + props->node_id, numa_info[props->node_id].initiator); return; } numa_info[props->node_id].has_cpu = true; @@ -733,69 +734,34 @@ void machine_set_cpu_numa_node(MachineState *machine, } } -static void smp_parse(MachineState *ms, QemuOpts *opts) +static void machine_get_smp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { - if (opts) { - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); - unsigned cores = qemu_opt_get_number(opts, "cores", 0); - unsigned threads = qemu_opt_get_number(opts, "threads", 0); - - /* compute missing values, prefer sockets over cores over threads */ - if (cpus == 0 || sockets == 0) { - cores = cores > 0 ? cores : 1; - threads = threads > 0 ? threads : 1; - if (cpus == 0) { - sockets = sockets > 0 ? sockets : 1; - cpus = cores * threads * sockets; - } else { - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - sockets = ms->smp.max_cpus / (cores * threads); - } - } else if (cores == 0) { - threads = threads > 0 ? threads : 1; - cores = cpus / (sockets * threads); - cores = cores > 0 ? cores : 1; - } else if (threads == 0) { - threads = cpus / (cores * sockets); - threads = threads > 0 ? 
threads : 1; - } else if (sockets * cores * threads < cpus) { - error_report("cpu topology: " - "sockets (%u) * cores (%u) * threads (%u) < " - "smp_cpus (%u)", - sockets, cores, threads, cpus); - exit(1); - } - - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - - if (ms->smp.max_cpus < cpus) { - error_report("maxcpus must be equal to or greater than smp"); - exit(1); - } + MachineState *ms = MACHINE(obj); + SMPConfiguration *config = &(SMPConfiguration){ + .has_cpus = true, .cpus = ms->smp.cpus, + .has_sockets = true, .sockets = ms->smp.sockets, + .has_dies = true, .dies = ms->smp.dies, + .has_cores = true, .cores = ms->smp.cores, + .has_threads = true, .threads = ms->smp.threads, + .has_maxcpus = true, .maxcpus = ms->smp.max_cpus, + }; + if (!visit_type_SMPConfiguration(v, name, &config, &error_abort)) { + return; + } +} - if (sockets * cores * threads != ms->smp.max_cpus) { - error_report("Invalid CPU topology: " - "sockets (%u) * cores (%u) * threads (%u) " - "!= maxcpus (%u)", - sockets, cores, threads, - ms->smp.max_cpus); - exit(1); - } +static void machine_set_smp(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MachineState *ms = MACHINE(obj); + g_autoptr(SMPConfiguration) config = NULL; - ms->smp.cpus = cpus; - ms->smp.cores = cores; - ms->smp.threads = threads; - ms->smp.sockets = sockets; + if (!visit_type_SMPConfiguration(v, name, &config, errp)) { + return; } - if (ms->smp.cpus > 1) { - Error *blocker = NULL; - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); - replay_add_blocker(blocker); - } + smp_parse(ms, config, errp); } static void machine_class_init(ObjectClass *oc, void *data) @@ -805,7 +771,6 @@ static void machine_class_init(ObjectClass *oc, void *data) /* Default 128 MB as guest ram size */ mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; - mc->smp_parse = smp_parse; /* numa node memory size aligned on 8MB by default. 
* On Linux, each node's border has to be 8MB aligned @@ -837,6 +802,12 @@ static void machine_class_init(ObjectClass *oc, void *data) object_class_property_set_description(oc, "dumpdtb", "Dump current dtb to a file and quit"); + object_class_property_add(oc, "smp", "SMPConfiguration", + machine_get_smp, machine_set_smp, + NULL, NULL); + object_class_property_set_description(oc, "smp", + "CPU topology"); + object_class_property_add(oc, "phandle-start", "int", machine_get_phandle_start, machine_set_phandle_start, NULL, NULL); @@ -956,16 +927,13 @@ static void machine_initfn(Object *obj) "Table (HMAT)"); } - /* Register notifier when init is done for sysbus sanity checks */ - ms->sysbus_notifier.notify = machine_init_notify; - qemu_add_machine_init_done_notifier(&ms->sysbus_notifier); - /* default to mc->default_cpus */ ms->smp.cpus = mc->default_cpus; ms->smp.max_cpus = mc->default_cpus; + ms->smp.sockets = 1; + ms->smp.dies = 1; ms->smp.cores = 1; ms->smp.threads = 1; - ms->smp.sockets = 1; } static void machine_finalize(Object *obj) @@ -1011,6 +979,9 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id); } if (cpu->props.has_die_id) { + if (s->len) { + g_string_append_printf(s, ", "); + } g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); } if (cpu->props.has_core_id) { @@ -1124,29 +1095,6 @@ MemoryRegion *machine_consume_memdev(MachineState *machine, return ret; } -bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp) -{ - MachineClass *mc = MACHINE_GET_CLASS(ms); - - mc->smp_parse(ms, opts); - - /* sanity-check smp_cpus and max_cpus against mc */ - if (ms->smp.cpus < mc->min_cpus) { - error_setg(errp, "Invalid SMP CPUs %d. The min CPUs " - "supported by machine '%s' is %d", - ms->smp.cpus, - mc->name, mc->min_cpus); - return false; - } else if (ms->smp.max_cpus > mc->max_cpus) { - error_setg(errp, "Invalid SMP CPUs %d. 
The max CPUs " - "supported by machine '%s' is %d", - current_machine->smp.max_cpus, - mc->name, mc->max_cpus); - return false; - } - return true; -} - void machine_run_board_init(MachineState *machine) { MachineClass *machine_class = MACHINE_GET_CLASS(machine); @@ -1229,6 +1177,7 @@ void machine_run_board_init(MachineState *machine) "on", false); } + accel_init_interfaces(ACCEL_GET_CLASS(machine->accelerator)); machine_class->init(machine); phase_advance(PHASE_MACHINE_INITIALIZED); } diff --git a/hw/core/meson.build b/hw/core/meson.build index 59f1605bb07..0f884d6fd4d 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -1,7 +1,6 @@ # core qdev-related obj files, also used by *-user and unit tests -hwcore_files = files( +hwcore_ss.add(files( 'bus.c', - 'hotplug.c', 'qdev-properties.c', 'qdev.c', 'reset.c', @@ -11,21 +10,34 @@ hwcore_files = files( 'irq.c', 'clock.c', 'qdev-clock.c', -) +)) +if have_system + hwcore_ss.add(files( + 'hotplug.c', + 'qdev-hotplug.c', + )) +else + hwcore_ss.add(files( + 'hotplug-stubs.c', + )) +endif -common_ss.add(files('cpu.c')) -common_ss.add(when: 'CONFIG_FITLOADER', if_true: files('loader-fit.c')) -common_ss.add(when: 'CONFIG_GENERIC_LOADER', if_true: files('generic-loader.c')) -common_ss.add(when: ['CONFIG_GUEST_LOADER', fdt], if_true: files('guest-loader.c')) -common_ss.add(when: 'CONFIG_OR_IRQ', if_true: files('or-irq.c')) -common_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('platform-bus.c')) -common_ss.add(when: 'CONFIG_PTIMER', if_true: files('ptimer.c')) -common_ss.add(when: 'CONFIG_REGISTER', if_true: files('register.c')) -common_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c')) -common_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) +common_ss.add(files('cpu-common.c')) +common_ss.add(files('machine-smp.c')) +softmmu_ss.add(when: 'CONFIG_FITLOADER', if_true: files('loader-fit.c')) +softmmu_ss.add(when: 'CONFIG_GENERIC_LOADER', if_true: files('generic-loader.c')) +softmmu_ss.add(when: ['CONFIG_GUEST_LOADER', fdt], if_true: files('guest-loader.c')) +softmmu_ss.add(when: 'CONFIG_OR_IRQ', if_true: files('or-irq.c')) +softmmu_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('platform-bus.c')) +softmmu_ss.add(when: 'CONFIG_PTIMER', if_true: files('ptimer.c')) +softmmu_ss.add(when: 'CONFIG_REGISTER', if_true: files('register.c')) +softmmu_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c')) +softmmu_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) softmmu_ss.add(files( + 'cpu-sysemu.c', 'fw-path-provider.c', + 'gpio.c', 'loader.c', 'machine-hmp-cmds.c', 'machine.c', diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c index 7e693523d75..f586a4bef54 100644 --- a/hw/core/null-machine.c +++ b/hw/core/null-machine.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "hw/boards.h" -#include "sysemu/sysemu.h" #include "exec/address-spaces.h" #include "hw/core/cpu.h" diff --git a/hw/core/numa.c b/hw/core/numa.c index 68cee65f614..e6050b22739 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -26,7 +26,6 @@ #include "qemu/units.h" #include "sysemu/hostmem.h" #include "sysemu/numa.h" -#include "sysemu/sysemu.h" #include "exec/cpu-common.h" #include "exec/ramlist.h" #include "qemu/bitmap.h" @@ -89,6 +88,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, return; } + /* + * If not set the initiator, set it to MAX_NODES. And if + * HMAT is enabled and this node has no cpus, QEMU will raise error. 
+ */ + numa_info[nodenr].initiator = MAX_NODES; + if (node->has_initiator) { + if (!ms->numa_state->hmat_enabled) { + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " + "(HMAT) is disabled, enable it with -machine hmat=on " + "before using any of hmat specific options"); + return; + } + + if (node->initiator >= MAX_NODES) { + error_report("The initiator id %" PRIu16 " expects an integer " + "between 0 and %d", node->initiator, + MAX_NODES - 1); + return; + } + + numa_info[nodenr].initiator = node->initiator; + } + for (cpus = node->cpus; cpus; cpus = cpus->next) { CpuInstanceProperties props; if (cpus->value >= max_cpus) { @@ -143,28 +165,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); } - /* - * If not set the initiator, set it to MAX_NODES. And if - * HMAT is enabled and this node has no cpus, QEMU will raise error. - */ - numa_info[nodenr].initiator = MAX_NODES; - if (node->has_initiator) { - if (!ms->numa_state->hmat_enabled) { - error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " - "(HMAT) is disabled, enable it with -machine hmat=on " - "before using any of hmat specific options"); - return; - } - - if (node->initiator >= MAX_NODES) { - error_report("The initiator id %" PRIu16 " expects an integer " - "between 0 and %d", node->initiator, - MAX_NODES - 1); - return; - } - - numa_info[nodenr].initiator = node->initiator; - } numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); ms->numa_state->num_nodes++; @@ -756,6 +756,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) PCDIMMDeviceInfo *pcdimm_info; VirtioPMEMDeviceInfo *vpi; VirtioMEMDeviceInfo *vmi; + SgxEPCDeviceInfo *se; for (info = info_list; info; info = info->next) { MemoryDeviceInfo *value = info->value; @@ -781,6 +782,12 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) node_mem[vmi->node].node_mem += vmi->size; node_mem[vmi->node].node_plugged_mem += vmi->size; break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; + /* TODO: once we support numa, assign to right node */ + node_mem[0].node_mem += se->size; + node_mem[0].node_plugged_mem += se->size; + break; default: g_assert_not_reached(); } @@ -803,9 +810,27 @@ void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms) } } +static int ram_block_notify_add_single(RAMBlock *rb, void *opaque) +{ + const ram_addr_t max_size = qemu_ram_get_max_length(rb); + const ram_addr_t size = qemu_ram_get_used_length(rb); + void *host = qemu_ram_get_host_addr(rb); + RAMBlockNotifier *notifier = opaque; + + if (host) { + notifier->ram_block_added(notifier, host, size, max_size); + } + return 0; +} + void ram_block_notifier_add(RAMBlockNotifier *n) { QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); + + /* Notify about all existing ram blocks. 
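With this change ram_block_notifier_add() also replays ram_block_added for RAM blocks that already existed at registration time, and, as the following hunks show, the individual callbacks become optional and gain a max_size argument. A minimal sketch of a notifier that only implements ram_block_added is shown below; the function and variable names are hypothetical and the header path is an assumption:

#include "qemu/osdep.h"
#include "exec/ramlist.h"   /* assumed location of RAMBlockNotifier */

/* Sketch, not part of this patch: with this series the callback also
 * fires for RAM blocks created before the notifier was registered. */
static void my_ram_block_added(RAMBlockNotifier *n, void *host,
                               size_t size, size_t max_size)
{
    /* e.g. pre-register [host, host + max_size) with a hypervisor */
}

static RAMBlockNotifier my_notifier = {
    .ram_block_added = my_ram_block_added,
};

static void my_subsystem_init(void)
{
    ram_block_notifier_add(&my_notifier);
}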
*/ + if (n->ram_block_added) { + qemu_ram_foreach_block(ram_block_notify_add_single, n); + } } void ram_block_notifier_remove(RAMBlockNotifier *n) @@ -813,20 +838,35 @@ void ram_block_notifier_remove(RAMBlockNotifier *n) QLIST_REMOVE(n, next); } -void ram_block_notify_add(void *host, size_t size) +void ram_block_notify_add(void *host, size_t size, size_t max_size) +{ + RAMBlockNotifier *notifier; + + QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + if (notifier->ram_block_added) { + notifier->ram_block_added(notifier, host, size, max_size); + } + } +} + +void ram_block_notify_remove(void *host, size_t size, size_t max_size) { RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_added(notifier, host, size); + if (notifier->ram_block_removed) { + notifier->ram_block_removed(notifier, host, size, max_size); + } } } -void ram_block_notify_remove(void *host, size_t size) +void ram_block_notify_resize(void *host, size_t old_size, size_t new_size) { RAMBlockNotifier *notifier; QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { - notifier->ram_block_removed(notifier, host, size); + if (notifier->ram_block_resized) { + notifier->ram_block_resized(notifier, host, old_size, new_size); + } } } diff --git a/hw/core/qdev-hotplug.c b/hw/core/qdev-hotplug.c new file mode 100644 index 00000000000..d495d0e9c70 --- /dev/null +++ b/hw/core/qdev-hotplug.c @@ -0,0 +1,73 @@ +/* + * QDev Hotplug handlers + * + * Copyright (c) Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/qdev-core.h" +#include "hw/boards.h" + +HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) +{ + MachineState *machine; + MachineClass *mc; + Object *m_obj = qdev_get_machine(); + + if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { + machine = MACHINE(m_obj); + mc = MACHINE_GET_CLASS(machine); + if (mc->get_hotplug_handler) { + return mc->get_hotplug_handler(machine, dev); + } + } + + return NULL; +} + +bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) +{ + MachineState *machine; + MachineClass *mc; + Object *m_obj = qdev_get_machine(); + + if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { + machine = MACHINE(m_obj); + mc = MACHINE_GET_CLASS(machine); + if (mc->hotplug_allowed) { + return mc->hotplug_allowed(machine, dev, errp); + } + } + + return true; +} + +HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev) +{ + if (dev->parent_bus) { + return dev->parent_bus->hotplug_handler; + } + return NULL; +} + +HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_machine_hotplug_handler(dev); + + if (hotplug_ctrl == NULL && dev->parent_bus) { + hotplug_ctrl = qdev_get_bus_hotplug_handler(dev); + } + return hotplug_ctrl; +} + +/* can be used as ->unplug() callback for the simple cases */ +void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + qdev_unrealize(dev); +} diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 2760c21f111..a91f60567aa 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -36,11 +36,11 @@ static bool check_prop_still_unset(Object *obj, const char *name, const void *old_val, const char *new_val, - Error **errp) + bool allow_override, Error **errp) { const 
GlobalProperty *prop = qdev_find_global_prop(obj, name); - if (!old_val) { + if (!old_val || (!prop && allow_override)) { return true; } @@ -93,16 +93,34 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, BlockBackend *blk; bool blk_created = false; int ret; + BlockDriverState *bs; + AioContext *ctx; if (!visit_type_str(v, name, &str, errp)) { return; } - /* - * TODO Should this really be an error? If no, the old value - * needs to be released before we store the new one. - */ - if (!check_prop_still_unset(obj, name, *ptr, str, errp)) { + if (!check_prop_still_unset(obj, name, *ptr, str, true, errp)) { + return; + } + + if (*ptr) { + /* BlockBackend alread exists. So, we want to change attached node */ + blk = *ptr; + ctx = blk_get_aio_context(blk); + bs = bdrv_lookup_bs(NULL, str, errp); + if (!bs) { + return; + } + + if (ctx != bdrv_get_aio_context(bs)) { + error_setg(errp, "Different aio context is not supported for new " + "node"); + } + + aio_context_acquire(ctx); + blk_replace_bs(blk, bs, errp); + aio_context_release(ctx); return; } @@ -114,7 +132,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, blk = blk_by_name(str); if (!blk) { - BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); + bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { /* * If the device supports iothreads, it will make sure to move the @@ -123,8 +141,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, * aware of iothreads require their BlockBackends to be in the main * AioContext. */ - AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : - qemu_get_aio_context(); + ctx = iothread ? bdrv_get_aio_context(bs) : qemu_get_aio_context(); blk = blk_new(ctx, 0, BLK_PERM_ALL); blk_created = true; @@ -196,6 +213,7 @@ static void release_drive(Object *obj, const char *name, void *opaque) const PropertyInfo qdev_prop_drive = { .name = "str", .description = "Node name or ID of a block device to use as a backend", + .realized_set_allowed = true, .get = get_drive, .set = set_drive, .release = release_drive, @@ -204,6 +222,7 @@ const PropertyInfo qdev_prop_drive = { const PropertyInfo qdev_prop_drive_iothread = { .name = "str", .description = "Node name or ID of a block device to use as a backend", + .realized_set_allowed = true, .get = get_drive, .set = set_drive_iothread, .release = release_drive, @@ -238,7 +257,7 @@ static void set_chr(Object *obj, Visitor *v, const char *name, void *opaque, * TODO Should this really be an error? If no, the old value * needs to be released before we store the new one. */ - if (!check_prop_still_unset(obj, name, be->chr, str, errp)) { + if (!check_prop_still_unset(obj, name, be->chr, str, false, errp)) { return; } @@ -408,10 +427,16 @@ static void set_netdev(Object *obj, Visitor *v, const char *name, * TODO Should this really be an error? If no, the old value * needs to be released before we store the new one. 
*/ - if (!check_prop_still_unset(obj, name, ncs[i], str, errp)) { + if (!check_prop_still_unset(obj, name, ncs[i], str, false, errp)) { goto out; } + if (peers[i]->info->check_peer_type) { + if (!peers[i]->info->check_peer_type(peers[i], obj->class, errp)) { + goto out; + } + } + ncs[i] = peers[i]; ncs[i]->queue_index = i; } diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 50f40949f52..c34aac6ebc9 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -26,11 +26,11 @@ void qdev_prop_set_after_realize(DeviceState *dev, const char *name, /* returns: true if property is allowed to be set, false otherwise */ static bool qdev_prop_allow_set(Object *obj, const char *name, - Error **errp) + const PropertyInfo *info, Error **errp) { DeviceState *dev = DEVICE(obj); - if (dev->realized) { + if (dev->realized && !info->realized_set_allowed) { qdev_prop_set_after_realize(dev, name, errp); return false; } @@ -79,7 +79,7 @@ static void field_prop_set(Object *obj, Visitor *v, const char *name, { Property *prop = opaque; - if (!qdev_prop_allow_set(obj, name, errp)) { + if (!qdev_prop_allow_set(obj, name, prop->info, errp)) { return; } diff --git a/hw/core/qdev.c b/hw/core/qdev.c index cefc5eaa0a9..84f3019440f 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -28,11 +28,11 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qapi-events-qdev.h" +#include "qapi/qmp/qdict.h" #include "qapi/qmp/qerror.h" #include "qapi/visitor.h" #include "qemu/error-report.h" #include "qemu/option.h" -#include "hw/hotplug.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/boards.h" @@ -211,14 +211,17 @@ void device_listener_unregister(DeviceListener *listener) QTAILQ_REMOVE(&device_listeners, listener, link); } -bool qdev_should_hide_device(QemuOpts *opts) +bool qdev_should_hide_device(const QDict *opts, bool from_json, Error **errp) { + ERRP_GUARD(); DeviceListener *listener; QTAILQ_FOREACH(listener, &device_listeners, link) { if (listener->hide_device) { - if (listener->hide_device(listener, opts)) { + if (listener->hide_device(listener, opts, from_json, errp)) { return true; + } else if (*errp) { + return false; } } } @@ -234,58 +237,6 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, dev->alias_required_for_version = required_for_version; } -HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev) -{ - MachineState *machine; - MachineClass *mc; - Object *m_obj = qdev_get_machine(); - - if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { - machine = MACHINE(m_obj); - mc = MACHINE_GET_CLASS(machine); - if (mc->get_hotplug_handler) { - return mc->get_hotplug_handler(machine, dev); - } - } - - return NULL; -} - -bool qdev_hotplug_allowed(DeviceState *dev, Error **errp) -{ - MachineState *machine; - MachineClass *mc; - Object *m_obj = qdev_get_machine(); - - if (object_dynamic_cast(m_obj, TYPE_MACHINE)) { - machine = MACHINE(m_obj); - mc = MACHINE_GET_CLASS(machine); - if (mc->hotplug_allowed) { - return mc->hotplug_allowed(machine, dev, errp); - } - } - - return true; -} - -HotplugHandler *qdev_get_bus_hotplug_handler(DeviceState *dev) -{ - if (dev->parent_bus) { - return dev->parent_bus->hotplug_handler; - } - return NULL; -} - -HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) -{ - HotplugHandler *hotplug_ctrl = qdev_get_machine_hotplug_handler(dev); - - if (hotplug_ctrl == NULL && dev->parent_bus) { - hotplug_ctrl = qdev_get_bus_hotplug_handler(dev); - } - return hotplug_ctrl; -} - static int 
qdev_prereset(DeviceState *dev, void *opaque) { trace_qdev_reset_tree(dev, object_get_typename(OBJECT(dev))); @@ -367,13 +318,6 @@ static void device_reset_child_foreach(Object *obj, ResettableChildCallback cb, } } -/* can be used as ->unplug() callback for the simple cases */ -void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - qdev_unrealize(dev); -} - bool qdev_realize(DeviceState *dev, BusState *bus, Error **errp) { assert(!dev->realized && !dev->parent_bus); @@ -432,180 +376,6 @@ BusState *qdev_get_parent_bus(DeviceState *dev) return dev->parent_bus; } -static NamedGPIOList *qdev_get_named_gpio_list(DeviceState *dev, - const char *name) -{ - NamedGPIOList *ngl; - - QLIST_FOREACH(ngl, &dev->gpios, node) { - /* NULL is a valid and matchable name. */ - if (g_strcmp0(name, ngl->name) == 0) { - return ngl; - } - } - - ngl = g_malloc0(sizeof(*ngl)); - ngl->name = g_strdup(name); - QLIST_INSERT_HEAD(&dev->gpios, ngl, node); - return ngl; -} - -void qdev_init_gpio_in_named_with_opaque(DeviceState *dev, - qemu_irq_handler handler, - void *opaque, - const char *name, int n) -{ - int i; - NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); - - assert(gpio_list->num_out == 0 || !name); - gpio_list->in = qemu_extend_irqs(gpio_list->in, gpio_list->num_in, handler, - opaque, n); - - if (!name) { - name = "unnamed-gpio-in"; - } - for (i = gpio_list->num_in; i < gpio_list->num_in + n; i++) { - gchar *propname = g_strdup_printf("%s[%u]", name, i); - - object_property_add_child(OBJECT(dev), propname, - OBJECT(gpio_list->in[i])); - g_free(propname); - } - - gpio_list->num_in += n; -} - -void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n) -{ - qdev_init_gpio_in_named(dev, handler, NULL, n); -} - -void qdev_init_gpio_out_named(DeviceState *dev, qemu_irq *pins, - const char *name, int n) -{ - int i; - NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); - - assert(gpio_list->num_in == 0 || !name); - - if (!name) { - name = "unnamed-gpio-out"; - } - memset(pins, 0, sizeof(*pins) * n); - for (i = 0; i < n; ++i) { - gchar *propname = g_strdup_printf("%s[%u]", name, - gpio_list->num_out + i); - - object_property_add_link(OBJECT(dev), propname, TYPE_IRQ, - (Object **)&pins[i], - object_property_allow_set_link, - OBJ_PROP_LINK_STRONG); - g_free(propname); - } - gpio_list->num_out += n; -} - -void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n) -{ - qdev_init_gpio_out_named(dev, pins, NULL, n); -} - -qemu_irq qdev_get_gpio_in_named(DeviceState *dev, const char *name, int n) -{ - NamedGPIOList *gpio_list = qdev_get_named_gpio_list(dev, name); - - assert(n >= 0 && n < gpio_list->num_in); - return gpio_list->in[n]; -} - -qemu_irq qdev_get_gpio_in(DeviceState *dev, int n) -{ - return qdev_get_gpio_in_named(dev, NULL, n); -} - -void qdev_connect_gpio_out_named(DeviceState *dev, const char *name, int n, - qemu_irq pin) -{ - char *propname = g_strdup_printf("%s[%d]", - name ? name : "unnamed-gpio-out", n); - if (pin && !OBJECT(pin)->parent) { - /* We need a name for object_property_set_link to work */ - object_property_add_child(container_get(qdev_get_machine(), - "/unattached"), - "non-qdev-gpio[*]", OBJECT(pin)); - } - object_property_set_link(OBJECT(dev), propname, OBJECT(pin), &error_abort); - g_free(propname); -} - -qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) -{ - g_autofree char *propname = g_strdup_printf("%s[%d]", - name ? 
name : "unnamed-gpio-out", n); - - qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, - NULL); - - return ret; -} - -/* disconnect a GPIO output, returning the disconnected input (if any) */ - -static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, - const char *name, int n) -{ - char *propname = g_strdup_printf("%s[%d]", - name ? name : "unnamed-gpio-out", n); - - qemu_irq ret = (qemu_irq)object_property_get_link(OBJECT(dev), propname, - NULL); - if (ret) { - object_property_set_link(OBJECT(dev), propname, NULL, NULL); - } - g_free(propname); - return ret; -} - -qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, - const char *name, int n) -{ - qemu_irq disconnected = qdev_disconnect_gpio_out_named(dev, name, n); - qdev_connect_gpio_out_named(dev, name, n, icpt); - return disconnected; -} - -void qdev_connect_gpio_out(DeviceState * dev, int n, qemu_irq pin) -{ - qdev_connect_gpio_out_named(dev, NULL, n, pin); -} - -void qdev_pass_gpios(DeviceState *dev, DeviceState *container, - const char *name) -{ - int i; - NamedGPIOList *ngl = qdev_get_named_gpio_list(dev, name); - - for (i = 0; i < ngl->num_in; i++) { - const char *nm = ngl->name ? ngl->name : "unnamed-gpio-in"; - char *propname = g_strdup_printf("%s[%d]", nm, i); - - object_property_add_alias(OBJECT(container), propname, - OBJECT(dev), propname); - g_free(propname); - } - for (i = 0; i < ngl->num_out; i++) { - const char *nm = ngl->name ? ngl->name : "unnamed-gpio-out"; - char *propname = g_strdup_printf("%s[%d]", nm, i); - - object_property_add_alias(OBJECT(container), propname, - OBJECT(dev), propname); - g_free(propname); - } - QLIST_REMOVE(ngl, node); - QLIST_INSERT_HEAD(&container->gpios, ngl, node); -} - BusState *qdev_get_child_bus(DeviceState *dev, const char *name) { BusState *bus; @@ -955,7 +725,8 @@ static void device_finalize(Object *obj) dev->canonical_path = NULL; } - qemu_opts_del(dev->opts); + qobject_unref(dev->opts); + g_free(dev->id); } static void device_class_base_init(ObjectClass *class, void *data) diff --git a/hw/core/register.c b/hw/core/register.c index d6f8c208161..95b0150c0aa 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -300,6 +300,18 @@ RegisterInfoArray *register_init_block32(DeviceState *owner, data, ops, debug_enabled, memory_size, 32); } +RegisterInfoArray *register_init_block64(DeviceState *owner, + const RegisterAccessInfo *rae, + int num, RegisterInfo *ri, + uint64_t *data, + const MemoryRegionOps *ops, + bool debug_enabled, + uint64_t memory_size) +{ + return register_init_block(owner, rae, num, ri, (void *) + data, ops, debug_enabled, memory_size, 64); +} + void register_finalize_block(RegisterInfoArray *r_array) { object_unparent(OBJECT(&r_array->mem)); diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c index aaae8e23cc4..05c1da3d311 100644 --- a/hw/core/sysbus.c +++ b/hw/core/sysbus.c @@ -340,11 +340,13 @@ static BusState *main_system_bus; static void main_system_bus_create(void) { - /* assign main_system_bus before qbus_create_inplace() - * in order to make "if (bus != sysbus_get_default())" work */ + /* + * assign main_system_bus before qbus_init() + * in order to make "if (bus != sysbus_get_default())" work + */ main_system_bus = g_malloc0(system_bus_info.instance_size); - qbus_create_inplace(main_system_bus, system_bus_info.instance_size, - TYPE_SYSTEM_BUS, NULL, "main-system-bus"); + qbus_init(main_system_bus, system_bus_info.instance_size, + TYPE_SYSTEM_BUS, NULL, "main-system-bus"); OBJECT(main_system_bus)->free = g_free; } diff 
--git a/hw/core/trace-events b/hw/core/trace-events index 360ddeb2c87..9b3ecce3b2f 100644 --- a/hw/core/trace-events +++ b/hw/core/trace-events @@ -34,3 +34,4 @@ clock_disconnect(const char *clk) "'%s'" clock_set(const char *clk, uint64_t old, uint64_t new) "'%s', %"PRIu64"Hz->%"PRIu64"Hz" clock_propagate(const char *clk) "'%s'" clock_update(const char *clk, const char *src, uint64_t hz, int cb) "'%s', src='%s', val=%"PRIu64"Hz cb=%d" +clock_set_mul_div(const char *clk, uint32_t oldmul, uint32_t mul, uint32_t olddiv, uint32_t div) "'%s', mul: %u -> %u, div: %u -> %u" diff --git a/hw/cris/axis_dev88.c b/hw/cris/axis_dev88.c index af5a0e35173..d82050d927d 100644 --- a/hw/cris/axis_dev88.c +++ b/hw/cris/axis_dev88.c @@ -34,7 +34,6 @@ #include "hw/loader.h" #include "elf.h" #include "boot.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "sysemu/sysemu.h" diff --git a/hw/display/Kconfig b/hw/display/Kconfig index ca46b5830e7..a2306b67d87 100644 --- a/hw/display/Kconfig +++ b/hw/display/Kconfig @@ -72,10 +72,6 @@ config BLIZZARD config FRAMEBUFFER bool -config MILKYMIST_TMU2 - bool - depends on OPENGL && X11 - config SM501 bool select I2C diff --git a/hw/display/artist.c b/hw/display/artist.c index aa7bd594aac..21b7fd1b440 100644 --- a/hw/display/artist.c +++ b/hw/display/artist.c @@ -1170,8 +1170,8 @@ static void artist_vram_write(void *opaque, hwaddr addr, uint64_t val, } buf = vram_write_buffer(s); - posy = ADDR_TO_Y(addr); - posx = ADDR_TO_X(addr); + posy = ADDR_TO_Y(addr >> 2); + posx = ADDR_TO_X(addr >> 2); if (!buf->size) { return; @@ -1232,8 +1232,8 @@ static uint64_t artist_vram_read(void *opaque, hwaddr addr, unsigned size) return 0; } - posy = ADDR_TO_Y(addr); - posx = ADDR_TO_X(addr); + posy = ADDR_TO_Y(addr >> 2); + posx = ADDR_TO_X(addr >> 2); if (posy > buf->height || posx > buf->width) { return 0; diff --git a/hw/display/ati.c b/hw/display/ati.c index 4c3ad8f47b0..31f22754dce 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -968,7 +968,7 @@ static void ati_vga_realize(PCIDevice *dev, Error **errp) I2CBus *i2cbus = i2c_init_bus(DEVICE(s), "ati-vga.ddc"); bitbang_i2c_init(&s->bbi2c, i2cbus); I2CSlave *i2cddc = I2C_SLAVE(qdev_new(TYPE_I2CDDC)); - i2c_set_slave_address(i2cddc, 0x50); + i2c_slave_set_address(i2cddc, 0x50); qdev_realize_and_unref(DEVICE(i2cddc), BUS(i2cbus), &error_abort); /* mmio register space */ diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c index a1bea9a3aa3..f2b874d5e35 100644 --- a/hw/display/edid-generate.c +++ b/hw/display/edid-generate.c @@ -45,6 +45,35 @@ static const struct edid_mode { { .xres = 640, .yres = 480, .byte = 35, .bit = 5 }, }; +typedef struct Timings { + uint32_t xfront; + uint32_t xsync; + uint32_t xblank; + + uint32_t yfront; + uint32_t ysync; + uint32_t yblank; + + uint64_t clock; +} Timings; + +static void generate_timings(Timings *timings, uint32_t refresh_rate, + uint32_t xres, uint32_t yres) +{ + /* pull some realistic looking timings out of thin air */ + timings->xfront = xres * 25 / 100; + timings->xsync = xres * 3 / 100; + timings->xblank = xres * 35 / 100; + + timings->yfront = yres * 5 / 1000; + timings->ysync = yres * 5 / 1000; + timings->yblank = yres * 35 / 1000; + + timings->clock = ((uint64_t)refresh_rate * + (xres + timings->xblank) * + (yres + timings->yblank)) / 10000000; +} + static void edid_ext_dta(uint8_t *dta) { dta[0] = 0x02; @@ -130,20 +159,39 @@ static void edid_fill_modes(uint8_t *edid, uint8_t *xtra3, uint8_t *dta, } } -static void edid_checksum(uint8_t *edid) 
+static void edid_checksum(uint8_t *edid, size_t len) { uint32_t sum = 0; int i; - for (i = 0; i < 127; i++) { + for (i = 0; i < len; i++) { sum += edid[i]; } sum &= 0xff; if (sum) { - edid[127] = 0x100 - sum; + edid[len] = 0x100 - sum; } } +static uint8_t *edid_desc_next(uint8_t *edid, uint8_t *dta, uint8_t *desc) +{ + if (desc == NULL) { + return NULL; + } + if (desc + 18 + 18 < edid + 127) { + return desc + 18; + } + if (dta) { + if (desc < edid + 127) { + return dta + dta[2]; + } + if (desc + 18 + 18 < dta + 127) { + return desc + 18; + } + } + return NULL; +} + static void edid_desc_type(uint8_t *desc, uint8_t type) { desc[0] = 0; @@ -181,8 +229,8 @@ static void edid_desc_ranges(uint8_t *desc) desc[7] = 30; desc[8] = 160; - /* max dot clock (1200 MHz) */ - desc[9] = 1200 / 10; + /* max dot clock (2550 MHz) */ + desc[9] = 2550 / 10; /* no extended timing information */ desc[10] = 0x01; @@ -204,42 +252,33 @@ static void edid_desc_dummy(uint8_t *desc) edid_desc_type(desc, 0x10); } -static void edid_desc_timing(uint8_t *desc, +static void edid_desc_timing(uint8_t *desc, uint32_t refresh_rate, uint32_t xres, uint32_t yres, uint32_t xmm, uint32_t ymm) { - /* pull some realistic looking timings out of thin air */ - uint32_t xfront = xres * 25 / 100; - uint32_t xsync = xres * 3 / 100; - uint32_t xblank = xres * 35 / 100; - - uint32_t yfront = yres * 5 / 1000; - uint32_t ysync = yres * 5 / 1000; - uint32_t yblank = yres * 35 / 1000; - - uint32_t clock = 75 * (xres + xblank) * (yres + yblank); - - stl_le_p(desc, clock / 10000); + Timings timings; + generate_timings(&timings, refresh_rate, xres, yres); + stl_le_p(desc, timings.clock); desc[2] = xres & 0xff; - desc[3] = xblank & 0xff; + desc[3] = timings.xblank & 0xff; desc[4] = (((xres & 0xf00) >> 4) | - ((xblank & 0xf00) >> 8)); + ((timings.xblank & 0xf00) >> 8)); desc[5] = yres & 0xff; - desc[6] = yblank & 0xff; + desc[6] = timings.yblank & 0xff; desc[7] = (((yres & 0xf00) >> 4) | - ((yblank & 0xf00) >> 8)); + ((timings.yblank & 0xf00) >> 8)); - desc[8] = xfront & 0xff; - desc[9] = xsync & 0xff; + desc[8] = timings.xfront & 0xff; + desc[9] = timings.xsync & 0xff; - desc[10] = (((yfront & 0x00f) << 4) | - ((ysync & 0x00f) << 0)); - desc[11] = (((xfront & 0x300) >> 2) | - ((xsync & 0x300) >> 4) | - ((yfront & 0x030) >> 2) | - ((ysync & 0x030) >> 4)); + desc[10] = (((timings.yfront & 0x00f) << 4) | + ((timings.ysync & 0x00f) << 0)); + desc[11] = (((timings.xfront & 0x300) >> 2) | + ((timings.xsync & 0x300) >> 4) | + ((timings.yfront & 0x030) >> 2) | + ((timings.ysync & 0x030) >> 4)); desc[12] = xmm & 0xff; desc[13] = ymm & 0xff; @@ -297,14 +336,61 @@ uint32_t qemu_edid_dpi_to_mm(uint32_t dpi, uint32_t res) return res * 254 / 10 / dpi; } +static void init_displayid(uint8_t *did) +{ + did[0] = 0x70; /* display id extension */ + did[1] = 0x13; /* version 1.3 */ + did[2] = 4; /* length */ + did[3] = 0x03; /* product type (0x03 == standalone display device) */ + edid_checksum(did + 1, did[2] + 4); +} + +static void qemu_displayid_generate(uint8_t *did, uint32_t refresh_rate, + uint32_t xres, uint32_t yres, + uint32_t xmm, uint32_t ymm) +{ + Timings timings; + generate_timings(&timings, refresh_rate, xres, yres); + + did[0] = 0x70; /* display id extension */ + did[1] = 0x13; /* version 1.3 */ + did[2] = 23; /* length */ + did[3] = 0x03; /* product type (0x03 == standalone display device) */ + + did[5] = 0x03; /* Detailed Timings Data Block */ + did[6] = 0x00; /* revision */ + did[7] = 0x14; /* block length */ + + did[8] = timings.clock & 0xff; + did[9] 
= (timings.clock & 0xff00) >> 8; + did[10] = (timings.clock & 0xff0000) >> 16; + + did[11] = 0x88; /* leave aspect ratio undefined */ + + stw_le_p(did + 12, 0xffff & (xres - 1)); + stw_le_p(did + 14, 0xffff & (timings.xblank - 1)); + stw_le_p(did + 16, 0xffff & (timings.xfront - 1)); + stw_le_p(did + 18, 0xffff & (timings.xsync - 1)); + + stw_le_p(did + 20, 0xffff & (yres - 1)); + stw_le_p(did + 22, 0xffff & (timings.yblank - 1)); + stw_le_p(did + 24, 0xffff & (timings.yfront - 1)); + stw_le_p(did + 26, 0xffff & (timings.ysync - 1)); + + edid_checksum(did + 1, did[2] + 4); +} + void qemu_edid_generate(uint8_t *edid, size_t size, qemu_edid_info *info) { - uint32_t desc = 54; + uint8_t *desc = edid + 54; uint8_t *xtra3 = NULL; uint8_t *dta = NULL; + uint8_t *did = NULL; uint32_t width_mm, height_mm; + uint32_t refresh_rate = info->refresh_rate ? info->refresh_rate : 75000; uint32_t dpi = 100; /* if no width_mm/height_mm */ + uint32_t large_screen = 0; /* =============== set defaults =============== */ @@ -320,6 +406,9 @@ void qemu_edid_generate(uint8_t *edid, size_t size, if (!info->prefy) { info->prefy = 768; } + if (info->prefx >= 4096 || info->prefy >= 4096) { + large_screen = 1; + } if (info->width_mm && info->height_mm) { width_mm = info->width_mm; height_mm = info->height_mm; @@ -337,6 +426,12 @@ void qemu_edid_generate(uint8_t *edid, size_t size, edid_ext_dta(dta); } + if (size >= 384 && large_screen) { + did = edid + 256; + edid[126]++; + init_displayid(did); + } + /* =============== header information =============== */ /* fixed */ @@ -401,40 +496,55 @@ void qemu_edid_generate(uint8_t *edid, size_t size, /* =============== descriptor blocks =============== */ - edid_desc_timing(edid + desc, info->prefx, info->prefy, - width_mm, height_mm); - desc += 18; + if (!large_screen) { + /* The DTD section has only 12 bits to store the resolution */ + edid_desc_timing(desc, refresh_rate, info->prefx, info->prefy, + width_mm, height_mm); + desc = edid_desc_next(edid, dta, desc); + } - edid_desc_ranges(edid + desc); - desc += 18; + xtra3 = desc; + edid_desc_xtra3_std(xtra3); + desc = edid_desc_next(edid, dta, desc); + edid_fill_modes(edid, xtra3, dta, info->maxx, info->maxy); + /* + * dta video data block is finished at thus point, + * so dta descriptor offsets don't move any more. 
+ */ + + edid_desc_ranges(desc); + desc = edid_desc_next(edid, dta, desc); - if (info->name) { - edid_desc_text(edid + desc, 0xfc, info->name); - desc += 18; + if (desc && info->name) { + edid_desc_text(desc, 0xfc, info->name); + desc = edid_desc_next(edid, dta, desc); } - if (info->serial) { - edid_desc_text(edid + desc, 0xff, info->serial); - desc += 18; + if (desc && info->serial) { + edid_desc_text(desc, 0xff, info->serial); + desc = edid_desc_next(edid, dta, desc); } - if (desc < 126) { - xtra3 = edid + desc; - edid_desc_xtra3_std(xtra3); - desc += 18; + while (desc) { + edid_desc_dummy(desc); + desc = edid_desc_next(edid, dta, desc); } - while (desc < 126) { - edid_desc_dummy(edid + desc); - desc += 18; + /* =============== display id extensions =============== */ + + if (did && large_screen) { + qemu_displayid_generate(did, refresh_rate, info->prefx, info->prefy, + width_mm, height_mm); } /* =============== finish up =============== */ - edid_fill_modes(edid, xtra3, dta, info->maxx, info->maxy); - edid_checksum(edid); + edid_checksum(edid, 127); if (dta) { - edid_checksum(dta); + edid_checksum(dta, 127); + } + if (did) { + edid_checksum(did, 127); } } diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c index 8f1725432cd..caca86d7738 100644 --- a/hw/display/g364fb.c +++ b/hw/display/g364fb.c @@ -22,6 +22,7 @@ #include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "ui/console.h" @@ -33,7 +34,6 @@ typedef struct G364State { /* hardware */ - uint8_t *vram; uint32_t vram_size; qemu_irq irq; MemoryRegion mem_vram; @@ -125,7 +125,7 @@ static void g364fb_draw_graphic8(G364State *s) xcursor = ycursor = -65; } - vram = s->vram + s->top_of_screen; + vram = memory_region_get_ram_ptr(&s->mem_vram) + s->top_of_screen; /* XXX: out of range in vram? 
*/ data_display = dd = surface_data(surface); snap = memory_region_snapshot_and_clear_dirty(&s->mem_vram, 0, s->vram_size, @@ -274,6 +274,8 @@ static inline void g364fb_invalidate_display(void *opaque) static void g364fb_reset(G364State *s) { + uint8_t *vram = memory_region_get_ram_ptr(&s->mem_vram); + qemu_irq_lower(s->irq); memset(s->color_palette, 0, sizeof(s->color_palette)); @@ -283,7 +285,7 @@ static void g364fb_reset(G364State *s) s->ctla = 0; s->top_of_screen = 0; s->width = s->height = 0; - memset(s->vram, 0, s->vram_size); + memset(vram, 0, s->vram_size); g364fb_invalidate_display(s); } @@ -450,11 +452,10 @@ static int g364fb_post_load(void *opaque, int version_id) static const VMStateDescription vmstate_g364fb = { .name = "g364fb", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .post_load = g364fb_post_load, .fields = (VMStateField[]) { - VMSTATE_VBUFFER_UINT32(vram, G364State, 1, NULL, vram_size), VMSTATE_BUFFER_UNSAFE(color_palette, G364State, 0, 256 * 3), VMSTATE_BUFFER_UNSAFE(cursor_palette, G364State, 0, 9), VMSTATE_UINT16_ARRAY(cursor, G364State, 512), @@ -474,15 +475,12 @@ static const GraphicHwOps g364fb_ops = { static void g364fb_init(DeviceState *dev, G364State *s) { - s->vram = g_malloc0(s->vram_size); - s->con = graphic_console_init(dev, 0, &g364fb_ops, s); memory_region_init_io(&s->mem_ctrl, OBJECT(dev), &g364fb_ctrl_ops, s, "ctrl", 0x180000); - memory_region_init_ram_ptr(&s->mem_vram, NULL, "vram", - s->vram_size, s->vram); - vmstate_register_ram(&s->mem_vram, dev); + memory_region_init_ram(&s->mem_vram, NULL, "g364fb.vram", s->vram_size, + &error_fatal); memory_region_set_log(&s->mem_vram, true, DIRTY_MEMORY_VGA); } @@ -519,6 +517,16 @@ static Property g364fb_sysbus_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static const VMStateDescription vmstate_g364fb_sysbus = { + .name = "g364fb-sysbus", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(g364, G364SysBusState, 2, vmstate_g364fb, G364State), + VMSTATE_END_OF_LIST() + } +}; + static void g364fb_sysbus_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -527,7 +535,7 @@ static void g364fb_sysbus_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); dc->desc = "G364 framebuffer"; dc->reset = g364fb_sysbus_reset; - dc->vmsd = &vmstate_g364fb; + dc->vmsd = &vmstate_g364fb_sysbus; device_class_set_props(dc, g364fb_sysbus_properties); } diff --git a/hw/display/macfb.c b/hw/display/macfb.c index ff8bdb846b9..277d3e66333 100644 --- a/hw/display/macfb.c +++ b/hw/display/macfb.c @@ -20,16 +20,102 @@ #include "qapi/error.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" +#include "trace.h" -#define VIDEO_BASE 0x00001000 +#define VIDEO_BASE 0x0 #define DAFB_BASE 0x00800000 #define MACFB_PAGE_SIZE 4096 #define MACFB_VRAM_SIZE (4 * MiB) -#define DAFB_RESET 0x200 -#define DAFB_LUT 0x213 +#define DAFB_MODE_VADDR1 0x0 +#define DAFB_MODE_VADDR2 0x4 +#define DAFB_MODE_CTRL1 0x8 +#define DAFB_MODE_CTRL2 0xc +#define DAFB_MODE_SENSE 0x1c +#define DAFB_INTR_MASK 0x104 +#define DAFB_INTR_STAT 0x108 +#define DAFB_INTR_CLEAR 0x10c +#define DAFB_RESET 0x200 +#define DAFB_LUT 0x213 +#define DAFB_INTR_VBL 0x4 + +/* Vertical Blank period (60.15Hz) */ +#define DAFB_INTR_VBL_PERIOD_NS 16625800 + +/* + * Quadra sense codes taken from Apple Technical Note HW26: + * "Macintosh Quadra Built-In Video". 
The sense codes and + * extended sense codes have different meanings: + * + * Sense: + * bit 2: SENSE2 (pin 10) + * bit 1: SENSE1 (pin 7) + * bit 0: SENSE0 (pin 4) + * + * 0 = pin tied to ground + * 1 = pin unconnected + * + * Extended Sense: + * bit 2: pins 4-10 + * bit 1: pins 10-7 + * bit 0: pins 7-4 + * + * 0 = pins tied together + * 1 = pins unconnected + * + * Reads from the sense register appear to be active low, i.e. a 1 indicates + * that the pin is tied to ground, a 0 indicates the pin is disconnected. + * + * Writes to the sense register appear to activate pulldowns i.e. a 1 enables + * a pulldown on a particular pin. + * + * The MacOS toolbox appears to use a series of reads and writes to first + * determine if extended sense is to be used, and then check which pins are + * tied together in order to determine the display type. + */ + +typedef struct MacFbSense { + uint8_t type; + uint8_t sense; + uint8_t ext_sense; +} MacFbSense; + +static MacFbSense macfb_sense_table[] = { + { MACFB_DISPLAY_APPLE_21_COLOR, 0x0, 0 }, + { MACFB_DISPLAY_APPLE_PORTRAIT, 0x1, 0 }, + { MACFB_DISPLAY_APPLE_12_RGB, 0x2, 0 }, + { MACFB_DISPLAY_APPLE_2PAGE_MONO, 0x3, 0 }, + { MACFB_DISPLAY_NTSC_UNDERSCAN, 0x4, 0 }, + { MACFB_DISPLAY_NTSC_OVERSCAN, 0x4, 0 }, + { MACFB_DISPLAY_APPLE_12_MONO, 0x6, 0 }, + { MACFB_DISPLAY_APPLE_13_RGB, 0x6, 0 }, + { MACFB_DISPLAY_16_COLOR, 0x7, 0x3 }, + { MACFB_DISPLAY_PAL1_UNDERSCAN, 0x7, 0x0 }, + { MACFB_DISPLAY_PAL1_OVERSCAN, 0x7, 0x0 }, + { MACFB_DISPLAY_PAL2_UNDERSCAN, 0x7, 0x6 }, + { MACFB_DISPLAY_PAL2_OVERSCAN, 0x7, 0x6 }, + { MACFB_DISPLAY_VGA, 0x7, 0x5 }, + { MACFB_DISPLAY_SVGA, 0x7, 0x5 }, +}; + +static MacFbMode macfb_mode_table[] = { + { MACFB_DISPLAY_VGA, 1, 0x100, 0x71e, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 2, 0x100, 0x70e, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 4, 0x100, 0x706, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 8, 0x100, 0x702, 640, 480, 0x400, 0x1000 }, + { MACFB_DISPLAY_VGA, 24, 0x100, 0x7ff, 640, 480, 0x1000, 0x1000 }, + { MACFB_DISPLAY_VGA, 1, 0xd0 , 0x70e, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 2, 0xd0 , 0x706, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 4, 0xd0 , 0x702, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 8, 0xd0, 0x700, 800, 600, 0x340, 0xe00 }, + { MACFB_DISPLAY_VGA, 24, 0x340, 0x100, 800, 600, 0xd00, 0xe00 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 1, 0x90, 0x506, 1152, 870, 0x240, 0x80 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 2, 0x90, 0x502, 1152, 870, 0x240, 0x80 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 4, 0x90, 0x500, 1152, 870, 0x240, 0x80 }, + { MACFB_DISPLAY_APPLE_21_COLOR, 8, 0x120, 0x5ff, 1152, 870, 0x480, 0x80 }, +}; typedef void macfb_draw_line_func(MacfbState *s, uint8_t *d, uint32_t addr, int width); @@ -49,7 +135,9 @@ static void macfb_draw_line1(MacfbState *s, uint8_t *d, uint32_t addr, for (x = 0; x < width; x++) { int bit = x & 7; int idx = (macfb_read_byte(s, addr) >> (7 - bit)) & 1; - r = g = b = ((1 - idx) << 7); + r = s->color_palette[idx * 3]; + g = s->color_palette[idx * 3 + 1]; + b = s->color_palette[idx * 3 + 2]; addr += (bit == 7); *(uint32_t *)d = rgb_to_pixel32(r, g, b); @@ -143,10 +231,10 @@ static void macfb_draw_line24(MacfbState *s, uint8_t *d, uint32_t addr, int x; for (x = 0; x < width; x++) { - r = macfb_read_byte(s, addr); - g = macfb_read_byte(s, addr + 1); - b = macfb_read_byte(s, addr + 2); - addr += 3; + r = macfb_read_byte(s, addr + 1); + g = macfb_read_byte(s, addr + 2); + b = macfb_read_byte(s, addr + 3); + addr += 4; *(uint32_t *)d = rgb_to_pixel32(r, g, 
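The comment block above describes how the Quadra video sense lines are probed. As a purely illustrative sketch (not the device model added by this patch), a plain, non-extended sense read just reports the monitor's three sense pins active low: for the hypothetical case of an Apple 13" RGB display (sense code 0x6 in macfb_sense_table, i.e. SENSE0 grounded, SENSE1/SENSE2 unconnected) such a read would return 0x1.

/* Illustration only: raw (non-extended) sense read, active low as described
 * in the comment above. A grounded pin reads back as 1, an unconnected pin
 * as 0, so the value is simply the inverted sense code. */
static uint8_t raw_sense_read(uint8_t monitor_sense_code)
{
    return ~monitor_sense_code & 0x7;
}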
b); d += 4; @@ -187,7 +275,7 @@ static void macfb_draw_graphic(MacfbState *s) ram_addr_t page; uint32_t v = 0; int y, ymin; - int macfb_stride = (s->depth * s->width + 7) / 8; + int macfb_stride = s->mode->stride; macfb_draw_line_func *macfb_draw_line; switch (s->depth) { @@ -219,7 +307,7 @@ static void macfb_draw_graphic(MacfbState *s) DIRTY_MEMORY_VGA); ymin = -1; - page = 0; + page = s->mode->offset; for (y = 0; y < s->height; y++, page += macfb_stride) { if (macfb_check_dirty(s, snap, page, macfb_stride)) { uint8_t *data_display; @@ -252,6 +340,121 @@ static void macfb_invalidate_display(void *opaque) memory_region_set_dirty(&s->mem_vram, 0, MACFB_VRAM_SIZE); } +static uint32_t macfb_sense_read(MacfbState *s) +{ + MacFbSense *macfb_sense; + uint8_t sense; + + assert(s->type < ARRAY_SIZE(macfb_sense_table)); + macfb_sense = &macfb_sense_table[s->type]; + if (macfb_sense->sense == 0x7) { + /* Extended sense */ + sense = 0; + if (!(macfb_sense->ext_sense & 1)) { + /* Pins 7-4 together */ + if (~s->regs[DAFB_MODE_SENSE >> 2] & 3) { + sense = (~s->regs[DAFB_MODE_SENSE >> 2] & 7) | 3; + } + } + if (!(macfb_sense->ext_sense & 2)) { + /* Pins 10-7 together */ + if (~s->regs[DAFB_MODE_SENSE >> 2] & 6) { + sense = (~s->regs[DAFB_MODE_SENSE >> 2] & 7) | 6; + } + } + if (!(macfb_sense->ext_sense & 4)) { + /* Pins 4-10 together */ + if (~s->regs[DAFB_MODE_SENSE >> 2] & 5) { + sense = (~s->regs[DAFB_MODE_SENSE >> 2] & 7) | 5; + } + } + } else { + /* Normal sense */ + sense = (~macfb_sense->sense & 7) | + (~s->regs[DAFB_MODE_SENSE >> 2] & 7); + } + + trace_macfb_sense_read(sense); + return sense; +} + +static void macfb_sense_write(MacfbState *s, uint32_t val) +{ + s->regs[DAFB_MODE_SENSE >> 2] = val; + + trace_macfb_sense_write(val); + return; +} + +static void macfb_update_mode(MacfbState *s) +{ + s->width = s->mode->width; + s->height = s->mode->height; + s->depth = s->mode->depth; + + trace_macfb_update_mode(s->width, s->height, s->depth); + macfb_invalidate_display(s); +} + +static void macfb_mode_write(MacfbState *s) +{ + MacFbMode *macfb_mode; + int i; + + for (i = 0; i < ARRAY_SIZE(macfb_mode_table); i++) { + macfb_mode = &macfb_mode_table[i]; + + if (s->type != macfb_mode->type) { + continue; + } + + if ((s->regs[DAFB_MODE_CTRL1 >> 2] & 0xff) == + (macfb_mode->mode_ctrl1 & 0xff) && + (s->regs[DAFB_MODE_CTRL2 >> 2] & 0xff) == + (macfb_mode->mode_ctrl2 & 0xff)) { + s->mode = macfb_mode; + macfb_update_mode(s); + break; + } + } +} + +static MacFbMode *macfb_find_mode(MacfbDisplayType display_type, + uint16_t width, uint16_t height, + uint8_t depth) +{ + MacFbMode *macfb_mode; + int i; + + for (i = 0; i < ARRAY_SIZE(macfb_mode_table); i++) { + macfb_mode = &macfb_mode_table[i]; + + if (display_type == macfb_mode->type && width == macfb_mode->width && + height == macfb_mode->height && depth == macfb_mode->depth) { + return macfb_mode; + } + } + + return NULL; +} + +static gchar *macfb_mode_list(void) +{ + GString *list = g_string_new(""); + MacFbMode *macfb_mode; + int i; + + for (i = 0; i < ARRAY_SIZE(macfb_mode_table); i++) { + macfb_mode = &macfb_mode_table[i]; + + g_string_append_printf(list, " %dx%dx%d\n", macfb_mode->width, + macfb_mode->height, macfb_mode->depth); + } + + return g_string_free(list, FALSE); +} + + static void macfb_update_display(void *opaque) { MacfbState *s = opaque; @@ -271,6 +474,36 @@ static void macfb_update_display(void *opaque) macfb_draw_graphic(s); } +static void macfb_update_irq(MacfbState *s) +{ + uint32_t irq_state = s->irq_state & s->irq_mask; + + if 
(irq_state) { + qemu_irq_raise(s->irq); + } else { + qemu_irq_lower(s->irq); + } +} + +static int64_t macfb_next_vbl(void) +{ + return (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + DAFB_INTR_VBL_PERIOD_NS) / + DAFB_INTR_VBL_PERIOD_NS * DAFB_INTR_VBL_PERIOD_NS; +} + +static void macfb_vbl_timer(void *opaque) +{ + MacfbState *s = opaque; + int64_t next_vbl; + + s->irq_state |= DAFB_INTR_VBL; + macfb_update_irq(s); + + /* 60 Hz irq */ + next_vbl = macfb_next_vbl(); + timer_mod(s->vbl_timer, next_vbl); +} + static void macfb_reset(MacfbState *s) { int i; @@ -289,7 +522,26 @@ static uint64_t macfb_ctrl_read(void *opaque, hwaddr addr, unsigned int size) { - return 0; + MacfbState *s = opaque; + uint64_t val = 0; + + switch (addr) { + case DAFB_MODE_VADDR1: + case DAFB_MODE_VADDR2: + case DAFB_MODE_CTRL1: + case DAFB_MODE_CTRL2: + val = s->regs[addr >> 2]; + break; + case DAFB_INTR_STAT: + val = s->irq_state; + break; + case DAFB_MODE_SENSE: + val = macfb_sense_read(s); + break; + } + + trace_macfb_ctrl_read(addr, val, size); + return val; } static void macfb_ctrl_write(void *opaque, @@ -298,17 +550,52 @@ static void macfb_ctrl_write(void *opaque, unsigned int size) { MacfbState *s = opaque; + int64_t next_vbl; + switch (addr) { + case DAFB_MODE_VADDR1: + case DAFB_MODE_VADDR2: + s->regs[addr >> 2] = val; + break; + case DAFB_MODE_CTRL1 ... DAFB_MODE_CTRL1 + 3: + case DAFB_MODE_CTRL2 ... DAFB_MODE_CTRL2 + 3: + s->regs[addr >> 2] = val; + if (val) { + macfb_mode_write(s); + } + break; + case DAFB_MODE_SENSE: + macfb_sense_write(s, val); + break; + case DAFB_INTR_MASK: + s->irq_mask = val; + if (val & DAFB_INTR_VBL) { + next_vbl = macfb_next_vbl(); + timer_mod(s->vbl_timer, next_vbl); + } else { + timer_del(s->vbl_timer); + } + break; + case DAFB_INTR_CLEAR: + s->irq_state &= ~DAFB_INTR_VBL; + macfb_update_irq(s); + break; case DAFB_RESET: s->palette_current = 0; + s->irq_state &= ~DAFB_INTR_VBL; + macfb_update_irq(s); break; case DAFB_LUT: - s->color_palette[s->palette_current++] = val; + s->color_palette[s->palette_current] = val; + s->palette_current = (s->palette_current + 1) % + ARRAY_SIZE(s->color_palette); if (s->palette_current % 3) { macfb_invalidate_display(s); } break; } + + trace_macfb_ctrl_write(addr, val, size); } static const MemoryRegionOps macfb_ctrl_ops = { @@ -321,7 +608,7 @@ static const MemoryRegionOps macfb_ctrl_ops = { static int macfb_post_load(void *opaque, int version_id) { - macfb_invalidate_display(opaque); + macfb_mode_write(opaque); return 0; } @@ -334,6 +621,7 @@ static const VMStateDescription vmstate_macfb = { .fields = (VMStateField[]) { VMSTATE_UINT8_ARRAY(color_palette, MacfbState, 256 * 3), VMSTATE_UINT32(palette_current, MacfbState), + VMSTATE_UINT32_ARRAY(regs, MacfbState, MACFB_NUM_REGS), VMSTATE_END_OF_LIST() } }; @@ -343,14 +631,20 @@ static const GraphicHwOps macfb_ops = { .gfx_update = macfb_update_display, }; -static void macfb_common_realize(DeviceState *dev, MacfbState *s, Error **errp) +static bool macfb_common_realize(DeviceState *dev, MacfbState *s, Error **errp) { DisplaySurface *surface; - if (s->depth != 1 && s->depth != 2 && s->depth != 4 && s->depth != 8 && - s->depth != 16 && s->depth != 24) { - error_setg(errp, "unknown guest depth %d", s->depth); - return; + s->mode = macfb_find_mode(s->type, s->width, s->height, s->depth); + if (!s->mode) { + gchar *list; + error_setg(errp, "unknown display mode: width %d, height %d, depth %d", + s->width, s->height, s->depth); + list = macfb_mode_list(); + error_append_hint(errp, "Available modes:\n%s", list); 
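macfb_next_vbl() in the hunk above schedules the vertical-blank interrupt by rounding the current QEMU_CLOCK_VIRTUAL time up to the next multiple of DAFB_INTR_VBL_PERIOD_NS (roughly 60.15 Hz). The idiom, shown here as a standalone sketch, relies on truncating integer division; if `now` already sits on a boundary the result still moves one full period forward, which is what the timer wants.

#include <stdint.h>

/* Sketch of the rounding used by macfb_next_vbl(): advance `now` to the
 * next multiple of `period_ns` (integer division truncates). */
static int64_t next_period_boundary(int64_t now, int64_t period_ns)
{
    return (now + period_ns) / period_ns * period_ns;
}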
+ g_free(list); + + return false; } s->con = graphic_console_init(dev, 0, &macfb_ops, s); @@ -359,18 +653,21 @@ static void macfb_common_realize(DeviceState *dev, MacfbState *s, Error **errp) if (surface_bits_per_pixel(surface) != 32) { error_setg(errp, "unknown host depth %d", surface_bits_per_pixel(surface)); - return; + return false; } memory_region_init_io(&s->mem_ctrl, OBJECT(dev), &macfb_ctrl_ops, s, "macfb-ctrl", 0x1000); - memory_region_init_ram_nomigrate(&s->mem_vram, OBJECT(s), "macfb-vram", - MACFB_VRAM_SIZE, errp); + memory_region_init_ram(&s->mem_vram, OBJECT(dev), "macfb-vram", + MACFB_VRAM_SIZE, &error_abort); s->vram = memory_region_get_ram_ptr(&s->mem_vram); s->vram_bit_mask = MACFB_VRAM_SIZE - 1; - vmstate_register_ram(&s->mem_vram, dev); memory_region_set_coalescing(&s->mem_vram); + + s->vbl_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, macfb_vbl_timer, s); + macfb_update_mode(s); + return true; } static void macfb_sysbus_realize(DeviceState *dev, Error **errp) @@ -378,14 +675,23 @@ static void macfb_sysbus_realize(DeviceState *dev, Error **errp) MacfbSysBusState *s = MACFB(dev); MacfbState *ms = &s->macfb; - macfb_common_realize(dev, ms, errp); + if (!macfb_common_realize(dev, ms, errp)) { + return; + } + sysbus_init_mmio(SYS_BUS_DEVICE(s), &ms->mem_ctrl); sysbus_init_mmio(SYS_BUS_DEVICE(s), &ms->mem_vram); + + qdev_init_gpio_out(dev, &ms->irq, 1); } -const uint8_t macfb_rom[] = { - 255, 0, 0, 0, -}; +static void macfb_nubus_set_irq(void *opaque, int n, int level) +{ + MacfbNubusState *s = NUBUS_MACFB(opaque); + NubusDevice *nd = NUBUS_DEVICE(s); + + nubus_set_irq(nd, level); +} static void macfb_nubus_realize(DeviceState *dev, Error **errp) { @@ -395,12 +701,29 @@ static void macfb_nubus_realize(DeviceState *dev, Error **errp) MacfbState *ms = &s->macfb; ndc->parent_realize(dev, errp); + if (*errp) { + return; + } + + if (!macfb_common_realize(dev, ms, errp)) { + return; + } - macfb_common_realize(dev, ms, errp); memory_region_add_subregion(&nd->slot_mem, DAFB_BASE, &ms->mem_ctrl); memory_region_add_subregion(&nd->slot_mem, VIDEO_BASE, &ms->mem_vram); - nubus_register_rom(nd, macfb_rom, sizeof(macfb_rom), 1, 9, 0xf); + ms->irq = qemu_allocate_irq(macfb_nubus_set_irq, s, 0); +} + +static void macfb_nubus_unrealize(DeviceState *dev) +{ + MacfbNubusState *s = NUBUS_MACFB(dev); + MacfbNubusDeviceClass *ndc = NUBUS_MACFB_GET_CLASS(dev); + MacfbState *ms = &s->macfb; + + ndc->parent_unrealize(dev); + + qemu_free_irq(ms->irq); } static void macfb_sysbus_reset(DeviceState *d) @@ -419,6 +742,8 @@ static Property macfb_sysbus_properties[] = { DEFINE_PROP_UINT32("width", MacfbSysBusState, macfb.width, 640), DEFINE_PROP_UINT32("height", MacfbSysBusState, macfb.height, 480), DEFINE_PROP_UINT8("depth", MacfbSysBusState, macfb.depth, 8), + DEFINE_PROP_UINT8("display", MacfbSysBusState, macfb.type, + MACFB_DISPLAY_VGA), DEFINE_PROP_END_OF_LIST(), }; @@ -426,6 +751,8 @@ static Property macfb_nubus_properties[] = { DEFINE_PROP_UINT32("width", MacfbNubusState, macfb.width, 640), DEFINE_PROP_UINT32("height", MacfbNubusState, macfb.height, 480), DEFINE_PROP_UINT8("depth", MacfbNubusState, macfb.depth, 8), + DEFINE_PROP_UINT8("display", MacfbNubusState, macfb.type, + MACFB_DISPLAY_VGA), DEFINE_PROP_END_OF_LIST(), }; @@ -447,9 +774,12 @@ static void macfb_nubus_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, macfb_nubus_realize, &ndc->parent_realize); + device_class_set_parent_unrealize(dc, macfb_nubus_unrealize, + &ndc->parent_unrealize); dc->desc = "Nubus 
Macintosh framebuffer"; dc->reset = macfb_nubus_reset; dc->vmsd = &vmstate_macfb; + set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); device_class_set_props(dc, macfb_nubus_properties); } diff --git a/hw/display/meson.build b/hw/display/meson.build index 9d79e3951d9..861c43ff984 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -48,7 +48,6 @@ endif softmmu_ss.add(when: 'CONFIG_DPCD', if_true: files('dpcd.c')) softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx_dp.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-vgafb.c')) softmmu_ss.add(when: 'CONFIG_ARTIST', if_true: files('artist.c')) softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 'ati_2d.c', 'ati_dbg.c')) @@ -56,11 +55,16 @@ softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 'ati_2d if config_all_devices.has_key('CONFIG_VIRTIO_GPU') virtio_gpu_ss = ss.source_set() virtio_gpu_ss.add(when: 'CONFIG_VIRTIO_GPU', - if_true: [files('virtio-gpu-base.c', 'virtio-gpu.c'), pixman, virgl]) - virtio_gpu_ss.add(when: ['CONFIG_VIRTIO_GPU', 'CONFIG_VIRGL'], - if_true: [files('virtio-gpu-3d.c'), pixman, virgl]) + if_true: [files('virtio-gpu-base.c', 'virtio-gpu.c'), pixman]) + virtio_gpu_ss.add(when: 'CONFIG_LINUX', if_true: files('virtio-gpu-udmabuf.c'), + if_false: files('virtio-gpu-udmabuf-stubs.c')) virtio_gpu_ss.add(when: 'CONFIG_VHOST_USER_GPU', if_true: files('vhost-user-gpu.c')) hw_display_modules += {'virtio-gpu': virtio_gpu_ss} + + virtio_gpu_gl_ss = ss.source_set() + virtio_gpu_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', virgl, opengl], + if_true: [files('virtio-gpu-gl.c', 'virtio-gpu-virgl.c'), pixman, virgl]) + hw_display_modules += {'virtio-gpu-gl': virtio_gpu_gl_ss} endif if config_all_devices.has_key('CONFIG_VIRTIO_PCI') @@ -70,6 +74,11 @@ if config_all_devices.has_key('CONFIG_VIRTIO_PCI') virtio_gpu_pci_ss.add(when: ['CONFIG_VHOST_USER_GPU', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-gpu-pci.c')) hw_display_modules += {'virtio-gpu-pci': virtio_gpu_pci_ss} + + virtio_gpu_pci_gl_ss = ss.source_set() + virtio_gpu_pci_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', 'CONFIG_VIRTIO_PCI', virgl, opengl], + if_true: [files('virtio-gpu-pci-gl.c'), pixman]) + hw_display_modules += {'virtio-gpu-pci-gl': virtio_gpu_pci_gl_ss} endif if config_all_devices.has_key('CONFIG_VIRTIO_VGA') @@ -79,9 +88,13 @@ if config_all_devices.has_key('CONFIG_VIRTIO_VGA') virtio_vga_ss.add(when: 'CONFIG_VHOST_USER_VGA', if_true: files('vhost-user-vga.c')) hw_display_modules += {'virtio-vga': virtio_vga_ss} + + virtio_vga_gl_ss = ss.source_set() + virtio_vga_gl_ss.add(when: ['CONFIG_VIRTIO_VGA', virgl, opengl], + if_true: [files('virtio-vga-gl.c'), pixman]) + hw_display_modules += {'virtio-vga-gl': virtio_vga_gl_ss} endif -specific_ss.add(when: [x11, opengl, 'CONFIG_MILKYMIST_TMU2'], if_true: files('milkymist-tmu2.c')) specific_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_lcdc.c')) modules += { 'hw-display': hw_display_modules } diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c deleted file mode 100644 index 02a28c807b5..00000000000 --- a/hw/display/milkymist-tmu2.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * QEMU model of the Milkymist texture mapping unit. 
- * - * Copyright (c) 2010 Michael Walle - * Copyright (c) 2010 Sebastien Bourdeauducq - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/tmu2.pdf - * - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qapi/error.h" -#include "hw/display/milkymist_tmu2.h" - -#include -#include -#include -#include "qom/object.h" - -enum { - R_CTL = 0, - R_HMESHLAST, - R_VMESHLAST, - R_BRIGHTNESS, - R_CHROMAKEY, - R_VERTICESADDR, - R_TEXFBUF, - R_TEXHRES, - R_TEXVRES, - R_TEXHMASK, - R_TEXVMASK, - R_DSTFBUF, - R_DSTHRES, - R_DSTVRES, - R_DSTHOFFSET, - R_DSTVOFFSET, - R_DSTSQUAREW, - R_DSTSQUAREH, - R_ALPHA, - R_MAX -}; - -enum { - CTL_START_BUSY = (1<<0), - CTL_CHROMAKEY = (1<<1), -}; - -enum { - MAX_BRIGHTNESS = 63, - MAX_ALPHA = 63, -}; - -enum { - MESH_MAXSIZE = 128, -}; - -struct vertex { - int x; - int y; -} QEMU_PACKED; - -#define TYPE_MILKYMIST_TMU2 "milkymist-tmu2" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistTMU2State, MILKYMIST_TMU2) - -struct MilkymistTMU2State { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - Chardev *chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; - - Display *dpy; - GLXFBConfig glx_fb_config; - GLXContext glx_context; -}; - -static const int glx_fbconfig_attr[] = { - GLX_GREEN_SIZE, 5, - GLX_GREEN_SIZE, 6, - GLX_BLUE_SIZE, 5, - None -}; - -static int tmu2_glx_init(MilkymistTMU2State *s) -{ - GLXFBConfig *configs; - int nelements; - - s->dpy = XOpenDisplay(NULL); /* FIXME: call XCloseDisplay() */ - if (s->dpy == NULL) { - return 1; - } - - configs = glXChooseFBConfig(s->dpy, 0, glx_fbconfig_attr, &nelements); - if (configs == NULL) { - return 1; - } - - s->glx_fb_config = *configs; - XFree(configs); - - /* FIXME: call glXDestroyContext() */ - s->glx_context = glXCreateNewContext(s->dpy, s->glx_fb_config, - GLX_RGBA_TYPE, NULL, 1); - if (s->glx_context == NULL) { - return 1; - } - - return 0; -} - -static void tmu2_gl_map(struct vertex *mesh, int texhres, int texvres, - int hmeshlast, int vmeshlast, int ho, int vo, int sw, int sh) -{ - int x, y; - int x0, y0, x1, y1; - int u0, v0, u1, v1, u2, v2, u3, v3; - double xscale = 1.0 / ((double)(64 * texhres)); - double yscale = 1.0 / ((double)(64 * texvres)); - - glLoadIdentity(); - glTranslatef(ho, vo, 0); - glEnable(GL_TEXTURE_2D); - glBegin(GL_QUADS); - - for (y = 0; y < vmeshlast; y++) { - y0 = y * sh; - y1 = y0 + sh; - for (x = 0; x < hmeshlast; x++) { - x0 = x * sw; - x1 = x0 + sw; - - u0 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x].x); - v0 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x].y); - u1 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x + 1].x); - v1 = be32_to_cpu(mesh[MESH_MAXSIZE * y + x + 1].y); - u2 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 1) + x + 1].x); - v2 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 
1) + x + 1].y); - u3 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 1) + x].x); - v3 = be32_to_cpu(mesh[MESH_MAXSIZE * (y + 1) + x].y); - - glTexCoord2d(((double)u0) * xscale, ((double)v0) * yscale); - glVertex3i(x0, y0, 0); - glTexCoord2d(((double)u1) * xscale, ((double)v1) * yscale); - glVertex3i(x1, y0, 0); - glTexCoord2d(((double)u2) * xscale, ((double)v2) * yscale); - glVertex3i(x1, y1, 0); - glTexCoord2d(((double)u3) * xscale, ((double)v3) * yscale); - glVertex3i(x0, y1, 0); - } - } - - glEnd(); -} - -static void tmu2_start(MilkymistTMU2State *s) -{ - int pbuffer_attrib[6] = { - GLX_PBUFFER_WIDTH, - 0, - GLX_PBUFFER_HEIGHT, - 0, - GLX_PRESERVED_CONTENTS, - True - }; - - GLXPbuffer pbuffer; - GLuint texture; - void *fb; - hwaddr fb_len; - void *mesh; - hwaddr mesh_len; - float m; - - trace_milkymist_tmu2_start(); - - /* Create and set up a suitable OpenGL context */ - pbuffer_attrib[1] = s->regs[R_DSTHRES]; - pbuffer_attrib[3] = s->regs[R_DSTVRES]; - pbuffer = glXCreatePbuffer(s->dpy, s->glx_fb_config, pbuffer_attrib); - glXMakeContextCurrent(s->dpy, pbuffer, pbuffer, s->glx_context); - - /* Fixup endianness. TODO: would it work on BE hosts? */ - glPixelStorei(GL_UNPACK_SWAP_BYTES, 1); - glPixelStorei(GL_PACK_SWAP_BYTES, 1); - - /* Row alignment */ - glPixelStorei(GL_UNPACK_ALIGNMENT, 2); - glPixelStorei(GL_PACK_ALIGNMENT, 2); - - /* Read the QEMU source framebuffer into an OpenGL texture */ - glGenTextures(1, &texture); - glBindTexture(GL_TEXTURE_2D, texture); - fb_len = 2ULL * s->regs[R_TEXHRES] * s->regs[R_TEXVRES]; - fb = cpu_physical_memory_map(s->regs[R_TEXFBUF], &fb_len, false); - if (fb == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - glTexImage2D(GL_TEXTURE_2D, 0, 3, s->regs[R_TEXHRES], s->regs[R_TEXVRES], - 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, fb); - cpu_physical_memory_unmap(fb, fb_len, 0, fb_len); - - /* Set up texturing options */ - /* WARNING: - * Many cases of TMU2 masking are not supported by OpenGL. - * We only implement the most common ones: - * - full bilinear filtering vs. nearest texel - * - texture clamping vs. texture wrapping - */ - if ((s->regs[R_TEXHMASK] & 0x3f) > 0x20) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - if ((s->regs[R_TEXHMASK] >> 6) & s->regs[R_TEXHRES]) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - } - if ((s->regs[R_TEXVMASK] >> 6) & s->regs[R_TEXVRES]) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - } - - /* Translucency and decay */ - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - m = (float)(s->regs[R_BRIGHTNESS] + 1) / 64.0f; - glColor4f(m, m, m, (float)(s->regs[R_ALPHA] + 1) / 64.0f); - - /* Read the QEMU dest. 
framebuffer into the OpenGL framebuffer */ - fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; - fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, false); - if (fb == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - - glDrawPixels(s->regs[R_DSTHRES], s->regs[R_DSTVRES], GL_RGB, - GL_UNSIGNED_SHORT_5_6_5, fb); - cpu_physical_memory_unmap(fb, fb_len, 0, fb_len); - glViewport(0, 0, s->regs[R_DSTHRES], s->regs[R_DSTVRES]); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - glOrtho(0.0, s->regs[R_DSTHRES], 0.0, s->regs[R_DSTVRES], -1.0, 1.0); - glMatrixMode(GL_MODELVIEW); - - /* Map the texture */ - mesh_len = MESH_MAXSIZE*MESH_MAXSIZE*sizeof(struct vertex); - mesh = cpu_physical_memory_map(s->regs[R_VERTICESADDR], &mesh_len, false); - if (mesh == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - - tmu2_gl_map((struct vertex *)mesh, - s->regs[R_TEXHRES], s->regs[R_TEXVRES], - s->regs[R_HMESHLAST], s->regs[R_VMESHLAST], - s->regs[R_DSTHOFFSET], s->regs[R_DSTVOFFSET], - s->regs[R_DSTSQUAREW], s->regs[R_DSTSQUAREH]); - cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len); - - /* Write back the OpenGL framebuffer to the QEMU framebuffer */ - fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; - fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, true); - if (fb == NULL) { - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - return; - } - - glReadPixels(0, 0, s->regs[R_DSTHRES], s->regs[R_DSTVRES], GL_RGB, - GL_UNSIGNED_SHORT_5_6_5, fb); - cpu_physical_memory_unmap(fb, fb_len, 1, fb_len); - - /* Free OpenGL allocs */ - glDeleteTextures(1, &texture); - glXMakeContextCurrent(s->dpy, None, None, NULL); - glXDestroyPbuffer(s->dpy, pbuffer); - - s->regs[R_CTL] &= ~CTL_START_BUSY; - - trace_milkymist_tmu2_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static uint64_t tmu2_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistTMU2State *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTL: - case R_HMESHLAST: - case R_VMESHLAST: - case R_BRIGHTNESS: - case R_CHROMAKEY: - case R_VERTICESADDR: - case R_TEXFBUF: - case R_TEXHRES: - case R_TEXVRES: - case R_TEXHMASK: - case R_TEXVMASK: - case R_DSTFBUF: - case R_DSTHRES: - case R_DSTVRES: - case R_DSTHOFFSET: - case R_DSTVOFFSET: - case R_DSTSQUAREW: - case R_DSTSQUAREH: - case R_ALPHA: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_tmu2: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_tmu2_memory_read(addr << 2, r); - - return r; -} - -static void tmu2_check_registers(MilkymistTMU2State *s) -{ - if (s->regs[R_BRIGHTNESS] > MAX_BRIGHTNESS) { - error_report("milkymist_tmu2: max brightness is %d", MAX_BRIGHTNESS); - } - - if (s->regs[R_ALPHA] > MAX_ALPHA) { - error_report("milkymist_tmu2: max alpha is %d", MAX_ALPHA); - } - - if (s->regs[R_VERTICESADDR] & 0x07) { - error_report("milkymist_tmu2: vertex mesh address has to be 64-bit " - "aligned"); - } - - if (s->regs[R_TEXFBUF] & 0x01) { - error_report("milkymist_tmu2: texture buffer address has to be " - "16-bit aligned"); - } -} - -static void tmu2_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistTMU2State *s = opaque; - - trace_milkymist_tmu2_memory_write(addr, value); - - addr >>= 2; - 
switch (addr) { - case R_CTL: - s->regs[addr] = value; - if (value & CTL_START_BUSY) { - tmu2_start(s); - } - break; - case R_BRIGHTNESS: - case R_HMESHLAST: - case R_VMESHLAST: - case R_CHROMAKEY: - case R_VERTICESADDR: - case R_TEXFBUF: - case R_TEXHRES: - case R_TEXVRES: - case R_TEXHMASK: - case R_TEXVMASK: - case R_DSTFBUF: - case R_DSTHRES: - case R_DSTVRES: - case R_DSTHOFFSET: - case R_DSTVOFFSET: - case R_DSTSQUAREW: - case R_DSTSQUAREH: - case R_ALPHA: - s->regs[addr] = value; - break; - - default: - error_report("milkymist_tmu2: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - tmu2_check_registers(s); -} - -static const MemoryRegionOps tmu2_mmio_ops = { - .read = tmu2_read, - .write = tmu2_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_tmu2_reset(DeviceState *d) -{ - MilkymistTMU2State *s = MILKYMIST_TMU2(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } -} - -static void milkymist_tmu2_init(Object *obj) -{ - MilkymistTMU2State *s = MILKYMIST_TMU2(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->irq); - - memory_region_init_io(&s->regs_region, obj, &tmu2_mmio_ops, s, - "milkymist-tmu2", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void milkymist_tmu2_realize(DeviceState *dev, Error **errp) -{ - MilkymistTMU2State *s = MILKYMIST_TMU2(dev); - - if (tmu2_glx_init(s)) { - error_setg(errp, "tmu2_glx_init failed"); - } -} - -static const VMStateDescription vmstate_milkymist_tmu2 = { - .name = "milkymist-tmu2", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistTMU2State, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_tmu2_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_tmu2_realize; - dc->reset = milkymist_tmu2_reset; - dc->vmsd = &vmstate_milkymist_tmu2; -} - -static const TypeInfo milkymist_tmu2_info = { - .name = TYPE_MILKYMIST_TMU2, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistTMU2State), - .instance_init = milkymist_tmu2_init, - .class_init = milkymist_tmu2_class_init, -}; - -static void milkymist_tmu2_register_types(void) -{ - type_register_static(&milkymist_tmu2_info); -} - -type_init(milkymist_tmu2_register_types) - -DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq) -{ - DeviceState *dev; - Display *d; - GLXFBConfig *configs; - int nelements; - int ver_major, ver_minor; - - /* check that GLX will work */ - d = XOpenDisplay(NULL); - if (d == NULL) { - return NULL; - } - - if (!glXQueryVersion(d, &ver_major, &ver_minor)) { - /* - * Yeah, sometimes getting the GLX version can fail. - * Isn't X beautiful? - */ - XCloseDisplay(d); - return NULL; - } - - if ((ver_major < 1) || ((ver_major == 1) && (ver_minor < 3))) { - printf("Your GLX version is %d.%d," - "but TMU emulation needs at least 1.3. 
TMU disabled.\n", - ver_major, ver_minor); - XCloseDisplay(d); - return NULL; - } - - configs = glXChooseFBConfig(d, 0, glx_fbconfig_attr, &nelements); - if (configs == NULL) { - XCloseDisplay(d); - return NULL; - } - - XFree(configs); - XCloseDisplay(d); - - dev = qdev_new(TYPE_MILKYMIST_TMU2); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - - return dev; -} diff --git a/hw/display/milkymist-vgafb.c b/hw/display/milkymist-vgafb.c deleted file mode 100644 index e2c587e2dfc..00000000000 --- a/hw/display/milkymist-vgafb.c +++ /dev/null @@ -1,360 +0,0 @@ - -/* - * QEMU model of the Milkymist VGA framebuffer. - * - * Copyright (c) 2010-2012 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/vgafb.pdf - */ - -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "hw/qdev-properties.h" -#include "hw/sysbus.h" -#include "trace.h" -#include "ui/console.h" -#include "framebuffer.h" -#include "ui/pixel_ops.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -#define BITS 8 -#include "migration/vmstate.h" -#include "milkymist-vgafb_template.h" -#define BITS 15 -#include "milkymist-vgafb_template.h" -#define BITS 16 -#include "milkymist-vgafb_template.h" -#define BITS 24 -#include "milkymist-vgafb_template.h" -#define BITS 32 -#include "milkymist-vgafb_template.h" - -enum { - R_CTRL = 0, - R_HRES, - R_HSYNC_START, - R_HSYNC_END, - R_HSCAN, - R_VRES, - R_VSYNC_START, - R_VSYNC_END, - R_VSCAN, - R_BASEADDRESS, - R_BASEADDRESS_ACT, - R_BURST_COUNT, - R_DDC, - R_SOURCE_CLOCK, - R_MAX -}; - -enum { - CTRL_RESET = (1<<0), -}; - -#define TYPE_MILKYMIST_VGAFB "milkymist-vgafb" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistVgafbState, MILKYMIST_VGAFB) - -struct MilkymistVgafbState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - MemoryRegionSection fbsection; - QemuConsole *con; - - int invalidate; - uint32_t fb_offset; - uint32_t fb_mask; - - uint32_t regs[R_MAX]; -}; - -static int vgafb_enabled(MilkymistVgafbState *s) -{ - return !(s->regs[R_CTRL] & CTRL_RESET); -} - -static void vgafb_update_display(void *opaque) -{ - MilkymistVgafbState *s = opaque; - SysBusDevice *sbd; - DisplaySurface *surface = qemu_console_surface(s->con); - int src_width; - int first = 0; - int last = 0; - drawfn fn; - - if (!vgafb_enabled(s)) { - return; - } - - sbd = SYS_BUS_DEVICE(s); - int dest_width = s->regs[R_HRES]; - - switch (surface_bits_per_pixel(surface)) { - case 0: - return; - case 8: - fn = draw_line_8; - break; - case 15: - fn = draw_line_15; - dest_width *= 2; - break; - case 16: - fn = draw_line_16; - dest_width *= 2; - break; - case 24: - fn = draw_line_24; - dest_width *= 3; - break; - case 32: - fn = draw_line_32; - dest_width *= 4; - break; - default: - hw_error("milkymist_vgafb: 
bad color depth\n"); - break; - } - - src_width = s->regs[R_HRES] * 2; - if (s->invalidate) { - framebuffer_update_memory_section(&s->fbsection, - sysbus_address_space(sbd), - s->regs[R_BASEADDRESS] + s->fb_offset, - s->regs[R_VRES], src_width); - } - - framebuffer_update_display(surface, &s->fbsection, - s->regs[R_HRES], - s->regs[R_VRES], - src_width, - dest_width, - 0, - s->invalidate, - fn, - NULL, - &first, &last); - - if (first >= 0) { - dpy_gfx_update(s->con, 0, first, s->regs[R_HRES], last - first + 1); - } - s->invalidate = 0; -} - -static void vgafb_invalidate_display(void *opaque) -{ - MilkymistVgafbState *s = opaque; - s->invalidate = 1; -} - -static void vgafb_resize(MilkymistVgafbState *s) -{ - if (!vgafb_enabled(s)) { - return; - } - - qemu_console_resize(s->con, s->regs[R_HRES], s->regs[R_VRES]); - s->invalidate = 1; -} - -static uint64_t vgafb_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistVgafbState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTRL: - case R_HRES: - case R_HSYNC_START: - case R_HSYNC_END: - case R_HSCAN: - case R_VRES: - case R_VSYNC_START: - case R_VSYNC_END: - case R_VSCAN: - case R_BASEADDRESS: - case R_BURST_COUNT: - case R_DDC: - case R_SOURCE_CLOCK: - r = s->regs[addr]; - break; - case R_BASEADDRESS_ACT: - r = s->regs[R_BASEADDRESS]; - break; - - default: - error_report("milkymist_vgafb: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_vgafb_memory_read(addr << 2, r); - - return r; -} - -static void vgafb_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistVgafbState *s = opaque; - - trace_milkymist_vgafb_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTRL: - s->regs[addr] = value; - vgafb_resize(s); - break; - case R_HSYNC_START: - case R_HSYNC_END: - case R_HSCAN: - case R_VSYNC_START: - case R_VSYNC_END: - case R_VSCAN: - case R_BURST_COUNT: - case R_DDC: - case R_SOURCE_CLOCK: - s->regs[addr] = value; - break; - case R_BASEADDRESS: - if (value & 0x1f) { - error_report("milkymist_vgafb: framebuffer base address have to " - "be 32 byte aligned"); - break; - } - s->regs[addr] = value & s->fb_mask; - s->invalidate = 1; - break; - case R_HRES: - case R_VRES: - s->regs[addr] = value; - vgafb_resize(s); - break; - case R_BASEADDRESS_ACT: - error_report("milkymist_vgafb: write to read-only register 0x" - TARGET_FMT_plx, addr << 2); - break; - - default: - error_report("milkymist_vgafb: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps vgafb_mmio_ops = { - .read = vgafb_read, - .write = vgafb_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_vgafb_reset(DeviceState *d) -{ - MilkymistVgafbState *s = MILKYMIST_VGAFB(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* defaults */ - s->regs[R_CTRL] = CTRL_RESET; - s->regs[R_HRES] = 640; - s->regs[R_VRES] = 480; - s->regs[R_BASEADDRESS] = 0; -} - -static const GraphicHwOps vgafb_ops = { - .invalidate = vgafb_invalidate_display, - .gfx_update = vgafb_update_display, -}; - -static void milkymist_vgafb_init(Object *obj) -{ - MilkymistVgafbState *s = MILKYMIST_VGAFB(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - memory_region_init_io(&s->regs_region, OBJECT(s), &vgafb_mmio_ops, s, - "milkymist-vgafb", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void 
milkymist_vgafb_realize(DeviceState *dev, Error **errp) -{ - MilkymistVgafbState *s = MILKYMIST_VGAFB(dev); - - s->con = graphic_console_init(dev, 0, &vgafb_ops, s); -} - -static int vgafb_post_load(void *opaque, int version_id) -{ - vgafb_invalidate_display(opaque); - return 0; -} - -static const VMStateDescription vmstate_milkymist_vgafb = { - .name = "milkymist-vgafb", - .version_id = 1, - .minimum_version_id = 1, - .post_load = vgafb_post_load, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistVgafbState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_vgafb_properties[] = { - DEFINE_PROP_UINT32("fb_offset", MilkymistVgafbState, fb_offset, 0x0), - DEFINE_PROP_UINT32("fb_mask", MilkymistVgafbState, fb_mask, 0xffffffff), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_vgafb_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = milkymist_vgafb_reset; - dc->vmsd = &vmstate_milkymist_vgafb; - device_class_set_props(dc, milkymist_vgafb_properties); - dc->realize = milkymist_vgafb_realize; -} - -static const TypeInfo milkymist_vgafb_info = { - .name = TYPE_MILKYMIST_VGAFB, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistVgafbState), - .instance_init = milkymist_vgafb_init, - .class_init = milkymist_vgafb_class_init, -}; - -static void milkymist_vgafb_register_types(void) -{ - type_register_static(&milkymist_vgafb_info); -} - -type_init(milkymist_vgafb_register_types) diff --git a/hw/display/milkymist-vgafb_template.h b/hw/display/milkymist-vgafb_template.h deleted file mode 100644 index 96137f97093..00000000000 --- a/hw/display/milkymist-vgafb_template.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * QEMU model of the Milkymist VGA framebuffer. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - */ - -#if BITS == 8 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *to = rgb_to_pixel8(r, g, b); \ - to += 1; \ - } while (0) -#elif BITS == 15 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *(uint16_t *)to = rgb_to_pixel15(r, g, b); \ - to += 2; \ - } while (0) -#elif BITS == 16 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *(uint16_t *)to = rgb_to_pixel16(r, g, b); \ - to += 2; \ - } while (0) -#elif BITS == 24 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - uint32_t tmp = rgb_to_pixel24(r, g, b); \ - *(to++) = tmp & 0xff; \ - *(to++) = (tmp >> 8) & 0xff; \ - *(to++) = (tmp >> 16) & 0xff; \ - } while (0) -#elif BITS == 32 -#define COPY_PIXEL(to, r, g, b) \ - do { \ - *(uint32_t *)to = rgb_to_pixel32(r, g, b); \ - to += 4; \ - } while (0) -#else -#error unknown bit depth -#endif - -static void glue(draw_line_, BITS)(void *opaque, uint8_t *d, const uint8_t *s, - int width, int deststep) -{ - uint16_t rgb565; - uint8_t r, g, b; - - while (width--) { - rgb565 = lduw_be_p(s); - r = ((rgb565 >> 11) & 0x1f) << 3; - g = ((rgb565 >> 5) & 0x3f) << 2; - b = ((rgb565 >> 0) & 0x1f) << 3; - COPY_PIXEL(d, r, g, b); - s += 2; - } -} - -#undef BITS -#undef COPY_PIXEL diff --git a/hw/display/next-fb.c b/hw/display/next-fb.c index e2d895109db..dd6a1aa8aee 100644 --- a/hw/display/next-fb.c +++ b/hw/display/next-fb.c @@ -24,8 +24,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "ui/console.h" -#include "hw/hw.h" -#include "hw/boards.h" #include "hw/loader.h" #include "framebuffer.h" #include "ui/pixel_ops.h" diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 93907e82a33..29c80b4289b 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -30,7 +30,6 @@ #include "qemu/module.h" #include "hw/qdev-properties.h" #include "sysemu/runstate.h" -#include "migration/blocker.h" #include "migration/vmstate.h" #include "trace.h" @@ -321,7 +320,7 @@ static ram_addr_t qxl_rom_size(void) #define QXL_ROM_SZ 8192 QEMU_BUILD_BUG_ON(QXL_REQUIRED_SZ > QXL_ROM_SZ); - return QXL_ROM_SZ; + return QEMU_ALIGN_UP(QXL_REQUIRED_SZ, qemu_real_host_page_size); } static void init_qxl_rom(PCIQXLDevice *d) @@ -666,30 +665,6 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext) qxl->guest_primary.commands++; qxl_track_command(qxl, ext); qxl_log_command(qxl, "cmd", ext); - { - /* - * Windows 8 drivers place qxl commands in the vram - * (instead of the ram) bar. We can't live migrate such a - * guest, so add a migration blocker in case we detect - * this, to avoid triggering the assert in pre_save(). 
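qxl_rom_size() above now returns QXL_REQUIRED_SZ rounded up to the host page size instead of a fixed 8 KiB. A generic sketch of that align-up arithmetic (QEMU's QEMU_ALIGN_UP macro expands to essentially the same expression):

#include <stddef.h>

/* Sketch: round sz up to the next multiple of align (align > 0). */
static size_t align_up(size_t sz, size_t align)
{
    return (sz + align - 1) / align * align;
}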
- * - * https://cgit.freedesktop.org/spice/win32/qxl-wddm-dod/commit/?id=f6e099db39e7d0787f294d5fd0dce328b5210faa - */ - void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); - if (msg != NULL && ( - msg < (void *)qxl->vga.vram_ptr || - msg > ((void *)qxl->vga.vram_ptr + qxl->vga.vram_size))) { - if (!qxl->migration_blocker) { - Error *local_err = NULL; - error_setg(&qxl->migration_blocker, - "qxl: guest bug: command not in ram bar"); - migrate_add_blocker(qxl->migration_blocker, &local_err); - if (local_err) { - error_report_err(local_err); - } - } - } - } trace_qxl_ring_command_get(qxl->id, qxl_mode_to_string(qxl->mode)); return true; default: @@ -1283,12 +1258,6 @@ static void qxl_hard_reset(PCIQXLDevice *d, int loadvm) qemu_spice_create_host_memslot(&d->ssd); qxl_soft_reset(d); - if (d->migration_blocker) { - migrate_del_blocker(d->migration_blocker); - error_free(d->migration_blocker); - d->migration_blocker = NULL; - } - if (startstop) { qemu_spice_display_start(); } @@ -2283,7 +2252,9 @@ static int qxl_pre_save(void *opaque) } else { d->last_release_offset = (uint8_t *)d->last_release - ram_start; } - assert(d->last_release_offset < d->vga.vram_size); + if (d->last_release_offset >= d->vga.vram_size) { + return 1; + } return 0; } @@ -2384,7 +2355,7 @@ static bool qxl_monitors_config_needed(void *opaque) } -static VMStateDescription qxl_memslot = { +static const VMStateDescription qxl_memslot = { .name = "qxl-memslot", .version_id = QXL_SAVE_VERSION, .minimum_version_id = QXL_SAVE_VERSION, @@ -2396,7 +2367,7 @@ static VMStateDescription qxl_memslot = { } }; -static VMStateDescription qxl_surface = { +static const VMStateDescription qxl_surface = { .name = "qxl-surface", .version_id = QXL_SAVE_VERSION, .minimum_version_id = QXL_SAVE_VERSION, @@ -2414,7 +2385,7 @@ static VMStateDescription qxl_surface = { } }; -static VMStateDescription qxl_vmstate_monitors_config = { +static const VMStateDescription qxl_vmstate_monitors_config = { .name = "qxl/monitors-config", .version_id = 1, .minimum_version_id = 1, @@ -2425,7 +2396,7 @@ static VMStateDescription qxl_vmstate_monitors_config = { }, }; -static VMStateDescription qxl_vmstate = { +static const VMStateDescription qxl_vmstate = { .name = "qxl", .version_id = QXL_SAVE_VERSION, .minimum_version_id = QXL_SAVE_VERSION, @@ -2522,6 +2493,7 @@ static const TypeInfo qxl_primary_info = { .parent = TYPE_PCI_QXL, .class_init = qxl_primary_class_init, }; +module_obj("qxl-vga"); static void qxl_secondary_class_init(ObjectClass *klass, void *data) { @@ -2538,6 +2510,7 @@ static const TypeInfo qxl_secondary_info = { .parent = TYPE_PCI_QXL, .class_init = qxl_secondary_class_init, }; +module_obj("qxl"); static void qxl_register_types(void) { @@ -2547,3 +2520,5 @@ static void qxl_register_types(void) } type_init(qxl_register_types) + +module_dep("ui-spice-core"); diff --git a/hw/display/qxl.h b/hw/display/qxl.h index 379d3304abc..30d21f4d0bd 100644 --- a/hw/display/qxl.h +++ b/hw/display/qxl.h @@ -39,7 +39,6 @@ struct PCIQXLDevice { uint32_t cmdlog; uint32_t guest_bug; - Error *migration_blocker; enum qxl_mode mode; uint32_t cmdflags; diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 8789722ef27..663c37e7f28 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -1033,16 +1033,18 @@ static void sm501_i2c_write(void *opaque, hwaddr addr, uint64_t value, case SM501_I2C_CONTROL: if (value & SM501_I2C_CONTROL_ENABLE) { if (value & SM501_I2C_CONTROL_START) { + bool is_recv = s->i2c_addr & 1; int res = i2c_start_transfer(s->i2c_bus, 
s->i2c_addr >> 1, - s->i2c_addr & 1); - s->i2c_status |= (res ? SM501_I2C_STATUS_ERROR : 0); - if (!res) { + is_recv); + if (res) { + s->i2c_status |= SM501_I2C_STATUS_ERROR; + } else { int i; for (i = 0; i <= s->i2c_byte_count; i++) { - res = i2c_send_recv(s->i2c_bus, &s->i2c_data[i], - !(s->i2c_addr & 1)); - if (res) { + if (is_recv) { + s->i2c_data[i] = i2c_recv(s->i2c_bus); + } else if (i2c_send(s->i2c_bus, s->i2c_data[i]) < 0) { s->i2c_status |= SM501_I2C_STATUS_ERROR; return; } @@ -1826,7 +1828,7 @@ static void sm501_init(SM501State *s, DeviceState *dev, s->i2c_bus = i2c_init_bus(dev, "sm501.i2c"); /* ddc */ I2CDDCState *ddc = I2CDDC(qdev_new(TYPE_I2CDDC)); - i2c_set_slave_address(I2C_SLAVE(ddc), 0x50); + i2c_slave_set_address(I2C_SLAVE(ddc), 0x50); qdev_realize_and_unref(DEVICE(ddc), BUS(s->i2c_bus), &error_abort); /* mmio */ diff --git a/hw/display/trace-events b/hw/display/trace-events index 957b8ba9943..3a7a2c957f4 100644 --- a/hw/display/trace-events +++ b/hw/display/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # jazz_led.c jazz_led_read(uint64_t addr, uint8_t val) "read addr=0x%"PRIx64": 0x%x" @@ -13,16 +13,6 @@ xenfb_input_connected(void *xendev, int abs_pointer_wanted) "%p abs %d" g364fb_read(uint64_t addr, uint32_t val) "read addr=0x%"PRIx64": 0x%x" g364fb_write(uint64_t addr, uint32_t new) "write addr=0x%"PRIx64": 0x%x" -# milkymist-tmu2.c -milkymist_tmu2_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_tmu2_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_tmu2_start(void) "Start TMU" -milkymist_tmu2_pulse_irq(void) "Pulse IRQ" - -# milkymist-vgafb.c -milkymist_vgafb_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_vgafb_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" - # vmware_vga.c vmware_value_read(uint32_t index, uint32_t value) "index %d, value 0x%x" vmware_value_write(uint32_t index, uint32_t value) "index %d, value 0x%x" @@ -40,8 +30,10 @@ virtio_gpu_features(bool virgl) "virgl %d" virtio_gpu_cmd_get_display_info(void) "" virtio_gpu_cmd_get_edid(uint32_t scanout) "scanout %d" virtio_gpu_cmd_set_scanout(uint32_t id, uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint32_t y) "id %d, res 0x%x, w %d, h %d, x %d, y %d" +virtio_gpu_cmd_set_scanout_blob(uint32_t id, uint32_t res, uint32_t w, uint32_t h, uint32_t x, uint32_t y) "id %d, res 0x%x, w %d, h %d, x %d, y %d" virtio_gpu_cmd_res_create_2d(uint32_t res, uint32_t fmt, uint32_t w, uint32_t h) "res 0x%x, fmt 0x%x, w %d, h %d" virtio_gpu_cmd_res_create_3d(uint32_t res, uint32_t fmt, uint32_t w, uint32_t h, uint32_t d) "res 0x%x, fmt 0x%x, w %d, h %d, d %d" +virtio_gpu_cmd_res_create_blob(uint32_t res, uint64_t size) "res 0x%x, size %" PRId64 virtio_gpu_cmd_res_unref(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_attach(uint32_t res) "res 0x%x" virtio_gpu_cmd_res_back_detach(uint32_t res) "res 0x%x" @@ -175,3 +167,10 @@ sm501_disp_ctrl_read(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" sm501_disp_ctrl_write(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" sm501_2d_engine_read(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" sm501_2d_engine_write(uint32_t addr, uint32_t val) "addr=0x%x, val=0x%x" + +# macfb.c +macfb_ctrl_read(uint64_t addr, uint64_t value, unsigned int size) "addr 0x%"PRIx64 " value 0x%"PRIx64 " size %u" +macfb_ctrl_write(uint64_t addr, uint64_t value, unsigned int size) "addr 
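The sm501.c hunk above replaces i2c_send_recv() with explicit i2c_recv()/i2c_send() calls and derives the transfer direction from the guest-programmed address register: bit 0 selects receive, bits 7:1 hold the 7-bit target address. A small sketch of that split (a hypothetical helper for illustration, not a QEMU API):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: decode the SM501 I2C address register as the hunk above does. */
static void sm501_i2c_decode_addr(uint8_t addr_reg, uint8_t *addr7, bool *is_recv)
{
    *is_recv = addr_reg & 1;   /* bit 0: 1 = read from device, 0 = write */
    *addr7 = addr_reg >> 1;    /* bits 7:1: 7-bit I2C target address */
}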
0x%"PRIx64 " value 0x%"PRIx64 " size %u" +macfb_sense_read(uint32_t value) "video sense: 0x%"PRIx32 +macfb_sense_write(uint32_t value) "video sense: 0x%"PRIx32 +macfb_update_mode(uint32_t width, uint32_t height, uint8_t depth) "setting mode to width %"PRId32 " height %"PRId32 " size %d" diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c index 48d29630ab7..62fb5c38c1f 100644 --- a/hw/display/vga-pci.c +++ b/hw/display/vga-pci.c @@ -49,7 +49,7 @@ struct PCIVGAState { qemu_edid_info edid_info; MemoryRegion mmio; MemoryRegion mrs[4]; - uint8_t edid[256]; + uint8_t edid[384]; }; #define TYPE_PCI_VGA "pci-vga" diff --git a/hw/display/vga.c b/hw/display/vga.c index 836ad50c7b6..9d1f66af402 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -39,6 +39,8 @@ //#define DEBUG_VGA_MEM //#define DEBUG_VGA_REG +bool have_vga = true; + /* 16 state changes per vertical frame @60 Hz */ #define VGA_TEXT_CURSOR_PERIOD_MS (1000 * 2 * 16 / 60) @@ -750,7 +752,8 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val) val == VBE_DISPI_ID1 || val == VBE_DISPI_ID2 || val == VBE_DISPI_ID3 || - val == VBE_DISPI_ID4) { + val == VBE_DISPI_ID4 || + val == VBE_DISPI_ID5) { s->vbe_regs[s->vbe_index] = val; } break; diff --git a/hw/display/vhost-user-gpu-pci.c b/hw/display/vhost-user-gpu-pci.c index a02b23ecaf1..daefcf71015 100644 --- a/hw/display/vhost-user-gpu-pci.c +++ b/hw/display/vhost-user-gpu-pci.c @@ -43,6 +43,7 @@ static const VirtioPCIDeviceTypeInfo vhost_user_gpu_pci_info = { .instance_size = sizeof(VhostUserGPUPCI), .instance_init = vhost_user_gpu_pci_initfn, }; +module_obj(TYPE_VHOST_USER_GPU_PCI); static void vhost_user_gpu_pci_register_types(void) { diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 6cdaa1c73b9..49df56cd14e 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -415,14 +415,16 @@ vhost_user_gpu_get_config(VirtIODevice *vdev, uint8_t *config_data) VirtIOGPUBase *b = VIRTIO_GPU_BASE(vdev); struct virtio_gpu_config *vgconfig = (struct virtio_gpu_config *)config_data; + Error *local_err = NULL; int ret; memset(config_data, 0, sizeof(struct virtio_gpu_config)); ret = vhost_dev_get_config(&g->vhost->dev, - config_data, sizeof(struct virtio_gpu_config)); + config_data, sizeof(struct virtio_gpu_config), + &local_err); if (ret) { - error_report("vhost-user-gpu: get device config space failed"); + error_report_err(local_err); return; } @@ -596,6 +598,7 @@ static const TypeInfo vhost_user_gpu_info = { .instance_finalize = vhost_user_gpu_instance_finalize, .class_init = vhost_user_gpu_class_init, }; +module_obj(TYPE_VHOST_USER_GPU); static void vhost_user_gpu_register_types(void) { diff --git a/hw/display/vhost-user-vga.c b/hw/display/vhost-user-vga.c index a34a99856d7..072c9c65bc7 100644 --- a/hw/display/vhost-user-vga.c +++ b/hw/display/vhost-user-vga.c @@ -44,6 +44,7 @@ static const VirtioPCIDeviceTypeInfo vhost_user_vga_info = { .instance_size = sizeof(VhostUserVGA), .instance_init = vhost_user_vga_inst_initfn, }; +module_obj(TYPE_VHOST_USER_VGA); static void vhost_user_vga_register_types(void) { diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c deleted file mode 100644 index d98964858e1..00000000000 --- a/hw/display/virtio-gpu-3d.c +++ /dev/null @@ -1,628 +0,0 @@ -/* - * Virtio GPU Device - * - * Copyright Red Hat, Inc. 2013-2014 - * - * Authors: - * Dave Airlie - * Gerd Hoffmann - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. 
- * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu/iov.h" -#include "trace.h" -#include "hw/virtio/virtio.h" -#include "hw/virtio/virtio-gpu.h" - -#include - -static struct virgl_renderer_callbacks virtio_gpu_3d_cbs; - -static void virgl_cmd_create_resource_2d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_create_2d c2d; - struct virgl_renderer_resource_create_args args; - - VIRTIO_GPU_FILL_CMD(c2d); - trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format, - c2d.width, c2d.height); - - args.handle = c2d.resource_id; - args.target = 2; - args.format = c2d.format; - args.bind = (1 << 1); - args.width = c2d.width; - args.height = c2d.height; - args.depth = 1; - args.array_size = 1; - args.last_level = 0; - args.nr_samples = 0; - args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP; - virgl_renderer_resource_create(&args, NULL, 0); -} - -static void virgl_cmd_create_resource_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_create_3d c3d; - struct virgl_renderer_resource_create_args args; - - VIRTIO_GPU_FILL_CMD(c3d); - trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format, - c3d.width, c3d.height, c3d.depth); - - args.handle = c3d.resource_id; - args.target = c3d.target; - args.format = c3d.format; - args.bind = c3d.bind; - args.width = c3d.width; - args.height = c3d.height; - args.depth = c3d.depth; - args.array_size = c3d.array_size; - args.last_level = c3d.last_level; - args.nr_samples = c3d.nr_samples; - args.flags = c3d.flags; - virgl_renderer_resource_create(&args, NULL, 0); -} - -static void virgl_cmd_resource_unref(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_unref unref; - struct iovec *res_iovs = NULL; - int num_iovs = 0; - - VIRTIO_GPU_FILL_CMD(unref); - trace_virtio_gpu_cmd_res_unref(unref.resource_id); - - virgl_renderer_resource_detach_iov(unref.resource_id, - &res_iovs, - &num_iovs); - if (res_iovs != NULL && num_iovs != 0) { - virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); - } - virgl_renderer_resource_unref(unref.resource_id); -} - -static void virgl_cmd_context_create(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_create cc; - - VIRTIO_GPU_FILL_CMD(cc); - trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id, - cc.debug_name); - - virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, - cc.debug_name); -} - -static void virgl_cmd_context_destroy(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_destroy cd; - - VIRTIO_GPU_FILL_CMD(cd); - trace_virtio_gpu_cmd_ctx_destroy(cd.hdr.ctx_id); - - virgl_renderer_context_destroy(cd.hdr.ctx_id); -} - -static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y, - int width, int height) -{ - if (!g->parent_obj.scanout[idx].con) { - return; - } - - dpy_gl_update(g->parent_obj.scanout[idx].con, x, y, width, height); -} - -static void virgl_cmd_resource_flush(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_flush rf; - int i; - - VIRTIO_GPU_FILL_CMD(rf); - trace_virtio_gpu_cmd_res_flush(rf.resource_id, - rf.r.width, rf.r.height, rf.r.x, rf.r.y); - - for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - if (g->parent_obj.scanout[i].resource_id != rf.resource_id) { - continue; - } - virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height); - } -} - -static void virgl_cmd_set_scanout(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - 
struct virtio_gpu_set_scanout ss; - struct virgl_renderer_resource_info info; - int ret; - - VIRTIO_GPU_FILL_CMD(ss); - trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id, - ss.r.width, ss.r.height, ss.r.x, ss.r.y); - - if (ss.scanout_id >= g->parent_obj.conf.max_outputs) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d", - __func__, ss.scanout_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; - return; - } - g->parent_obj.enable = 1; - - memset(&info, 0, sizeof(info)); - - if (ss.resource_id && ss.r.width && ss.r.height) { - ret = virgl_renderer_resource_get_info(ss.resource_id, &info); - if (ret == -1) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: illegal resource specified %d\n", - __func__, ss.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; - return; - } - qemu_console_resize(g->parent_obj.scanout[ss.scanout_id].con, - ss.r.width, ss.r.height); - virgl_renderer_force_ctx_0(); - dpy_gl_scanout_texture( - g->parent_obj.scanout[ss.scanout_id].con, info.tex_id, - info.flags & 1 /* FIXME: Y_0_TOP */, - info.width, info.height, - ss.r.x, ss.r.y, ss.r.width, ss.r.height); - } else { - dpy_gfx_replace_surface( - g->parent_obj.scanout[ss.scanout_id].con, NULL); - dpy_gl_scanout_disable(g->parent_obj.scanout[ss.scanout_id].con); - } - g->parent_obj.scanout[ss.scanout_id].resource_id = ss.resource_id; -} - -static void virgl_cmd_submit_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_cmd_submit cs; - void *buf; - size_t s; - - VIRTIO_GPU_FILL_CMD(cs); - trace_virtio_gpu_cmd_ctx_submit(cs.hdr.ctx_id, cs.size); - - buf = g_malloc(cs.size); - s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, - sizeof(cs), buf, cs.size); - if (s != cs.size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: size mismatch (%zd/%d)", - __func__, s, cs.size); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; - goto out; - } - - if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { - g->stats.req_3d++; - g->stats.bytes_3d += cs.size; - } - - virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4); - -out: - g_free(buf); -} - -static void virgl_cmd_transfer_to_host_2d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_transfer_to_host_2d t2d; - struct virtio_gpu_box box; - - VIRTIO_GPU_FILL_CMD(t2d); - trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id); - - box.x = t2d.r.x; - box.y = t2d.r.y; - box.z = 0; - box.w = t2d.r.width; - box.h = t2d.r.height; - box.d = 1; - - virgl_renderer_transfer_write_iov(t2d.resource_id, - 0, - 0, - 0, - 0, - (struct virgl_box *)&box, - t2d.offset, NULL, 0); -} - -static void virgl_cmd_transfer_to_host_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_transfer_host_3d t3d; - - VIRTIO_GPU_FILL_CMD(t3d); - trace_virtio_gpu_cmd_res_xfer_toh_3d(t3d.resource_id); - - virgl_renderer_transfer_write_iov(t3d.resource_id, - t3d.hdr.ctx_id, - t3d.level, - t3d.stride, - t3d.layer_stride, - (struct virgl_box *)&t3d.box, - t3d.offset, NULL, 0); -} - -static void -virgl_cmd_transfer_from_host_3d(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_transfer_host_3d tf3d; - - VIRTIO_GPU_FILL_CMD(tf3d); - trace_virtio_gpu_cmd_res_xfer_fromh_3d(tf3d.resource_id); - - virgl_renderer_transfer_read_iov(tf3d.resource_id, - tf3d.hdr.ctx_id, - tf3d.level, - tf3d.stride, - tf3d.layer_stride, - (struct virgl_box *)&tf3d.box, - tf3d.offset, NULL, 0); -} - - -static void virgl_resource_attach_backing(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) 
-{ - struct virtio_gpu_resource_attach_backing att_rb; - struct iovec *res_iovs; - int ret; - - VIRTIO_GPU_FILL_CMD(att_rb); - trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); - - ret = virtio_gpu_create_mapping_iov(g, &att_rb, cmd, NULL, &res_iovs); - if (ret != 0) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } - - ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, - res_iovs, att_rb.nr_entries); - - if (ret != 0) - virtio_gpu_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); -} - -static void virgl_resource_detach_backing(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_resource_detach_backing detach_rb; - struct iovec *res_iovs = NULL; - int num_iovs = 0; - - VIRTIO_GPU_FILL_CMD(detach_rb); - trace_virtio_gpu_cmd_res_back_detach(detach_rb.resource_id); - - virgl_renderer_resource_detach_iov(detach_rb.resource_id, - &res_iovs, - &num_iovs); - if (res_iovs == NULL || num_iovs == 0) { - return; - } - virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); -} - - -static void virgl_cmd_ctx_attach_resource(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_resource att_res; - - VIRTIO_GPU_FILL_CMD(att_res); - trace_virtio_gpu_cmd_ctx_res_attach(att_res.hdr.ctx_id, - att_res.resource_id); - - virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id); -} - -static void virgl_cmd_ctx_detach_resource(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_ctx_resource det_res; - - VIRTIO_GPU_FILL_CMD(det_res); - trace_virtio_gpu_cmd_ctx_res_detach(det_res.hdr.ctx_id, - det_res.resource_id); - - virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id); -} - -static void virgl_cmd_get_capset_info(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_get_capset_info info; - struct virtio_gpu_resp_capset_info resp; - - VIRTIO_GPU_FILL_CMD(info); - - memset(&resp, 0, sizeof(resp)); - if (info.capset_index == 0) { - resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; - virgl_renderer_get_cap_set(resp.capset_id, - &resp.capset_max_version, - &resp.capset_max_size); - } else if (info.capset_index == 1) { - resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2; - virgl_renderer_get_cap_set(resp.capset_id, - &resp.capset_max_version, - &resp.capset_max_size); - } else { - resp.capset_max_version = 0; - resp.capset_max_size = 0; - } - resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO; - virtio_gpu_ctrl_response(g, cmd, &resp.hdr, sizeof(resp)); -} - -static void virgl_cmd_get_capset(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - struct virtio_gpu_get_capset gc; - struct virtio_gpu_resp_capset *resp; - uint32_t max_ver, max_size; - VIRTIO_GPU_FILL_CMD(gc); - - virgl_renderer_get_cap_set(gc.capset_id, &max_ver, - &max_size); - if (!max_size) { - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; - return; - } - - resp = g_malloc0(sizeof(*resp) + max_size); - resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; - virgl_renderer_fill_caps(gc.capset_id, - gc.capset_version, - (void *)resp->capset_data); - virtio_gpu_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size); - g_free(resp); -} - -void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) -{ - VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr); - - virgl_renderer_force_ctx_0(); - switch (cmd->cmd_hdr.type) { - case VIRTIO_GPU_CMD_CTX_CREATE: - virgl_cmd_context_create(g, cmd); - break; - case VIRTIO_GPU_CMD_CTX_DESTROY: - virgl_cmd_context_destroy(g, cmd); - break; - case 
VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: - virgl_cmd_create_resource_2d(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D: - virgl_cmd_create_resource_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_SUBMIT_3D: - virgl_cmd_submit_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: - virgl_cmd_transfer_to_host_2d(g, cmd); - break; - case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D: - virgl_cmd_transfer_to_host_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D: - virgl_cmd_transfer_from_host_3d(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: - virgl_resource_attach_backing(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: - virgl_resource_detach_backing(g, cmd); - break; - case VIRTIO_GPU_CMD_SET_SCANOUT: - virgl_cmd_set_scanout(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_FLUSH: - virgl_cmd_resource_flush(g, cmd); - break; - case VIRTIO_GPU_CMD_RESOURCE_UNREF: - virgl_cmd_resource_unref(g, cmd); - break; - case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE: - /* TODO add security */ - virgl_cmd_ctx_attach_resource(g, cmd); - break; - case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE: - /* TODO add security */ - virgl_cmd_ctx_detach_resource(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_CAPSET_INFO: - virgl_cmd_get_capset_info(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_CAPSET: - virgl_cmd_get_capset(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: - virtio_gpu_get_display_info(g, cmd); - break; - case VIRTIO_GPU_CMD_GET_EDID: - virtio_gpu_get_edid(g, cmd); - break; - default: - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - break; - } - - if (cmd->finished) { - return; - } - if (cmd->error) { - fprintf(stderr, "%s: ctrl 0x%x, error 0x%x\n", __func__, - cmd->cmd_hdr.type, cmd->error); - virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error); - return; - } - if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) { - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); - return; - } - - trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); - virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); -} - -static void virgl_write_fence(void *opaque, uint32_t fence) -{ - VirtIOGPU *g = opaque; - struct virtio_gpu_ctrl_command *cmd, *tmp; - - QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { - /* - * the guest can end up emitting fences out of order - * so we should check all fenced cmds not just the first one. 
- */ - if (cmd->cmd_hdr.fence_id > fence) { - continue; - } - trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); - virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); - QTAILQ_REMOVE(&g->fenceq, cmd, next); - g_free(cmd); - g->inflight--; - if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { - fprintf(stderr, "inflight: %3d (-)\r", g->inflight); - } - } -} - -static virgl_renderer_gl_context -virgl_create_context(void *opaque, int scanout_idx, - struct virgl_renderer_gl_ctx_param *params) -{ - VirtIOGPU *g = opaque; - QEMUGLContext ctx; - QEMUGLParams qparams; - - qparams.major_ver = params->major_ver; - qparams.minor_ver = params->minor_ver; - - ctx = dpy_gl_ctx_create(g->parent_obj.scanout[scanout_idx].con, &qparams); - return (virgl_renderer_gl_context)ctx; -} - -static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx) -{ - VirtIOGPU *g = opaque; - QEMUGLContext qctx = (QEMUGLContext)ctx; - - dpy_gl_ctx_destroy(g->parent_obj.scanout[0].con, qctx); -} - -static int virgl_make_context_current(void *opaque, int scanout_idx, - virgl_renderer_gl_context ctx) -{ - VirtIOGPU *g = opaque; - QEMUGLContext qctx = (QEMUGLContext)ctx; - - return dpy_gl_ctx_make_current(g->parent_obj.scanout[scanout_idx].con, - qctx); -} - -static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { - .version = 1, - .write_fence = virgl_write_fence, - .create_gl_context = virgl_create_context, - .destroy_gl_context = virgl_destroy_context, - .make_current = virgl_make_context_current, -}; - -static void virtio_gpu_print_stats(void *opaque) -{ - VirtIOGPU *g = opaque; - - if (g->stats.requests) { - fprintf(stderr, "stats: vq req %4d, %3d -- 3D %4d (%5d)\n", - g->stats.requests, - g->stats.max_inflight, - g->stats.req_3d, - g->stats.bytes_3d); - g->stats.requests = 0; - g->stats.max_inflight = 0; - g->stats.req_3d = 0; - g->stats.bytes_3d = 0; - } else { - fprintf(stderr, "stats: idle\r"); - } - timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); -} - -static void virtio_gpu_fence_poll(void *opaque) -{ - VirtIOGPU *g = opaque; - - virgl_renderer_poll(); - virtio_gpu_process_cmdq(g); - if (!QTAILQ_EMPTY(&g->cmdq) || !QTAILQ_EMPTY(&g->fenceq)) { - timer_mod(g->fence_poll, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10); - } -} - -void virtio_gpu_virgl_fence_poll(VirtIOGPU *g) -{ - virtio_gpu_fence_poll(g); -} - -void virtio_gpu_virgl_reset(VirtIOGPU *g) -{ - int i; - - virgl_renderer_reset(); - for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); - dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); - } -} - -int virtio_gpu_virgl_init(VirtIOGPU *g) -{ - int ret; - - ret = virgl_renderer_init(g, 0, &virtio_gpu_3d_cbs); - if (ret != 0) { - return ret; - } - - g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL, - virtio_gpu_fence_poll, g); - - if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { - g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL, - virtio_gpu_print_stats, g); - timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); - } - return 0; -} - -int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g) -{ - uint32_t capset2_max_ver, capset2_max_size; - virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, - &capset2_max_ver, - &capset2_max_size); - - return capset2_max_ver ? 
2 : 1; -} diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 25f8920fdb6..c8da4806e0b 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -25,7 +25,6 @@ virtio_gpu_base_reset(VirtIOGPUBase *g) int i; g->enable = 0; - g->use_virgl_renderer = false; for (i = 0; i < g->conf.max_outputs; i++) { g->scanout[i].resource_id = 0; @@ -162,7 +161,6 @@ virtio_gpu_base_device_realize(DeviceState *qdev, return false; } - g->use_virgl_renderer = false; if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); if (migrate_add_blocker(g->migration_blocker, errp) < 0) { @@ -210,6 +208,9 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, if (virtio_gpu_edid_enabled(g->conf)) { features |= (1 << VIRTIO_GPU_F_EDID); } + if (virtio_gpu_blob_enabled(g->conf)) { + features |= (1 << VIRTIO_GPU_F_RESOURCE_BLOB); + } return features; } @@ -218,10 +219,8 @@ static void virtio_gpu_base_set_features(VirtIODevice *vdev, uint64_t features) { static const uint32_t virgl = (1 << VIRTIO_GPU_F_VIRGL); - VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev); - g->use_virgl_renderer = ((features & virgl) == virgl); - trace_virtio_gpu_features(g->use_virgl_renderer); + trace_virtio_gpu_features(((features & virgl) == virgl)); } static void @@ -257,6 +256,7 @@ static const TypeInfo virtio_gpu_base_info = { .class_init = virtio_gpu_base_class_init, .abstract = true }; +module_obj(TYPE_VIRTIO_GPU_BASE); static void virtio_register_types(void) diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c new file mode 100644 index 00000000000..6cc4313b1af --- /dev/null +++ b/hw/display/virtio-gpu-gl.c @@ -0,0 +1,171 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
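The virtio-gpu-base.c hunk above starts offering VIRTIO_GPU_F_RESOURCE_BLOB when blob resources are enabled, and drops the cached use_virgl_renderer flag now that the virgl path lives in its own device; set_features merely traces whether the driver accepted VIRGL. For context, a sketch of how such a flag could be re-derived on demand from the negotiated features instead of being cached (illustrative only, not code from this patch):

    #include "qemu/osdep.h"
    #include "hw/virtio/virtio.h"
    #include "hw/virtio/virtio-gpu.h"

    /* Illustrative only: an offered feature bit can always be re-checked
     * against the negotiated feature word rather than stored in device
     * state. */
    static bool gpu_guest_accepted_virgl(VirtIODevice *vdev)
    {
        return virtio_vdev_has_feature(vdev, VIRTIO_GPU_F_VIRGL);
    }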
+ */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-gpu-bswap.h" +#include "hw/virtio/virtio-gpu-pixman.h" +#include "hw/qdev-properties.h" + +#include + +static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id) +{ + uint32_t width, height; + uint32_t pixels, *data; + + data = virgl_renderer_get_cursor_data(resource_id, &width, &height); + if (!data) { + return; + } + + if (width != s->current_cursor->width || + height != s->current_cursor->height) { + free(data); + return; + } + + pixels = s->current_cursor->width * s->current_cursor->height; + memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t)); + free(data); +} + +static void virtio_gpu_gl_flushed(VirtIOGPUBase *b) +{ + VirtIOGPU *g = VIRTIO_GPU(b); + + virtio_gpu_process_cmdq(g); +} + +static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOGPU *g = VIRTIO_GPU(vdev); + VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev); + struct virtio_gpu_ctrl_command *cmd; + + if (!virtio_queue_ready(vq)) { + return; + } + + if (!gl->renderer_inited) { + virtio_gpu_virgl_init(g); + gl->renderer_inited = true; + } + if (gl->renderer_reset) { + gl->renderer_reset = false; + virtio_gpu_virgl_reset(g); + } + + cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command)); + while (cmd) { + cmd->vq = vq; + cmd->error = 0; + cmd->finished = false; + QTAILQ_INSERT_TAIL(&g->cmdq, cmd, next); + cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command)); + } + + virtio_gpu_process_cmdq(g); + virtio_gpu_virgl_fence_poll(g); +} + +static void virtio_gpu_gl_reset(VirtIODevice *vdev) +{ + VirtIOGPU *g = VIRTIO_GPU(vdev); + VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev); + + virtio_gpu_reset(vdev); + + /* + * GL functions must be called with the associated GL context in main + * thread, and when the renderer is unblocked. 
+ */ + if (gl->renderer_inited && !gl->renderer_reset) { + virtio_gpu_virgl_reset_scanout(g); + gl->renderer_reset = true; + } +} + +static void virtio_gpu_gl_device_realize(DeviceState *qdev, Error **errp) +{ + VirtIOGPU *g = VIRTIO_GPU(qdev); + +#if defined(HOST_WORDS_BIGENDIAN) + error_setg(errp, "virgl is not supported on bigendian platforms"); + return; +#endif + + if (!object_resolve_path_type("", TYPE_VIRTIO_GPU_GL, NULL)) { + error_setg(errp, "at most one %s device is permitted", TYPE_VIRTIO_GPU_GL); + return; + } + + if (!display_opengl) { + error_setg(errp, "opengl is not available"); + return; + } + + g->parent_obj.conf.flags |= (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED); + VIRTIO_GPU_BASE(g)->virtio_config.num_capsets = + virtio_gpu_virgl_get_num_capsets(g); + + virtio_gpu_device_realize(qdev, errp); +} + +static Property virtio_gpu_gl_properties[] = { + DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_STATS_ENABLED, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_gpu_gl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VirtIOGPUBaseClass *vbc = VIRTIO_GPU_BASE_CLASS(klass); + VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass); + + vbc->gl_flushed = virtio_gpu_gl_flushed; + vgc->handle_ctrl = virtio_gpu_gl_handle_ctrl; + vgc->process_cmd = virtio_gpu_virgl_process_cmd; + vgc->update_cursor_data = virtio_gpu_gl_update_cursor_data; + + vdc->realize = virtio_gpu_gl_device_realize; + vdc->reset = virtio_gpu_gl_reset; + device_class_set_props(dc, virtio_gpu_gl_properties); +} + +static const TypeInfo virtio_gpu_gl_info = { + .name = TYPE_VIRTIO_GPU_GL, + .parent = TYPE_VIRTIO_GPU, + .instance_size = sizeof(VirtIOGPUGL), + .class_init = virtio_gpu_gl_class_init, +}; +module_obj(TYPE_VIRTIO_GPU_GL); + +static void virtio_register_types(void) +{ + type_register_static(&virtio_gpu_gl_info); +} + +type_init(virtio_register_types) + +module_dep("hw-display-virtio-gpu"); diff --git a/hw/display/virtio-gpu-pci-gl.c b/hw/display/virtio-gpu-pci-gl.c new file mode 100644 index 00000000000..99b14a07185 --- /dev/null +++ b/hw/display/virtio-gpu-pci-gl.c @@ -0,0 +1,58 @@ +/* + * Virtio video device + * + * Copyright Red Hat + * + * Authors: + * Dave Airlie + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-gpu-pci.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_GPU_GL_PCI "virtio-gpu-gl-pci" +typedef struct VirtIOGPUGLPCI VirtIOGPUGLPCI; +DECLARE_INSTANCE_CHECKER(VirtIOGPUGLPCI, VIRTIO_GPU_GL_PCI, + TYPE_VIRTIO_GPU_GL_PCI) + +struct VirtIOGPUGLPCI { + VirtIOGPUPCIBase parent_obj; + VirtIOGPUGL vdev; +}; + +static void virtio_gpu_gl_initfn(Object *obj) +{ + VirtIOGPUGLPCI *dev = VIRTIO_GPU_GL_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU_GL); + VIRTIO_GPU_PCI_BASE(obj)->vgpu = VIRTIO_GPU_BASE(&dev->vdev); +} + +static const VirtioPCIDeviceTypeInfo virtio_gpu_gl_pci_info = { + .generic_name = TYPE_VIRTIO_GPU_GL_PCI, + .parent = TYPE_VIRTIO_GPU_PCI_BASE, + .instance_size = sizeof(VirtIOGPUGLPCI), + .instance_init = virtio_gpu_gl_initfn, +}; +module_obj(TYPE_VIRTIO_GPU_GL_PCI); + +static void virtio_gpu_gl_pci_register_types(void) +{ + virtio_pci_types_register(&virtio_gpu_gl_pci_info); +} + +type_init(virtio_gpu_gl_pci_register_types) + +module_dep("hw-display-virtio-gpu-pci"); diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index d742a30aecf..e36eee0c409 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -64,6 +64,7 @@ static const TypeInfo virtio_gpu_pci_base_info = { .class_init = virtio_gpu_pci_base_class_init, .abstract = true }; +module_obj(TYPE_VIRTIO_GPU_PCI_BASE); #define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci" typedef struct VirtIOGPUPCI VirtIOGPUPCI; @@ -90,6 +91,7 @@ static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = { .instance_size = sizeof(VirtIOGPUPCI), .instance_init = virtio_gpu_initfn, }; +module_obj(TYPE_VIRTIO_GPU_PCI); static void virtio_gpu_pci_register_types(void) { diff --git a/hw/display/virtio-gpu-udmabuf-stubs.c b/hw/display/virtio-gpu-udmabuf-stubs.c new file mode 100644 index 00000000000..f692e135103 --- /dev/null +++ b/hw/display/virtio-gpu-udmabuf-stubs.c @@ -0,0 +1,28 @@ +#include "qemu/osdep.h" +#include "hw/virtio/virtio-gpu.h" + +bool virtio_gpu_have_udmabuf(void) +{ + /* nothing (stub) */ + return false; +} + +void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res) +{ + /* nothing (stub) */ +} + +void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res) +{ + /* nothing (stub) */ +} + +int virtio_gpu_update_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) +{ + /* nothing (stub) */ + return 0; +} diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c new file mode 100644 index 00000000000..1597921c51f --- /dev/null +++ b/hw/display/virtio-gpu-udmabuf.c @@ -0,0 +1,230 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
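virtio-gpu-udmabuf-stubs.c above provides no-op fallbacks for hosts without udmabuf support, with virtio_gpu_have_udmabuf() reporting false. Callers are expected to gate blob functionality on that probe; a hypothetical realize-time guard might look like this (sketch only; the helper name is made up and the real check is not shown in this excerpt):

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "hw/virtio/virtio-gpu.h"

    /* Hypothetical guard: refuse blob resources when only the udmabuf
     * stubs are built in, i.e. virtio_gpu_have_udmabuf() returns false. */
    static bool virtio_gpu_check_blob_prereqs(VirtIOGPU *g, Error **errp)
    {
        if (virtio_gpu_blob_enabled(g->parent_obj.conf) &&
            !virtio_gpu_have_udmabuf()) {
            error_setg(errp, "blob resources require host udmabuf support");
            return false;
        }
        return true;
    }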
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "ui/console.h" +#include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-gpu-pixman.h" +#include "trace.h" +#include "exec/ramblock.h" +#include "sysemu/hostmem.h" +#include +#include +#include +#include "qemu/memfd.h" +#include "standard-headers/linux/udmabuf.h" + +static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res) +{ + struct udmabuf_create_list *list; + RAMBlock *rb; + ram_addr_t offset; + int udmabuf, i; + + udmabuf = udmabuf_fd(); + if (udmabuf < 0) { + return; + } + + list = g_malloc0(sizeof(struct udmabuf_create_list) + + sizeof(struct udmabuf_create_item) * res->iov_cnt); + + for (i = 0; i < res->iov_cnt; i++) { + rcu_read_lock(); + rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset); + rcu_read_unlock(); + + if (!rb || rb->fd < 0) { + g_free(list); + return; + } + + list->list[i].memfd = rb->fd; + list->list[i].offset = offset; + list->list[i].size = res->iov[i].iov_len; + } + + list->count = res->iov_cnt; + list->flags = UDMABUF_FLAGS_CLOEXEC; + + res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list); + if (res->dmabuf_fd < 0) { + warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__, + strerror(errno)); + } + g_free(list); +} + +static void virtio_gpu_remap_udmabuf(struct virtio_gpu_simple_resource *res) +{ + res->remapped = mmap(NULL, res->blob_size, PROT_READ, + MAP_SHARED, res->dmabuf_fd, 0); + if (res->remapped == MAP_FAILED) { + warn_report("%s: dmabuf mmap failed: %s", __func__, + strerror(errno)); + res->remapped = NULL; + } +} + +static void virtio_gpu_destroy_udmabuf(struct virtio_gpu_simple_resource *res) +{ + if (res->remapped) { + munmap(res->remapped, res->blob_size); + res->remapped = NULL; + } + if (res->dmabuf_fd >= 0) { + close(res->dmabuf_fd); + res->dmabuf_fd = -1; + } +} + +static int find_memory_backend_type(Object *obj, void *opaque) +{ + bool *memfd_backend = opaque; + int ret; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + RAMBlock *rb = backend->mr.ram_block; + + if (rb && rb->fd > 0) { + ret = fcntl(rb->fd, F_GET_SEALS); + if (ret > 0) { + *memfd_backend = true; + } + } + } + + return 0; +} + +bool virtio_gpu_have_udmabuf(void) +{ + Object *memdev_root; + int udmabuf; + bool memfd_backend = false; + + udmabuf = udmabuf_fd(); + if (udmabuf < 0) { + return false; + } + + memdev_root = object_resolve_path("/objects", NULL); + object_child_foreach(memdev_root, find_memory_backend_type, &memfd_backend); + + return memfd_backend; +} + +void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res) +{ + void *pdata = NULL; + + res->dmabuf_fd = -1; + if (res->iov_cnt == 1) { + pdata = res->iov[0].iov_base; + } else { + virtio_gpu_create_udmabuf(res); + if (res->dmabuf_fd < 0) { + return; + } + virtio_gpu_remap_udmabuf(res); + if (!res->remapped) { + return; + } + pdata = res->remapped; + } + + res->blob = pdata; +} + +void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res) +{ + if (res->remapped) { + virtio_gpu_destroy_udmabuf(res); + } +} + +static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf) +{ + struct virtio_gpu_scanout *scanout; + + scanout = &g->parent_obj.scanout[dmabuf->scanout_id]; + dpy_gl_release_dmabuf(scanout->con, &dmabuf->buf); + QTAILQ_REMOVE(&g->dmabuf.bufs, dmabuf, next); + g_free(dmabuf); +} + +static VGPUDMABuf +*virtio_gpu_create_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + 
struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) +{ + VGPUDMABuf *dmabuf; + + if (res->dmabuf_fd < 0) { + return NULL; + } + + dmabuf = g_new0(VGPUDMABuf, 1); + dmabuf->buf.width = fb->width; + dmabuf->buf.height = fb->height; + dmabuf->buf.stride = fb->stride; + dmabuf->buf.x = r->x; + dmabuf->buf.y = r->y; + dmabuf->buf.scanout_width = r->width; + dmabuf->buf.scanout_height = r->height; + dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format); + dmabuf->buf.fd = res->dmabuf_fd; + dmabuf->buf.allow_fences = true; + dmabuf->buf.draw_submitted = false; + dmabuf->scanout_id = scanout_id; + QTAILQ_INSERT_HEAD(&g->dmabuf.bufs, dmabuf, next); + + return dmabuf; +} + +int virtio_gpu_update_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) +{ + struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id]; + VGPUDMABuf *new_primary, *old_primary = NULL; + + new_primary = virtio_gpu_create_dmabuf(g, scanout_id, res, fb, r); + if (!new_primary) { + return -EINVAL; + } + + if (g->dmabuf.primary[scanout_id]) { + old_primary = g->dmabuf.primary[scanout_id]; + } + + g->dmabuf.primary[scanout_id] = new_primary; + qemu_console_resize(scanout->con, + new_primary->buf.scanout_width, + new_primary->buf.scanout_height); + dpy_gl_scanout_dmabuf(scanout->con, &new_primary->buf); + + if (old_primary) { + virtio_gpu_free_dmabuf(g, old_primary); + } + + return 0; +} diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c new file mode 100644 index 00000000000..18d054922fe --- /dev/null +++ b/hw/display/virtio-gpu-virgl.c @@ -0,0 +1,634 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
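virtio_gpu_create_udmabuf() above packs every guest-memory iovec of a blob resource into a udmabuf_create_list and asks the host kernel's /dev/udmabuf driver to wrap those memfd-backed pages in a dma-buf, which the device can then mmap (virtio_gpu_remap_udmabuf()) or hand to the display as a scanout. A stripped-down, single-entry version of the same kernel interface, as a standalone sketch rather than QEMU code (udmabuf requires a memfd sealed with F_SEAL_SHRINK and page-aligned offset/size):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <linux/udmabuf.h>

    int main(void)
    {
        size_t size = 2 * 1024 * 1024;            /* must be page aligned */
        int memfd = memfd_create("guest-ram", MFD_ALLOW_SEALING);
        int devfd, buffd;

        if (memfd < 0) {
            perror("memfd_create");
            return 1;
        }
        ftruncate(memfd, size);
        fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); /* required by udmabuf */

        devfd = open("/dev/udmabuf", O_RDWR);
        if (devfd < 0) {
            perror("/dev/udmabuf");
            return 1;
        }

        struct udmabuf_create create = {
            .memfd  = memfd,
            .flags  = UDMABUF_FLAGS_CLOEXEC,
            .offset = 0,
            .size   = size,
        };
        buffd = ioctl(devfd, UDMABUF_CREATE, &create); /* returns a dma-buf fd */
        if (buffd < 0) {
            perror("UDMABUF_CREATE");
            return 1;
        }

        /* the dma-buf can now be mmap'ed, as virtio_gpu_remap_udmabuf() does */
        void *p = mmap(NULL, size, PROT_READ, MAP_SHARED, buffd, 0);
        if (p != MAP_FAILED) {
            munmap(p, size);
        }
        close(buffd);
        close(devfd);
        close(memfd);
        return 0;
    }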
+ */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "trace.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-gpu.h" + +#include + +static struct virgl_renderer_callbacks virtio_gpu_3d_cbs; + +static void virgl_cmd_create_resource_2d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_create_2d c2d; + struct virgl_renderer_resource_create_args args; + + VIRTIO_GPU_FILL_CMD(c2d); + trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format, + c2d.width, c2d.height); + + args.handle = c2d.resource_id; + args.target = 2; + args.format = c2d.format; + args.bind = (1 << 1); + args.width = c2d.width; + args.height = c2d.height; + args.depth = 1; + args.array_size = 1; + args.last_level = 0; + args.nr_samples = 0; + args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP; + virgl_renderer_resource_create(&args, NULL, 0); +} + +static void virgl_cmd_create_resource_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_create_3d c3d; + struct virgl_renderer_resource_create_args args; + + VIRTIO_GPU_FILL_CMD(c3d); + trace_virtio_gpu_cmd_res_create_3d(c3d.resource_id, c3d.format, + c3d.width, c3d.height, c3d.depth); + + args.handle = c3d.resource_id; + args.target = c3d.target; + args.format = c3d.format; + args.bind = c3d.bind; + args.width = c3d.width; + args.height = c3d.height; + args.depth = c3d.depth; + args.array_size = c3d.array_size; + args.last_level = c3d.last_level; + args.nr_samples = c3d.nr_samples; + args.flags = c3d.flags; + virgl_renderer_resource_create(&args, NULL, 0); +} + +static void virgl_cmd_resource_unref(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_unref unref; + struct iovec *res_iovs = NULL; + int num_iovs = 0; + + VIRTIO_GPU_FILL_CMD(unref); + trace_virtio_gpu_cmd_res_unref(unref.resource_id); + + virgl_renderer_resource_detach_iov(unref.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs != NULL && num_iovs != 0) { + virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); + } + virgl_renderer_resource_unref(unref.resource_id); +} + +static void virgl_cmd_context_create(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_create cc; + + VIRTIO_GPU_FILL_CMD(cc); + trace_virtio_gpu_cmd_ctx_create(cc.hdr.ctx_id, + cc.debug_name); + + virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen, + cc.debug_name); +} + +static void virgl_cmd_context_destroy(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_destroy cd; + + VIRTIO_GPU_FILL_CMD(cd); + trace_virtio_gpu_cmd_ctx_destroy(cd.hdr.ctx_id); + + virgl_renderer_context_destroy(cd.hdr.ctx_id); +} + +static void virtio_gpu_rect_update(VirtIOGPU *g, int idx, int x, int y, + int width, int height) +{ + if (!g->parent_obj.scanout[idx].con) { + return; + } + + dpy_gl_update(g->parent_obj.scanout[idx].con, x, y, width, height); +} + +static void virgl_cmd_resource_flush(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_flush rf; + int i; + + VIRTIO_GPU_FILL_CMD(rf); + trace_virtio_gpu_cmd_res_flush(rf.resource_id, + rf.r.width, rf.r.height, rf.r.x, rf.r.y); + + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + if (g->parent_obj.scanout[i].resource_id != rf.resource_id) { + continue; + } + virtio_gpu_rect_update(g, i, rf.r.x, rf.r.y, rf.r.width, rf.r.height); + } +} + +static void virgl_cmd_set_scanout(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_set_scanout ss; + struct 
virgl_renderer_resource_info info; + int ret; + + VIRTIO_GPU_FILL_CMD(ss); + trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id, + ss.r.width, ss.r.height, ss.r.x, ss.r.y); + + if (ss.scanout_id >= g->parent_obj.conf.max_outputs) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d", + __func__, ss.scanout_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; + return; + } + g->parent_obj.enable = 1; + + memset(&info, 0, sizeof(info)); + + if (ss.resource_id && ss.r.width && ss.r.height) { + ret = virgl_renderer_resource_get_info(ss.resource_id, &info); + if (ret == -1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: illegal resource specified %d\n", + __func__, ss.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + qemu_console_resize(g->parent_obj.scanout[ss.scanout_id].con, + ss.r.width, ss.r.height); + virgl_renderer_force_ctx_0(); + dpy_gl_scanout_texture( + g->parent_obj.scanout[ss.scanout_id].con, info.tex_id, + info.flags & 1 /* FIXME: Y_0_TOP */, + info.width, info.height, + ss.r.x, ss.r.y, ss.r.width, ss.r.height); + } else { + dpy_gfx_replace_surface( + g->parent_obj.scanout[ss.scanout_id].con, NULL); + dpy_gl_scanout_disable(g->parent_obj.scanout[ss.scanout_id].con); + } + g->parent_obj.scanout[ss.scanout_id].resource_id = ss.resource_id; +} + +static void virgl_cmd_submit_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_cmd_submit cs; + void *buf; + size_t s; + + VIRTIO_GPU_FILL_CMD(cs); + trace_virtio_gpu_cmd_ctx_submit(cs.hdr.ctx_id, cs.size); + + buf = g_malloc(cs.size); + s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, + sizeof(cs), buf, cs.size); + if (s != cs.size) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: size mismatch (%zd/%d)", + __func__, s, cs.size); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + goto out; + } + + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + g->stats.req_3d++; + g->stats.bytes_3d += cs.size; + } + + virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4); + +out: + g_free(buf); +} + +static void virgl_cmd_transfer_to_host_2d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_to_host_2d t2d; + struct virtio_gpu_box box; + + VIRTIO_GPU_FILL_CMD(t2d); + trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id); + + box.x = t2d.r.x; + box.y = t2d.r.y; + box.z = 0; + box.w = t2d.r.width; + box.h = t2d.r.height; + box.d = 1; + + virgl_renderer_transfer_write_iov(t2d.resource_id, + 0, + 0, + 0, + 0, + (struct virgl_box *)&box, + t2d.offset, NULL, 0); +} + +static void virgl_cmd_transfer_to_host_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_host_3d t3d; + + VIRTIO_GPU_FILL_CMD(t3d); + trace_virtio_gpu_cmd_res_xfer_toh_3d(t3d.resource_id); + + virgl_renderer_transfer_write_iov(t3d.resource_id, + t3d.hdr.ctx_id, + t3d.level, + t3d.stride, + t3d.layer_stride, + (struct virgl_box *)&t3d.box, + t3d.offset, NULL, 0); +} + +static void +virgl_cmd_transfer_from_host_3d(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_transfer_host_3d tf3d; + + VIRTIO_GPU_FILL_CMD(tf3d); + trace_virtio_gpu_cmd_res_xfer_fromh_3d(tf3d.resource_id); + + virgl_renderer_transfer_read_iov(tf3d.resource_id, + tf3d.hdr.ctx_id, + tf3d.level, + tf3d.stride, + tf3d.layer_stride, + (struct virgl_box *)&tf3d.box, + tf3d.offset, NULL, 0); +} + + +static void virgl_resource_attach_backing(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct 
virtio_gpu_resource_attach_backing att_rb; + struct iovec *res_iovs; + uint32_t res_niov; + int ret; + + VIRTIO_GPU_FILL_CMD(att_rb); + trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); + + ret = virtio_gpu_create_mapping_iov(g, att_rb.nr_entries, sizeof(att_rb), + cmd, NULL, &res_iovs, &res_niov); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, + res_iovs, res_niov); + + if (ret != 0) + virtio_gpu_cleanup_mapping_iov(g, res_iovs, res_niov); +} + +static void virgl_resource_detach_backing(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_resource_detach_backing detach_rb; + struct iovec *res_iovs = NULL; + int num_iovs = 0; + + VIRTIO_GPU_FILL_CMD(detach_rb); + trace_virtio_gpu_cmd_res_back_detach(detach_rb.resource_id); + + virgl_renderer_resource_detach_iov(detach_rb.resource_id, + &res_iovs, + &num_iovs); + if (res_iovs == NULL || num_iovs == 0) { + return; + } + virtio_gpu_cleanup_mapping_iov(g, res_iovs, num_iovs); +} + + +static void virgl_cmd_ctx_attach_resource(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_resource att_res; + + VIRTIO_GPU_FILL_CMD(att_res); + trace_virtio_gpu_cmd_ctx_res_attach(att_res.hdr.ctx_id, + att_res.resource_id); + + virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id); +} + +static void virgl_cmd_ctx_detach_resource(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_ctx_resource det_res; + + VIRTIO_GPU_FILL_CMD(det_res); + trace_virtio_gpu_cmd_ctx_res_detach(det_res.hdr.ctx_id, + det_res.resource_id); + + virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id); +} + +static void virgl_cmd_get_capset_info(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_get_capset_info info; + struct virtio_gpu_resp_capset_info resp; + + VIRTIO_GPU_FILL_CMD(info); + + memset(&resp, 0, sizeof(resp)); + if (info.capset_index == 0) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; + virgl_renderer_get_cap_set(resp.capset_id, + &resp.capset_max_version, + &resp.capset_max_size); + } else if (info.capset_index == 1) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2; + virgl_renderer_get_cap_set(resp.capset_id, + &resp.capset_max_version, + &resp.capset_max_size); + } else { + resp.capset_max_version = 0; + resp.capset_max_size = 0; + } + resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO; + virtio_gpu_ctrl_response(g, cmd, &resp.hdr, sizeof(resp)); +} + +static void virgl_cmd_get_capset(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_get_capset gc; + struct virtio_gpu_resp_capset *resp; + uint32_t max_ver, max_size; + VIRTIO_GPU_FILL_CMD(gc); + + virgl_renderer_get_cap_set(gc.capset_id, &max_ver, + &max_size); + if (!max_size) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + resp = g_malloc0(sizeof(*resp) + max_size); + resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; + virgl_renderer_fill_caps(gc.capset_id, + gc.capset_version, + (void *)resp->capset_data); + virtio_gpu_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size); + g_free(resp); +} + +void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr); + + virgl_renderer_force_ctx_0(); + switch (cmd->cmd_hdr.type) { + case VIRTIO_GPU_CMD_CTX_CREATE: + virgl_cmd_context_create(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_DESTROY: + virgl_cmd_context_destroy(g, cmd); + 
break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: + virgl_cmd_create_resource_2d(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D: + virgl_cmd_create_resource_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_SUBMIT_3D: + virgl_cmd_submit_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: + virgl_cmd_transfer_to_host_2d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D: + virgl_cmd_transfer_to_host_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D: + virgl_cmd_transfer_from_host_3d(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: + virgl_resource_attach_backing(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: + virgl_resource_detach_backing(g, cmd); + break; + case VIRTIO_GPU_CMD_SET_SCANOUT: + virgl_cmd_set_scanout(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_FLUSH: + virgl_cmd_resource_flush(g, cmd); + break; + case VIRTIO_GPU_CMD_RESOURCE_UNREF: + virgl_cmd_resource_unref(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE: + /* TODO add security */ + virgl_cmd_ctx_attach_resource(g, cmd); + break; + case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE: + /* TODO add security */ + virgl_cmd_ctx_detach_resource(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_CAPSET_INFO: + virgl_cmd_get_capset_info(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_CAPSET: + virgl_cmd_get_capset(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: + virtio_gpu_get_display_info(g, cmd); + break; + case VIRTIO_GPU_CMD_GET_EDID: + virtio_gpu_get_edid(g, cmd); + break; + default: + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + break; + } + + if (cmd->finished) { + return; + } + if (cmd->error) { + fprintf(stderr, "%s: ctrl 0x%x, error 0x%x\n", __func__, + cmd->cmd_hdr.type, cmd->error); + virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error); + return; + } + if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) { + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + return; + } + + trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); + virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); +} + +static void virgl_write_fence(void *opaque, uint32_t fence) +{ + VirtIOGPU *g = opaque; + struct virtio_gpu_ctrl_command *cmd, *tmp; + + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + /* + * the guest can end up emitting fences out of order + * so we should check all fenced cmds not just the first one. 
+ */ + if (cmd->cmd_hdr.fence_id > fence) { + continue; + } + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + fprintf(stderr, "inflight: %3d (-)\r", g->inflight); + } + } +} + +static virgl_renderer_gl_context +virgl_create_context(void *opaque, int scanout_idx, + struct virgl_renderer_gl_ctx_param *params) +{ + VirtIOGPU *g = opaque; + QEMUGLContext ctx; + QEMUGLParams qparams; + + qparams.major_ver = params->major_ver; + qparams.minor_ver = params->minor_ver; + + ctx = dpy_gl_ctx_create(g->parent_obj.scanout[scanout_idx].con, &qparams); + return (virgl_renderer_gl_context)ctx; +} + +static void virgl_destroy_context(void *opaque, virgl_renderer_gl_context ctx) +{ + VirtIOGPU *g = opaque; + QEMUGLContext qctx = (QEMUGLContext)ctx; + + dpy_gl_ctx_destroy(g->parent_obj.scanout[0].con, qctx); +} + +static int virgl_make_context_current(void *opaque, int scanout_idx, + virgl_renderer_gl_context ctx) +{ + VirtIOGPU *g = opaque; + QEMUGLContext qctx = (QEMUGLContext)ctx; + + return dpy_gl_ctx_make_current(g->parent_obj.scanout[scanout_idx].con, + qctx); +} + +static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { + .version = 1, + .write_fence = virgl_write_fence, + .create_gl_context = virgl_create_context, + .destroy_gl_context = virgl_destroy_context, + .make_current = virgl_make_context_current, +}; + +static void virtio_gpu_print_stats(void *opaque) +{ + VirtIOGPU *g = opaque; + + if (g->stats.requests) { + fprintf(stderr, "stats: vq req %4d, %3d -- 3D %4d (%5d)\n", + g->stats.requests, + g->stats.max_inflight, + g->stats.req_3d, + g->stats.bytes_3d); + g->stats.requests = 0; + g->stats.max_inflight = 0; + g->stats.req_3d = 0; + g->stats.bytes_3d = 0; + } else { + fprintf(stderr, "stats: idle\r"); + } + timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); +} + +static void virtio_gpu_fence_poll(void *opaque) +{ + VirtIOGPU *g = opaque; + + virgl_renderer_poll(); + virtio_gpu_process_cmdq(g); + if (!QTAILQ_EMPTY(&g->cmdq) || !QTAILQ_EMPTY(&g->fenceq)) { + timer_mod(g->fence_poll, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10); + } +} + +void virtio_gpu_virgl_fence_poll(VirtIOGPU *g) +{ + virtio_gpu_fence_poll(g); +} + +void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g) +{ + int i; + + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + dpy_gfx_replace_surface(g->parent_obj.scanout[i].con, NULL); + dpy_gl_scanout_disable(g->parent_obj.scanout[i].con); + } +} + +void virtio_gpu_virgl_reset(VirtIOGPU *g) +{ + virgl_renderer_reset(); +} + +int virtio_gpu_virgl_init(VirtIOGPU *g) +{ + int ret; + + ret = virgl_renderer_init(g, 0, &virtio_gpu_3d_cbs); + if (ret != 0) { + return ret; + } + + g->fence_poll = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_gpu_fence_poll, g); + + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + g->print_stats = timer_new_ms(QEMU_CLOCK_VIRTUAL, + virtio_gpu_print_stats, g); + timer_mod(g->print_stats, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); + } + return 0; +} + +int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g) +{ + uint32_t capset2_max_ver, capset2_max_size; + virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2, + &capset2_max_ver, + &capset2_max_size); + + return capset2_max_ver ? 
2 : 1; +} diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index c9f5e36fd07..d78b9700c7d 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -35,80 +35,51 @@ static struct virtio_gpu_simple_resource* virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id); +static struct virtio_gpu_simple_resource * +virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, + bool require_backing, + const char *caller, uint32_t *error); static void virtio_gpu_cleanup_mapping(VirtIOGPU *g, struct virtio_gpu_simple_resource *res); -#ifdef CONFIG_VIRGL -#include -#define VIRGL(_g, _virgl, _simple, ...) \ - do { \ - if (_g->parent_obj.use_virgl_renderer) { \ - _virgl(__VA_ARGS__); \ - } else { \ - _simple(__VA_ARGS__); \ - } \ - } while (0) -#else -#define VIRGL(_g, _virgl, _simple, ...) \ - do { \ - _simple(__VA_ARGS__); \ - } while (0) -#endif - -static void update_cursor_data_simple(VirtIOGPU *g, - struct virtio_gpu_scanout *s, - uint32_t resource_id) +void virtio_gpu_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id) { struct virtio_gpu_simple_resource *res; uint32_t pixels; + void *data; - res = virtio_gpu_find_resource(g, resource_id); + res = virtio_gpu_find_check_resource(g, resource_id, false, + __func__, NULL); if (!res) { return; } - if (pixman_image_get_width(res->image) != s->current_cursor->width || - pixman_image_get_height(res->image) != s->current_cursor->height) { - return; + if (res->blob_size) { + if (res->blob_size < (s->current_cursor->width * + s->current_cursor->height * 4)) { + return; + } + data = res->blob; + } else { + if (pixman_image_get_width(res->image) != s->current_cursor->width || + pixman_image_get_height(res->image) != s->current_cursor->height) { + return; + } + data = pixman_image_get_data(res->image); } pixels = s->current_cursor->width * s->current_cursor->height; - memcpy(s->current_cursor->data, - pixman_image_get_data(res->image), + memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t)); } -#ifdef CONFIG_VIRGL - -static void update_cursor_data_virgl(VirtIOGPU *g, - struct virtio_gpu_scanout *s, - uint32_t resource_id) -{ - uint32_t width, height; - uint32_t pixels, *data; - - data = virgl_renderer_get_cursor_data(resource_id, &width, &height); - if (!data) { - return; - } - - if (width != s->current_cursor->width || - height != s->current_cursor->height) { - free(data); - return; - } - - pixels = s->current_cursor->width * s->current_cursor->height; - memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t)); - free(data); -} - -#endif - static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor) { struct virtio_gpu_scanout *s; + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); bool move = cursor->hdr.type == VIRTIO_GPU_CMD_MOVE_CURSOR; if (cursor->pos.scanout_id >= g->parent_obj.conf.max_outputs) { @@ -131,8 +102,7 @@ static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor) s->current_cursor->hot_y = cursor->hot_y; if (cursor->resource_id > 0) { - VIRGL(g, update_cursor_data_virgl, update_cursor_data_simple, - g, s, cursor->resource_id); + vgc->update_cursor_data(g, s, cursor->resource_id); } dpy_cursor_define(s->con, s->current_cursor); @@ -158,6 +128,37 @@ virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id) return NULL; } +static struct virtio_gpu_simple_resource * +virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, + bool require_backing, + const char *caller, uint32_t *error) +{ + struct 
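The virtio-gpu.c hunk above drops the CONFIG_VIRGL-guarded VIRGL(_g, _virgl, _simple, ...) macro: instead of branching at runtime on use_virgl_renderer, cursor updates (and, further down, command processing) go through VirtIOGPUClass methods that the GL device overrides. A generic, self-contained illustration of that design move, using plain function pointers rather than the real QOM types:

    #include <stdio.h>

    struct gpu;

    /* each device class carries its own hook, overridden by the GL variant */
    struct gpu_class {
        void (*update_cursor_data)(struct gpu *g, unsigned resource_id);
    };

    struct gpu {
        const struct gpu_class *klass;
    };

    static void update_cursor_2d(struct gpu *g, unsigned id)
    {
        (void)g;
        printf("2D cursor path for resource %u\n", id);
    }

    static void update_cursor_virgl(struct gpu *g, unsigned id)
    {
        (void)g;
        printf("virgl cursor path for resource %u\n", id);
    }

    static const struct gpu_class gpu_2d = { .update_cursor_data = update_cursor_2d };
    static const struct gpu_class gpu_gl = { .update_cursor_data = update_cursor_virgl };

    int main(void)
    {
        struct gpu a = { .klass = &gpu_2d }, b = { .klass = &gpu_gl };
        a.klass->update_cursor_data(&a, 1);   /* dispatch without #ifdef or macro */
        b.klass->update_cursor_data(&b, 2);
        return 0;
    }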
virtio_gpu_simple_resource *res; + + res = virtio_gpu_find_resource(g, resource_id); + if (!res) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid resource specified %d\n", + caller, resource_id); + if (error) { + *error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + } + return NULL; + } + + if (require_backing) { + if (!res->iov || (!res->image && !res->blob)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: no backing storage %d\n", + caller, resource_id); + if (error) { + *error = VIRTIO_GPU_RESP_ERR_UNSPEC; + } + return NULL; + } + } + + return res; +} + void virtio_gpu_ctrl_response(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd, struct virtio_gpu_ctrl_hdr *resp, @@ -321,6 +322,56 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g, g->hostmem += res->hostmem; } +static void virtio_gpu_resource_create_blob(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_resource_create_blob cblob; + int ret; + + VIRTIO_GPU_FILL_CMD(cblob); + virtio_gpu_create_blob_bswap(&cblob); + trace_virtio_gpu_cmd_res_create_blob(cblob.resource_id, cblob.size); + + if (cblob.resource_id == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: resource id 0 is not allowed\n", + __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + if (cblob.blob_mem != VIRTIO_GPU_BLOB_MEM_GUEST && + cblob.blob_flags != VIRTIO_GPU_BLOB_FLAG_USE_SHAREABLE) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid memory type\n", + __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + if (virtio_gpu_find_resource(g, cblob.resource_id)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: resource already exists %d\n", + __func__, cblob.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + return; + } + + res = g_new0(struct virtio_gpu_simple_resource, 1); + res->resource_id = cblob.resource_id; + res->blob_size = cblob.size; + + ret = virtio_gpu_create_mapping_iov(g, cblob.nr_entries, sizeof(cblob), + cmd, &res->addrs, &res->iov, + &res->iov_cnt); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + g_free(res); + return; + } + + virtio_gpu_init_udmabuf(res); + QTAILQ_INSERT_HEAD(&g->reslist, res, next); +} + static void virtio_gpu_disable_scanout(VirtIOGPU *g, int scanout_id) { struct virtio_gpu_scanout *scanout = &g->parent_obj.scanout[scanout_id]; @@ -355,7 +406,7 @@ static void virtio_gpu_resource_destroy(VirtIOGPU *g, } } - pixman_image_unref(res->image); + qemu_pixman_image_unref(res->image); virtio_gpu_cleanup_mapping(g, res); QTAILQ_REMOVE(&g->reslist, res, next); g->hostmem -= res->hostmem; @@ -396,11 +447,9 @@ static void virtio_gpu_transfer_to_host_2d(VirtIOGPU *g, virtio_gpu_t2d_bswap(&t2d); trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id); - res = virtio_gpu_find_resource(g, t2d.resource_id); - if (!res || !res->iov) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, t2d.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + res = virtio_gpu_find_check_resource(g, t2d.resource_id, true, + __func__, &cmd->error); + if (!res || res->blob) { return; } @@ -446,6 +495,7 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, { struct virtio_gpu_simple_resource *res; struct virtio_gpu_resource_flush rf; + struct virtio_gpu_scanout *scanout; pixman_region16_t flush_region; int i; @@ -454,20 +504,31 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, trace_virtio_gpu_cmd_res_flush(rf.resource_id, rf.r.width, rf.r.height, rf.r.x, rf.r.y); - res = 
virtio_gpu_find_resource(g, rf.resource_id); + res = virtio_gpu_find_check_resource(g, rf.resource_id, false, + __func__, &cmd->error); if (!res) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, rf.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; return; } - if (rf.r.x > res->width || + if (res->blob) { + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { + scanout = &g->parent_obj.scanout[i]; + if (scanout->resource_id == res->resource_id && + console_has_gl(scanout->con)) { + dpy_gl_update(scanout->con, 0, 0, scanout->width, + scanout->height); + } + } + return; + } + + if (!res->blob && + (rf.r.x > res->width || rf.r.y > res->height || rf.r.width > res->width || rf.r.height > res->height || rf.r.x + rf.r.width > res->width || - rf.r.y + rf.r.height > res->height) { + rf.r.y + rf.r.height > res->height)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: flush bounds outside resource" " bounds for resource %d: %d %d %d %d vs %d %d\n", __func__, rf.resource_id, rf.r.x, rf.r.y, @@ -479,7 +540,6 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, pixman_region_init_rect(&flush_region, rf.r.x, rf.r.y, rf.r.width, rf.r.height); for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - struct virtio_gpu_scanout *scanout; pixman_region16_t region, finalregion; pixman_box16_t *extents; @@ -512,14 +572,109 @@ static void virtio_unref_resource(pixman_image_t *image, void *data) pixman_image_unref(data); } +static void virtio_gpu_update_scanout(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_rect *r) +{ + struct virtio_gpu_simple_resource *ores; + struct virtio_gpu_scanout *scanout; + + scanout = &g->parent_obj.scanout[scanout_id]; + ores = virtio_gpu_find_resource(g, scanout->resource_id); + if (ores) { + ores->scanout_bitmask &= ~(1 << scanout_id); + } + + res->scanout_bitmask |= (1 << scanout_id); + scanout->resource_id = res->resource_id; + scanout->x = r->x; + scanout->y = r->y; + scanout->width = r->width; + scanout->height = r->height; +} + +static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_rect *r, + uint32_t *error) +{ + struct virtio_gpu_scanout *scanout; + uint8_t *data; + + scanout = &g->parent_obj.scanout[scanout_id]; + + if (r->x > fb->width || + r->y > fb->height || + r->width < 16 || + r->height < 16 || + r->width > fb->width || + r->height > fb->height || + r->x + r->width > fb->width || + r->y + r->height > fb->height) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout %d bounds for" + " resource %d, rect (%d,%d)+%d,%d, fb %d %d\n", + __func__, scanout_id, res->resource_id, + r->x, r->y, r->width, r->height, + fb->width, fb->height); + *error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + g->parent_obj.enable = 1; + + if (res->blob) { + if (console_has_gl(scanout->con)) { + if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) { + virtio_gpu_update_scanout(g, scanout_id, res, r); + return; + } + } + + data = res->blob; + } else { + data = (uint8_t *)pixman_image_get_data(res->image); + } + + /* create a surface for this scanout */ + if ((res->blob && !console_has_gl(scanout->con)) || + !scanout->ds || + surface_data(scanout->ds) != data + fb->offset || + scanout->width != r->width || + scanout->height != r->height) { + pixman_image_t *rect; + void *ptr = data + fb->offset; + rect = pixman_image_create_bits(fb->format, r->width, r->height, 
+ ptr, fb->stride); + + if (res->image) { + pixman_image_ref(res->image); + pixman_image_set_destroy_function(rect, virtio_unref_resource, + res->image); + } + + /* realloc the surface ptr */ + scanout->ds = qemu_create_displaysurface_pixman(rect); + if (!scanout->ds) { + *error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + pixman_image_unref(rect); + dpy_gfx_replace_surface(g->parent_obj.scanout[scanout_id].con, + scanout->ds); + } + + virtio_gpu_update_scanout(g, scanout_id, res, r); +} + static void virtio_gpu_set_scanout(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { - struct virtio_gpu_simple_resource *res, *ores; - struct virtio_gpu_scanout *scanout; - pixman_format_code_t format; - uint32_t offset; - int bpp; + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_framebuffer fb = { 0 }; struct virtio_gpu_set_scanout ss; VIRTIO_GPU_FILL_CMD(ss); @@ -534,97 +689,111 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g, return; } - g->parent_obj.enable = 1; if (ss.resource_id == 0) { virtio_gpu_disable_scanout(g, ss.scanout_id); return; } - /* create a surface for this scanout */ - res = virtio_gpu_find_resource(g, ss.resource_id); + res = virtio_gpu_find_check_resource(g, ss.resource_id, true, + __func__, &cmd->error); if (!res) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, ss.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; return; } - if (ss.r.x > res->width || - ss.r.y > res->height || - ss.r.width < 16 || - ss.r.height < 16 || - ss.r.width > res->width || - ss.r.height > res->height || - ss.r.x + ss.r.width > res->width || - ss.r.y + ss.r.height > res->height) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout %d bounds for" - " resource %d, (%d,%d)+%d,%d vs %d %d\n", - __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, - ss.r.width, ss.r.height, res->width, res->height); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + fb.format = pixman_image_get_format(res->image); + fb.bytes_pp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(fb.format), 8); + fb.width = pixman_image_get_width(res->image); + fb.height = pixman_image_get_height(res->image); + fb.stride = pixman_image_get_stride(res->image); + fb.offset = ss.r.x * fb.bytes_pp + ss.r.y * fb.stride; + + virtio_gpu_do_set_scanout(g, ss.scanout_id, + &fb, res, &ss.r, &cmd->error); +} + +static void virtio_gpu_set_scanout_blob(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_simple_resource *res; + struct virtio_gpu_framebuffer fb = { 0 }; + struct virtio_gpu_set_scanout_blob ss; + uint64_t fbend; + + VIRTIO_GPU_FILL_CMD(ss); + virtio_gpu_scanout_blob_bswap(&ss); + trace_virtio_gpu_cmd_set_scanout_blob(ss.scanout_id, ss.resource_id, + ss.r.width, ss.r.height, ss.r.x, + ss.r.y); + + if (ss.scanout_id >= g->parent_obj.conf.max_outputs) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d", + __func__, ss.scanout_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; return; } - scanout = &g->parent_obj.scanout[ss.scanout_id]; + if (ss.resource_id == 0) { + virtio_gpu_disable_scanout(g, ss.scanout_id); + return; + } - format = pixman_image_get_format(res->image); - bpp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(format), 8); - offset = (ss.r.x * bpp) + ss.r.y * pixman_image_get_stride(res->image); - if (!scanout->ds || surface_data(scanout->ds) - != ((uint8_t *)pixman_image_get_data(res->image) + offset) || - scanout->width != ss.r.width || - scanout->height != ss.r.height) { - pixman_image_t *rect; - void *ptr = (uint8_t 
*)pixman_image_get_data(res->image) + offset; - rect = pixman_image_create_bits(format, ss.r.width, ss.r.height, ptr, - pixman_image_get_stride(res->image)); - pixman_image_ref(res->image); - pixman_image_set_destroy_function(rect, virtio_unref_resource, - res->image); - /* realloc the surface ptr */ - scanout->ds = qemu_create_displaysurface_pixman(rect); - if (!scanout->ds) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } - pixman_image_unref(rect); - dpy_gfx_replace_surface(g->parent_obj.scanout[ss.scanout_id].con, - scanout->ds); + res = virtio_gpu_find_check_resource(g, ss.resource_id, true, + __func__, &cmd->error); + if (!res) { + return; } - ores = virtio_gpu_find_resource(g, scanout->resource_id); - if (ores) { - ores->scanout_bitmask &= ~(1 << ss.scanout_id); + fb.format = virtio_gpu_get_pixman_format(ss.format); + if (!fb.format) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: host couldn't handle guest format %d\n", + __func__, ss.format); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; } - res->scanout_bitmask |= (1 << ss.scanout_id); - scanout->resource_id = ss.resource_id; - scanout->x = ss.r.x; - scanout->y = ss.r.y; - scanout->width = ss.r.width; - scanout->height = ss.r.height; + fb.bytes_pp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(fb.format), 8); + fb.width = ss.width; + fb.height = ss.height; + fb.stride = ss.strides[0]; + fb.offset = ss.offsets[0] + ss.r.x * fb.bytes_pp + ss.r.y * fb.stride; + + fbend = fb.offset; + fbend += fb.stride * (ss.r.height - 1); + fbend += fb.bytes_pp * ss.r.width; + if (fbend > res->blob_size) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: fb end out of range\n", + __func__); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } + + virtio_gpu_do_set_scanout(g, ss.scanout_id, + &fb, res, &ss.r, &cmd->error); } int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - struct virtio_gpu_resource_attach_backing *ab, + uint32_t nr_entries, uint32_t offset, struct virtio_gpu_ctrl_command *cmd, - uint64_t **addr, struct iovec **iov) + uint64_t **addr, struct iovec **iov, + uint32_t *niov) { struct virtio_gpu_mem_entry *ents; size_t esize, s; - int i; + int e, v; - if (ab->nr_entries > 16384) { + if (nr_entries > 16384) { qemu_log_mask(LOG_GUEST_ERROR, "%s: nr_entries is too big (%d > 16384)\n", - __func__, ab->nr_entries); + __func__, nr_entries); return -1; } - esize = sizeof(*ents) * ab->nr_entries; + esize = sizeof(*ents) * nr_entries; ents = g_malloc(esize); s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, - sizeof(*ab), ents, esize); + offset, ents, esize); if (s != esize) { qemu_log_mask(LOG_GUEST_ERROR, "%s: command data size incorrect %zu vs %zu\n", @@ -633,37 +802,52 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g, return -1; } - *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); + *iov = NULL; if (addr) { - *addr = g_malloc0(sizeof(uint64_t) * ab->nr_entries); - } - for (i = 0; i < ab->nr_entries; i++) { - uint64_t a = le64_to_cpu(ents[i].addr); - uint32_t l = le32_to_cpu(ents[i].length); - hwaddr len = l; - (*iov)[i].iov_base = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, - a, &len, DMA_DIRECTION_TO_DEVICE); - (*iov)[i].iov_len = len; - if (addr) { - (*addr)[i] = a; - } - if (!(*iov)[i].iov_base || len != l) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" - " resource %d element %d\n", - __func__, ab->resource_id, i); - if ((*iov)[i].iov_base) { - i++; /* cleanup the 'i'th map */ + *addr = NULL; + } + for (e = 0, v = 0; e < nr_entries; e++) { + uint64_t a = le64_to_cpu(ents[e].addr); + uint32_t 
l = le32_to_cpu(ents[e].length); + hwaddr len; + void *map; + + do { + len = l; + map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, + a, &len, DMA_DIRECTION_TO_DEVICE); + if (!map) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" + " element %d\n", __func__, e); + virtio_gpu_cleanup_mapping_iov(g, *iov, v); + g_free(ents); + *iov = NULL; + if (addr) { + g_free(*addr); + *addr = NULL; + } + return -1; + } + + if (!(v % 16)) { + *iov = g_realloc(*iov, sizeof(struct iovec) * (v + 16)); + if (addr) { + *addr = g_realloc(*addr, sizeof(uint64_t) * (v + 16)); + } } - virtio_gpu_cleanup_mapping_iov(g, *iov, i); - g_free(ents); - *iov = NULL; + (*iov)[v].iov_base = map; + (*iov)[v].iov_len = len; if (addr) { - g_free(*addr); - *addr = NULL; + (*addr)[v] = a; } - return -1; - } + + a += len; + l -= len; + v += 1; + } while (l > 0); } + *niov = v; + g_free(ents); return 0; } @@ -690,6 +874,10 @@ static void virtio_gpu_cleanup_mapping(VirtIOGPU *g, res->iov_cnt = 0; g_free(res->addrs); res->addrs = NULL; + + if (res->blob) { + virtio_gpu_fini_udmabuf(res); + } } static void @@ -717,13 +905,12 @@ virtio_gpu_resource_attach_backing(VirtIOGPU *g, return; } - ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, &res->iov); + ret = virtio_gpu_create_mapping_iov(g, ab.nr_entries, sizeof(ab), cmd, + &res->addrs, &res->iov, &res->iov_cnt); if (ret != 0) { cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; return; } - - res->iov_cnt = ab.nr_entries; } static void @@ -737,18 +924,16 @@ virtio_gpu_resource_detach_backing(VirtIOGPU *g, virtio_gpu_bswap_32(&detach, sizeof(detach)); trace_virtio_gpu_cmd_res_back_detach(detach.resource_id); - res = virtio_gpu_find_resource(g, detach.resource_id); - if (!res || !res->iov) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", - __func__, detach.resource_id); - cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; + res = virtio_gpu_find_check_resource(g, detach.resource_id, true, + __func__, &cmd->error); + if (!res) { return; } virtio_gpu_cleanup_mapping(g, res); } -static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, - struct virtio_gpu_ctrl_command *cmd) +void virtio_gpu_simple_process_cmd(VirtIOGPU *g, + struct virtio_gpu_ctrl_command *cmd) { VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr); virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); @@ -763,6 +948,13 @@ static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: virtio_gpu_resource_create_2d(g, cmd); break; + case VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB: + if (!virtio_gpu_blob_enabled(g->parent_obj.conf)) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + break; + } + virtio_gpu_resource_create_blob(g, cmd); + break; case VIRTIO_GPU_CMD_RESOURCE_UNREF: virtio_gpu_resource_unref(g, cmd); break; @@ -775,6 +967,13 @@ static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, case VIRTIO_GPU_CMD_SET_SCANOUT: virtio_gpu_set_scanout(g, cmd); break; + case VIRTIO_GPU_CMD_SET_SCANOUT_BLOB: + if (!virtio_gpu_blob_enabled(g->parent_obj.conf)) { + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + break; + } + virtio_gpu_set_scanout_blob(g, cmd); + break; case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: virtio_gpu_resource_attach_backing(g, cmd); break; @@ -786,8 +985,10 @@ static void virtio_gpu_simple_process_cmd(VirtIOGPU *g, break; } if (!cmd->finished) { - virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? cmd->error : - VIRTIO_GPU_RESP_OK_NODATA); + if (!g->parent_obj.renderer_blocked) { + virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? 
cmd->error : + VIRTIO_GPU_RESP_OK_NODATA); + } } } @@ -806,6 +1007,7 @@ static void virtio_gpu_handle_cursor_cb(VirtIODevice *vdev, VirtQueue *vq) void virtio_gpu_process_cmdq(VirtIOGPU *g) { struct virtio_gpu_ctrl_command *cmd; + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); if (g->processing_cmdq) { return; @@ -819,8 +1021,7 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g) } /* process command */ - VIRGL(g, virtio_gpu_virgl_process_cmd, virtio_gpu_simple_process_cmd, - g, cmd); + vgc->process_cmd(g, cmd); QTAILQ_REMOVE(&g->cmdq, cmd, next); if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { @@ -843,16 +1044,27 @@ void virtio_gpu_process_cmdq(VirtIOGPU *g) g->processing_cmdq = false; } -static void virtio_gpu_gl_flushed(VirtIOGPUBase *b) +static void virtio_gpu_process_fenceq(VirtIOGPU *g) { - VirtIOGPU *g = VIRTIO_GPU(b); + struct virtio_gpu_ctrl_command *cmd, *tmp; -#ifdef CONFIG_VIRGL - if (g->renderer_reset) { - g->renderer_reset = false; - virtio_gpu_virgl_reset(g); + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + fprintf(stderr, "inflight: %3d (-)\r", g->inflight); + } } -#endif +} + +static void virtio_gpu_handle_gl_flushed(VirtIOGPUBase *b) +{ + VirtIOGPU *g = container_of(b, VirtIOGPU, parent_obj); + + virtio_gpu_process_fenceq(g); virtio_gpu_process_cmdq(g); } @@ -865,13 +1077,6 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) return; } -#ifdef CONFIG_VIRGL - if (!g->renderer_inited && g->parent_obj.use_virgl_renderer) { - virtio_gpu_virgl_init(g); - g->renderer_inited = true; - } -#endif - cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command)); while (cmd) { cmd->vq = vq; @@ -882,18 +1087,14 @@ static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } virtio_gpu_process_cmdq(g); - -#ifdef CONFIG_VIRGL - if (g->parent_obj.use_virgl_renderer) { - virtio_gpu_virgl_fence_poll(g); - } -#endif } static void virtio_gpu_ctrl_bh(void *opaque) { VirtIOGPU *g = opaque; - virtio_gpu_handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq); + VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); + + vgc->handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq); } static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq) @@ -1105,24 +1306,21 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, return 0; } -static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) +void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(qdev); VirtIOGPU *g = VIRTIO_GPU(qdev); - bool have_virgl; - -#if !defined(CONFIG_VIRGL) || defined(HOST_WORDS_BIGENDIAN) - have_virgl = false; -#else - have_virgl = display_opengl; -#endif - if (!have_virgl) { - g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED); - } else { -#if defined(CONFIG_VIRGL) - VIRTIO_GPU_BASE(g)->virtio_config.num_capsets = - virtio_gpu_virgl_get_num_capsets(g); -#endif + + if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { + if (!virtio_gpu_have_udmabuf()) { + error_setg(errp, "cannot enable blob resources without udmabuf"); + return; + } + + if (virtio_gpu_virgl_enabled(g->parent_obj.conf)) { + error_setg(errp, "blobs and virgl are not compatible (yet)"); + return; + } } if (!virtio_gpu_base_device_realize(qdev, @@ -1141,18 +1339,12 @@ static void 
virtio_gpu_device_realize(DeviceState *qdev, Error **errp) QTAILQ_INIT(&g->fenceq); } -static void virtio_gpu_reset(VirtIODevice *vdev) +void virtio_gpu_reset(VirtIODevice *vdev) { VirtIOGPU *g = VIRTIO_GPU(vdev); struct virtio_gpu_simple_resource *res, *tmp; struct virtio_gpu_ctrl_command *cmd; -#ifdef CONFIG_VIRGL - if (g->parent_obj.use_virgl_renderer) { - virtio_gpu_virgl_reset(g); - } -#endif - QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { virtio_gpu_resource_destroy(g, res); } @@ -1170,17 +1362,6 @@ static void virtio_gpu_reset(VirtIODevice *vdev) g_free(cmd); } -#ifdef CONFIG_VIRGL - if (g->parent_obj.use_virgl_renderer) { - if (g->parent_obj.renderer_blocked) { - g->renderer_reset = true; - } else { - virtio_gpu_virgl_reset(g); - } - g->parent_obj.use_virgl_renderer = false; - } -#endif - virtio_gpu_base_reset(VIRTIO_GPU_BASE(vdev)); } @@ -1235,12 +1416,8 @@ static Property virtio_gpu_properties[] = { VIRTIO_GPU_BASE_PROPERTIES(VirtIOGPU, parent_obj.conf), DEFINE_PROP_SIZE("max_hostmem", VirtIOGPU, conf_max_hostmem, 256 * MiB), -#ifdef CONFIG_VIRGL - DEFINE_PROP_BIT("virgl", VirtIOGPU, parent_obj.conf.flags, - VIRTIO_GPU_FLAG_VIRGL_ENABLED, true), - DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags, - VIRTIO_GPU_FLAG_STATS_ENABLED, false), -#endif + DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_BLOB_ENABLED, false), DEFINE_PROP_END_OF_LIST(), }; @@ -1248,9 +1425,14 @@ static void virtio_gpu_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); - VirtIOGPUBaseClass *vgc = VIRTIO_GPU_BASE_CLASS(klass); + VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass); + VirtIOGPUBaseClass *vgbc = &vgc->parent; + + vgc->handle_ctrl = virtio_gpu_handle_ctrl; + vgc->process_cmd = virtio_gpu_simple_process_cmd; + vgc->update_cursor_data = virtio_gpu_update_cursor_data; + vgbc->gl_flushed = virtio_gpu_handle_gl_flushed; - vgc->gl_flushed = virtio_gpu_gl_flushed; vdc->realize = virtio_gpu_device_realize; vdc->reset = virtio_gpu_reset; vdc->get_config = virtio_gpu_get_config; @@ -1264,8 +1446,10 @@ static const TypeInfo virtio_gpu_info = { .name = TYPE_VIRTIO_GPU, .parent = TYPE_VIRTIO_GPU_BASE, .instance_size = sizeof(VirtIOGPU), + .class_size = sizeof(VirtIOGPUClass), .class_init = virtio_gpu_class_init, }; +module_obj(TYPE_VIRTIO_GPU); static void virtio_register_types(void) { diff --git a/hw/display/virtio-vga-gl.c b/hw/display/virtio-vga-gl.c new file mode 100644 index 00000000000..f22549097c5 --- /dev/null +++ b/hw/display/virtio-vga-gl.c @@ -0,0 +1,50 @@ +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-gpu.h" +#include "hw/display/vga.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "virtio-vga.h" +#include "qom/object.h" + +#define TYPE_VIRTIO_VGA_GL "virtio-vga-gl" + +typedef struct VirtIOVGAGL VirtIOVGAGL; +DECLARE_INSTANCE_CHECKER(VirtIOVGAGL, VIRTIO_VGA_GL, + TYPE_VIRTIO_VGA_GL) + +struct VirtIOVGAGL { + VirtIOVGABase parent_obj; + + VirtIOGPUGL vdev; +}; + +static void virtio_vga_gl_inst_initfn(Object *obj) +{ + VirtIOVGAGL *dev = VIRTIO_VGA_GL(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_GPU_GL); + VIRTIO_VGA_BASE(dev)->vgpu = VIRTIO_GPU_BASE(&dev->vdev); +} + + +static VirtioPCIDeviceTypeInfo virtio_vga_gl_info = { + .generic_name = TYPE_VIRTIO_VGA_GL, + .parent = TYPE_VIRTIO_VGA_BASE, + .instance_size = sizeof(VirtIOVGAGL), + .instance_init = 
virtio_vga_gl_inst_initfn, +}; +module_obj(TYPE_VIRTIO_VGA_GL); + +static void virtio_vga_register_types(void) +{ + if (have_vga) { + virtio_pci_types_register(&virtio_vga_gl_info); + } +} + +type_init(virtio_vga_register_types) + +module_dep("hw-display-virtio-vga"); diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index d3c64040615..9e57f61e9ed 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -239,6 +239,7 @@ static TypeInfo virtio_vga_base_info = { .class_init = virtio_vga_base_class_init, .abstract = true, }; +module_obj(TYPE_VIRTIO_VGA_BASE); #define TYPE_VIRTIO_VGA "virtio-vga" @@ -268,6 +269,7 @@ static VirtioPCIDeviceTypeInfo virtio_vga_info = { .instance_size = sizeof(VirtIOVGA), .instance_init = virtio_vga_inst_initfn, }; +module_obj(TYPE_VIRTIO_VGA); static void virtio_vga_register_types(void) { diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c index 4fd6aeb18b5..9bb781e3125 100644 --- a/hw/display/xlnx_dp.c +++ b/hw/display/xlnx_dp.c @@ -714,7 +714,11 @@ static uint64_t xlnx_dp_read(void *opaque, hwaddr offset, unsigned size) break; default: assert(offset <= (0x3AC >> 2)); - ret = s->core_registers[offset]; + if (offset == (0x3A8 >> 2) || offset == (0x3AC >> 2)) { + ret = s->core_registers[DP_INT_MASK]; + } else { + ret = s->core_registers[offset]; + } break; } @@ -1253,7 +1257,7 @@ static void xlnx_dp_init(Object *obj) object_property_add_child(OBJECT(s), "dpcd", OBJECT(s->dpcd)); s->edid = I2CDDC(qdev_new("i2c-ddc")); - i2c_set_slave_address(I2C_SLAVE(s->edid), 0x50); + i2c_slave_set_address(I2C_SLAVE(s->edid), 0x50); object_property_add_child(OBJECT(s), "edid", OBJECT(s->edid)); fifo8_create(&s->rx_fifo, 16); diff --git a/hw/dma/meson.build b/hw/dma/meson.build index 5c78a4e05ff..f3f0661bc3c 100644 --- a/hw/dma/meson.build +++ b/hw/dma/meson.build @@ -1,4 +1,3 @@ -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_dma.c')) softmmu_ss.add(when: 'CONFIG_RC4030', if_true: files('rc4030.c')) softmmu_ss.add(when: 'CONFIG_PL080', if_true: files('pl080.c')) softmmu_ss.add(when: 'CONFIG_PL330', if_true: files('pl330.c')) diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c index f1a586b1d71..2627307cc85 100644 --- a/hw/dma/pl080.c +++ b/hw/dma/pl080.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" #include "migration/vmstate.h" -#include "exec/address-spaces.h" #include "qemu/log.h" #include "qemu/module.h" #include "hw/dma/pl080.h" diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c index 944ba296b08..0cb46191c19 100644 --- a/hw/dma/pl330.c +++ b/hw/dma/pl330.c @@ -269,6 +269,9 @@ struct PL330State { uint8_t num_faulting; uint8_t periph_busy[PL330_PERIPH_NUM]; + /* Memory region that DMA operation access */ + MemoryRegion *mem_mr; + AddressSpace *mem_as; }; #define TYPE_PL330 "pl330" @@ -1108,7 +1111,7 @@ static inline const PL330InsnDesc *pl330_fetch_insn(PL330Chan *ch) uint8_t opcode; int i; - dma_memory_read(&address_space_memory, ch->pc, &opcode, 1); + dma_memory_read(ch->parent->mem_as, ch->pc, &opcode, 1); for (i = 0; insn_desc[i].size; i++) { if ((opcode & insn_desc[i].opmask) == insn_desc[i].opcode) { return &insn_desc[i]; @@ -1122,7 +1125,7 @@ static inline void pl330_exec_insn(PL330Chan *ch, const PL330InsnDesc *insn) uint8_t buf[PL330_INSN_MAXSIZE]; assert(insn->size <= PL330_INSN_MAXSIZE); - dma_memory_read(&address_space_memory, ch->pc, buf, insn->size); + dma_memory_read(ch->parent->mem_as, ch->pc, buf, insn->size); insn->exec(ch, buf[0], &buf[1], insn->size - 1); } @@ -1186,7 +1189,7 @@ static int 
pl330_exec_cycle(PL330Chan *channel) if (q != NULL && q->len <= pl330_fifo_num_free(&s->fifo)) { int len = q->len - (q->addr & (q->len - 1)); - dma_memory_read(&address_space_memory, q->addr, buf, len); + dma_memory_read(s->mem_as, q->addr, buf, len); trace_pl330_exec_cycle(q->addr, len); if (trace_event_get_state_backends(TRACE_PL330_HEXDUMP)) { pl330_hexdump(buf, len); @@ -1217,7 +1220,7 @@ static int pl330_exec_cycle(PL330Chan *channel) fifo_res = pl330_fifo_get(&s->fifo, buf, len, q->tag); } if (fifo_res == PL330_FIFO_OK || q->z) { - dma_memory_write(&address_space_memory, q->addr, buf, len); + dma_memory_write(s->mem_as, q->addr, buf, len); trace_pl330_exec_cycle(q->addr, len); if (trace_event_get_state_backends(TRACE_PL330_HEXDUMP)) { pl330_hexdump(buf, len); @@ -1562,6 +1565,18 @@ static void pl330_realize(DeviceState *dev, Error **errp) "dma", PL330_IOMEM_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); + if (!s->mem_mr) { + error_setg(errp, "'memory' link is not set"); + return; + } else if (s->mem_mr == get_system_memory()) { + /* Avoid creating new AS for system memory. */ + s->mem_as = &address_space_memory; + } else { + s->mem_as = g_new0(AddressSpace, 1); + address_space_init(s->mem_as, s->mem_mr, + memory_region_name(s->mem_mr)); + } + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pl330_exec_cycle_timer, s); s->cfg[0] = (s->mgr_ns_at_rst ? 0x4 : 0) | @@ -1656,6 +1671,9 @@ static Property pl330_properties[] = { DEFINE_PROP_UINT8("rd_q_dep", PL330State, rd_q_dep, 16), DEFINE_PROP_UINT16("data_buffer_dep", PL330State, data_buffer_dep, 256), + DEFINE_PROP_LINK("memory", PL330State, mem_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/dma/puv3_dma.c b/hw/dma/puv3_dma.c deleted file mode 100644 index cca1e9ec21b..00000000000 --- a/hw/dma/puv3_dma.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * DMA device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define PUV3_DMA_CH_NR (6) -#define PUV3_DMA_CH_MASK (0xff) -#define PUV3_DMA_CH(offset) ((offset) >> 8) - -#define TYPE_PUV3_DMA "puv3_dma" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3DMAState, PUV3_DMA) - -struct PUV3DMAState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - uint32_t reg_CFG[PUV3_DMA_CH_NR]; -}; - -static uint64_t puv3_dma_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3DMAState *s = opaque; - uint32_t ret = 0; - - assert(PUV3_DMA_CH(offset) < PUV3_DMA_CH_NR); - - switch (offset & PUV3_DMA_CH_MASK) { - case 0x10: - ret = s->reg_CFG[PUV3_DMA_CH(offset)]; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - - return ret; -} - -static void puv3_dma_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3DMAState *s = opaque; - - assert(PUV3_DMA_CH(offset) < PUV3_DMA_CH_NR); - - switch (offset & PUV3_DMA_CH_MASK) { - case 0x10: - s->reg_CFG[PUV3_DMA_CH(offset)] = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); -} - -static const MemoryRegionOps puv3_dma_ops = { - .read = puv3_dma_read, - .write = puv3_dma_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_dma_realize(DeviceState *dev, Error **errp) -{ - PUV3DMAState *s = PUV3_DMA(dev); - int i; - - for (i = 0; i < PUV3_DMA_CH_NR; i++) { - s->reg_CFG[i] = 0x0; - } - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_dma_ops, s, "puv3_dma", - PUV3_REGS_OFFSET); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); -} - -static void puv3_dma_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_dma_realize; -} - -static const TypeInfo puv3_dma_info = { - .name = TYPE_PUV3_DMA, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3DMAState), - .class_init = puv3_dma_class_init, -}; - -static void puv3_dma_register_type(void) -{ - type_register_static(&puv3_dma_info); -} - -type_init(puv3_dma_register_type) diff --git a/hw/dma/pxa2xx_dma.c b/hw/dma/pxa2xx_dma.c index b3707ff3de2..fa896f7edf7 100644 --- a/hw/dma/pxa2xx_dma.c +++ b/hw/dma/pxa2xx_dma.c @@ -525,7 +525,7 @@ static bool is_version_0(void *opaque, int version_id) return version_id == 0; } -static VMStateDescription vmstate_pxa2xx_dma_chan = { +static const VMStateDescription vmstate_pxa2xx_dma_chan = { .name = "pxa2xx_dma_chan", .version_id = 1, .minimum_version_id = 1, @@ -540,7 +540,7 @@ static VMStateDescription vmstate_pxa2xx_dma_chan = { }, }; -static VMStateDescription vmstate_pxa2xx_dma = { +static const VMStateDescription vmstate_pxa2xx_dma = { .name = "pxa2xx_dma", .version_id = 1, .minimum_version_id = 0, diff --git a/hw/dma/sifive_pdma.c b/hw/dma/sifive_pdma.c index e1f6fedbda7..85fe34f5f31 100644 --- a/hw/dma/sifive_pdma.c +++ b/hw/dma/sifive_pdma.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" @@ -55,6 +54,13 @@ #define DMA_EXEC_DST 0x110 #define DMA_EXEC_SRC 0x118 +/* + * FU540/FU740 docs are incorrect with 
NextConfig.wsize/rsize reset values. + * The reset values tested on Unleashed/Unmatched boards are 6 instead of 0. + */ +#define CONFIG_WRSZ_DEFAULT 6 +#define CONFIG_RDSZ_DEFAULT 6 + enum dma_chan_state { DMA_CHAN_STATE_IDLE, DMA_CHAN_STATE_STARTED, @@ -68,13 +74,13 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) uint64_t dst = s->chan[ch].next_dst; uint64_t src = s->chan[ch].next_src; uint32_t config = s->chan[ch].next_config; - int wsize, rsize, size; + int wsize, rsize, size, remainder; uint8_t buf[64]; int n; /* do nothing if bytes to transfer is zero */ if (!bytes) { - goto error; + goto done; } /* @@ -100,11 +106,7 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) size = 6; } size = 1 << size; - - /* the bytes to transfer should be multiple of transaction size */ - if (bytes % size) { - goto error; - } + remainder = bytes % size; /* indicate a DMA transfer is started */ s->chan[ch].state = DMA_CHAN_STATE_STARTED; @@ -125,10 +127,13 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) s->chan[ch].exec_bytes -= size; } - /* indicate a DMA transfer is done */ - s->chan[ch].state = DMA_CHAN_STATE_DONE; - s->chan[ch].control &= ~CONTROL_RUN; - s->chan[ch].control |= CONTROL_DONE; + if (remainder) { + cpu_physical_memory_read(s->chan[ch].exec_src, buf, remainder); + cpu_physical_memory_write(s->chan[ch].exec_dst, buf, remainder); + s->chan[ch].exec_src += remainder; + s->chan[ch].exec_dst += remainder; + s->chan[ch].exec_bytes -= remainder; + } /* reload exec_ registers if repeat is required */ if (s->chan[ch].next_config & CONFIG_REPEAT) { @@ -137,6 +142,11 @@ static void sifive_pdma_run(SiFivePDMAState *s, int ch) s->chan[ch].exec_src = src; } +done: + /* indicate a DMA transfer is done */ + s->chan[ch].state = DMA_CHAN_STATE_DONE; + s->chan[ch].control &= ~CONTROL_RUN; + s->chan[ch].control |= CONTROL_DONE; return; error: @@ -222,6 +232,7 @@ static void sifive_pdma_write(void *opaque, hwaddr offset, { SiFivePDMAState *s = opaque; int ch = SIFIVE_PDMA_CHAN_NO(offset); + bool claimed, run; if (ch >= SIFIVE_PDMA_CHANS) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid channel no %d\n", @@ -232,8 +243,35 @@ static void sifive_pdma_write(void *opaque, hwaddr offset, offset &= 0xfff; switch (offset) { case DMA_CONTROL: + claimed = !!(s->chan[ch].control & CONTROL_CLAIM); + run = !!(s->chan[ch].control & CONTROL_RUN); + + if (!claimed && (value & CONTROL_CLAIM)) { + /* reset Next* registers */ + s->chan[ch].next_config = (CONFIG_RDSZ_DEFAULT << CONFIG_RDSZ_SHIFT) | + (CONFIG_WRSZ_DEFAULT << CONFIG_WRSZ_SHIFT); + s->chan[ch].next_bytes = 0; + s->chan[ch].next_dst = 0; + s->chan[ch].next_src = 0; + } + + /* claim bit can only be cleared when run is low */ + if (run && !(value & CONTROL_CLAIM)) { + value |= CONTROL_CLAIM; + } + s->chan[ch].control = value; + /* + * If channel was not claimed before run bit is set, + * or if the channel is disclaimed when run was low, + * DMA won't run. + */ + if (!claimed || (!run && !(value & CONTROL_CLAIM))) { + s->chan[ch].control &= ~CONTROL_RUN; + return; + } + if (value & CONTROL_RUN) { sifive_pdma_run(s, ch); } diff --git a/hw/dma/trace-events b/hw/dma/trace-events index 44893995f63..3c47df54e4d 100644 --- a/hw/dma/trace-events +++ b/hw/dma/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
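The reworked DMA_CONTROL handling above enforces a claim-before-run ordering: claiming a channel resets its Next* registers (with wsize/rsize defaulting to 6), the claim bit cannot be dropped while run is high, and setting run on a channel that was not already claimed does nothing. A guest-side programming sketch under those rules; writel() and PDMA_BASE are assumed helpers, and the DMA_NEXT_* offset names are assumptions matching the model's naming style, none of which appear in this hunk:

    /* Illustrative sequence for one SiFive PDMA channel (not part of the patch). */
    writel(PDMA_BASE + DMA_CONTROL, CONTROL_CLAIM);   /* claim; Next* regs reset  */
    writel(PDMA_BASE + DMA_NEXT_BYTES, len);          /* len need no longer be a  */
    writel(PDMA_BASE + DMA_NEXT_DST, dst);            /* multiple of the burst    */
    writel(PDMA_BASE + DMA_NEXT_SRC, src);            /* size                     */
    writel(PDMA_BASE + DMA_CONTROL, CONTROL_CLAIM | CONTROL_RUN);  /* start */
    /* A write that clears CONTROL_CLAIM while CONTROL_RUN is still set is
     * ignored; CONTROL_RUN on an unclaimed channel does not start a transfer. */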
# rc4030.c jazzio_read(uint64_t addr, uint32_t ret) "read reg[0x%"PRIx64"] = 0x%x" diff --git a/hw/dma/xlnx-zdma.c b/hw/dma/xlnx-zdma.c index fa38a556341..a5a92b4ff8c 100644 --- a/hw/dma/xlnx-zdma.c +++ b/hw/dma/xlnx-zdma.c @@ -320,9 +320,9 @@ static bool zdma_load_descriptor(XlnxZDMA *s, uint64_t addr, return false; } - descr->addr = address_space_ldq_le(s->dma_as, addr, s->attr, NULL); - descr->size = address_space_ldl_le(s->dma_as, addr + 8, s->attr, NULL); - descr->attr = address_space_ldl_le(s->dma_as, addr + 12, s->attr, NULL); + descr->addr = address_space_ldq_le(&s->dma_as, addr, s->attr, NULL); + descr->size = address_space_ldl_le(&s->dma_as, addr + 8, s->attr, NULL); + descr->attr = address_space_ldl_le(&s->dma_as, addr + 12, s->attr, NULL); return true; } @@ -354,7 +354,7 @@ static void zdma_update_descr_addr(XlnxZDMA *s, bool type, } else { addr = zdma_get_regaddr64(s, basereg); addr += sizeof(s->dsc_dst); - next = address_space_ldq_le(s->dma_as, addr, s->attr, NULL); + next = address_space_ldq_le(&s->dma_as, addr, s->attr, NULL); } zdma_put_regaddr64(s, basereg, next); @@ -421,7 +421,7 @@ static void zdma_write_dst(XlnxZDMA *s, uint8_t *buf, uint32_t len) } } - address_space_write(s->dma_as, s->dsc_dst.addr, s->attr, buf, dlen); + address_space_write(&s->dma_as, s->dsc_dst.addr, s->attr, buf, dlen); if (burst_type == AXI_BURST_INCR) { s->dsc_dst.addr += dlen; } @@ -497,7 +497,7 @@ static void zdma_process_descr(XlnxZDMA *s) len = s->cfg.bus_width / 8; } } else { - address_space_read(s->dma_as, src_addr, s->attr, s->buf, len); + address_space_read(&s->dma_as, src_addr, s->attr, s->buf, len); if (burst_type == AXI_BURST_INCR) { src_addr += len; } @@ -765,6 +765,12 @@ static void zdma_realize(DeviceState *dev, Error **errp) XlnxZDMA *s = XLNX_ZDMA(dev); unsigned int i; + if (!s->dma_mr) { + error_setg(errp, TYPE_XLNX_ZDMA " 'dma' link not set"); + return; + } + address_space_init(&s->dma_as, s->dma_mr, "zdma-dma"); + for (i = 0; i < ARRAY_SIZE(zdma_regs_info); ++i) { RegisterInfo *r = &s->regs_info[zdma_regs_info[i].addr / 4]; @@ -777,12 +783,6 @@ static void zdma_realize(DeviceState *dev, Error **errp) }; } - if (s->dma_mr) { - s->dma_as = g_malloc0(sizeof(AddressSpace)); - address_space_init(s->dma_as, s->dma_mr, NULL); - } else { - s->dma_as = &address_space_memory; - } s->attr = MEMTXATTRS_UNSPECIFIED; } diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c index 98324dadcd4..896bb3574dd 100644 --- a/hw/dma/xlnx_csu_dma.c +++ b/hw/dma/xlnx_csu_dma.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" @@ -202,11 +201,11 @@ static uint32_t xlnx_csu_dma_read(XlnxCSUDMA *s, uint8_t *buf, uint32_t len) for (i = 0; i < len && (result == MEMTX_OK); i += s->width) { uint32_t mlen = MIN(len - i, s->width); - result = address_space_rw(s->dma_as, addr, s->attr, + result = address_space_rw(&s->dma_as, addr, s->attr, buf + i, mlen, false); } } else { - result = address_space_rw(s->dma_as, addr, s->attr, buf, len, false); + result = address_space_rw(&s->dma_as, addr, s->attr, buf, len, false); } if (result == MEMTX_OK) { @@ -233,12 +232,12 @@ static uint32_t xlnx_csu_dma_write(XlnxCSUDMA *s, uint8_t *buf, uint32_t len) for (i = 0; i < len && (result == MEMTX_OK); i += s->width) { uint32_t mlen = MIN(len - i, s->width); - result = address_space_rw(s->dma_as, addr, s->attr, + result = address_space_rw(&s->dma_as, addr, s->attr, buf, mlen, true); buf += mlen; 
} } else { - result = address_space_rw(s->dma_as, addr, s->attr, buf, len, true); + result = address_space_rw(&s->dma_as, addr, s->attr, buf, len, true); } if (result != MEMTX_OK) { @@ -627,6 +626,17 @@ static void xlnx_csu_dma_realize(DeviceState *dev, Error **errp) XlnxCSUDMA *s = XLNX_CSU_DMA(dev); RegisterInfoArray *reg_array; + if (!s->is_dst && !s->tx_dev) { + error_setg(errp, "zynqmp.csu-dma: Stream not connected"); + return; + } + + if (!s->dma_mr) { + error_setg(errp, TYPE_XLNX_CSU_DMA " 'dma' link not set"); + return; + } + address_space_init(&s->dma_as, s->dma_mr, "csu-dma"); + reg_array = register_init_block32(dev, xlnx_csu_dma_regs_info[!!s->is_dst], XLNX_CSU_DMA_R_MAX, @@ -641,21 +651,9 @@ static void xlnx_csu_dma_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); - if (!s->is_dst && !s->tx_dev) { - error_setg(errp, "zynqmp.csu-dma: Stream not connected"); - return; - } - s->src_timer = ptimer_init(xlnx_csu_dma_src_timeout_hit, s, PTIMER_POLICY_DEFAULT); - if (s->dma_mr) { - s->dma_as = g_malloc0(sizeof(AddressSpace)); - address_space_init(s->dma_as, s->dma_mr, NULL); - } else { - s->dma_as = &address_space_memory; - } - s->attr = MEMTXATTRS_UNSPECIFIED; s->r_size_last_word = 0; diff --git a/hw/gpio/aspeed_gpio.c b/hw/gpio/aspeed_gpio.c index 985a259e05b..911d21c8cfb 100644 --- a/hw/gpio/aspeed_gpio.c +++ b/hw/gpio/aspeed_gpio.c @@ -10,17 +10,13 @@ #include "qemu/host-utils.h" #include "qemu/log.h" #include "hw/gpio/aspeed_gpio.h" -#include "include/hw/misc/aspeed_scu.h" +#include "hw/misc/aspeed_scu.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/irq.h" #include "migration/vmstate.h" -#define GPIOS_PER_REG 32 -#define GPIOS_PER_SET GPIOS_PER_REG -#define GPIO_PIN_GAP_SIZE 4 #define GPIOS_PER_GROUP 8 -#define GPIO_GROUP_SHIFT 3 /* GPIO Source Types */ #define ASPEED_CMD_SRC_MASK 0x01010101 @@ -164,50 +160,48 @@ #define GPIO_YZAAAB_DIRECTION (0x1E4 >> 2) #define GPIO_AC_DATA_VALUE (0x1E8 >> 2) #define GPIO_AC_DIRECTION (0x1EC >> 2) -#define GPIO_3_6V_MEM_SIZE 0x1F0 -#define GPIO_3_6V_REG_ARRAY_SIZE (GPIO_3_6V_MEM_SIZE >> 2) +#define GPIO_3_3V_MEM_SIZE 0x1F0 +#define GPIO_3_3V_REG_ARRAY_SIZE (GPIO_3_3V_MEM_SIZE >> 2) /* AST2600 only - 1.8V gpios */ /* - * The AST2600 has same 3.6V gpios as the AST2400 (memory offsets 0x0-0x198) - * and addtional 1.8V gpios (memory offsets 0x800-0x9D4). + * The AST2600 two copies of the GPIO controller: the same 3.3V gpios as the + * AST2400 (memory offsets 0x0-0x198) and a second controller with 1.8V gpios + * (memory offsets 0x800-0x9D4). 
*/ -#define GPIO_1_8V_REG_OFFSET 0x800 -#define GPIO_1_8V_ABCD_DATA_VALUE ((0x800 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DIRECTION ((0x804 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_ENABLE ((0x808 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_SENS_0 ((0x80C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_SENS_1 ((0x810 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_SENS_2 ((0x814 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INT_STATUS ((0x818 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_RESET_TOLERANT ((0x81C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DATA_VALUE ((0x820 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DIRECTION ((0x824 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_ENABLE ((0x828 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_SENS_0 ((0x82C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_SENS_1 ((0x830 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_SENS_2 ((0x834 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INT_STATUS ((0x838 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_RESET_TOLERANT ((0x83C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DEBOUNCE_1 ((0x840 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DEBOUNCE_2 ((0x844 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DEBOUNCE_1 ((0x848 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DEBOUNCE_2 ((0x84C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_DEBOUNCE_TIME_1 ((0x850 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_DEBOUNCE_TIME_2 ((0x854 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_DEBOUNCE_TIME_3 ((0x858 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_COMMAND_SRC_0 ((0x860 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_COMMAND_SRC_1 ((0x864 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_COMMAND_SRC_0 ((0x868 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_COMMAND_SRC_1 ((0x86C - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_DATA_READ ((0x8C0 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_DATA_READ ((0x8C4 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_ABCD_INPUT_MASK ((0x9D0 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_E_INPUT_MASK ((0x9D4 - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_1_8V_MEM_SIZE 0x9D8 -#define GPIO_1_8V_REG_ARRAY_SIZE ((GPIO_1_8V_MEM_SIZE - \ - GPIO_1_8V_REG_OFFSET) >> 2) -#define GPIO_MAX_MEM_SIZE MAX(GPIO_3_6V_MEM_SIZE, GPIO_1_8V_MEM_SIZE) +#define GPIO_1_8V_ABCD_DATA_VALUE (0x000 >> 2) +#define GPIO_1_8V_ABCD_DIRECTION (0x004 >> 2) +#define GPIO_1_8V_ABCD_INT_ENABLE (0x008 >> 2) +#define GPIO_1_8V_ABCD_INT_SENS_0 (0x00C >> 2) +#define GPIO_1_8V_ABCD_INT_SENS_1 (0x010 >> 2) +#define GPIO_1_8V_ABCD_INT_SENS_2 (0x014 >> 2) +#define GPIO_1_8V_ABCD_INT_STATUS (0x018 >> 2) +#define GPIO_1_8V_ABCD_RESET_TOLERANT (0x01C >> 2) +#define GPIO_1_8V_E_DATA_VALUE (0x020 >> 2) +#define GPIO_1_8V_E_DIRECTION (0x024 >> 2) +#define GPIO_1_8V_E_INT_ENABLE (0x028 >> 2) +#define GPIO_1_8V_E_INT_SENS_0 (0x02C >> 2) +#define GPIO_1_8V_E_INT_SENS_1 (0x030 >> 2) +#define GPIO_1_8V_E_INT_SENS_2 (0x034 >> 2) +#define GPIO_1_8V_E_INT_STATUS (0x038 >> 2) +#define GPIO_1_8V_E_RESET_TOLERANT (0x03C >> 2) +#define GPIO_1_8V_ABCD_DEBOUNCE_1 (0x040 >> 2) +#define GPIO_1_8V_ABCD_DEBOUNCE_2 (0x044 >> 2) +#define GPIO_1_8V_E_DEBOUNCE_1 (0x048 >> 2) +#define GPIO_1_8V_E_DEBOUNCE_2 (0x04C >> 2) +#define GPIO_1_8V_DEBOUNCE_TIME_1 (0x050 >> 2) +#define GPIO_1_8V_DEBOUNCE_TIME_2 (0x054 >> 2) +#define GPIO_1_8V_DEBOUNCE_TIME_3 (0x058 >> 2) 
+#define GPIO_1_8V_ABCD_COMMAND_SRC_0 (0x060 >> 2) +#define GPIO_1_8V_ABCD_COMMAND_SRC_1 (0x064 >> 2) +#define GPIO_1_8V_E_COMMAND_SRC_0 (0x068 >> 2) +#define GPIO_1_8V_E_COMMAND_SRC_1 (0x06C >> 2) +#define GPIO_1_8V_ABCD_DATA_READ (0x0C0 >> 2) +#define GPIO_1_8V_E_DATA_READ (0x0C4 >> 2) +#define GPIO_1_8V_ABCD_INPUT_MASK (0x1D0 >> 2) +#define GPIO_1_8V_E_INPUT_MASK (0x1D4 >> 2) +#define GPIO_1_8V_MEM_SIZE 0x1D8 +#define GPIO_1_8V_REG_ARRAY_SIZE (GPIO_1_8V_MEM_SIZE >> 2) static int aspeed_evaluate_irq(GPIOSets *regs, int gpio_prev_high, int gpio) { @@ -261,7 +255,7 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, diff = old ^ new; if (diff) { - for (gpio = 0; gpio < GPIOS_PER_REG; gpio++) { + for (gpio = 0; gpio < ASPEED_GPIOS_PER_SET; gpio++) { uint32_t mask = 1 << gpio; /* If the gpio needs to be updated... */ @@ -285,8 +279,7 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, if (direction & mask) { /* ...trigger the line-state IRQ */ ptrdiff_t set = aspeed_gpio_set_idx(s, regs); - size_t offset = set * GPIOS_PER_SET + gpio; - qemu_set_irq(s->gpios[offset], !!(new & mask)); + qemu_set_irq(s->gpios[set][gpio], !!(new & mask)); } else { /* ...otherwise if we meet the line's current IRQ policy... */ if (aspeed_evaluate_irq(regs, old & mask, gpio)) { @@ -299,21 +292,6 @@ static void aspeed_gpio_update(AspeedGPIOState *s, GPIOSets *regs, qemu_set_irq(s->irq, !!(s->pending)); } -static uint32_t aspeed_adjust_pin(AspeedGPIOState *s, uint32_t pin) -{ - AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s); - /* - * The 2500 has a 4 pin gap in group AB and the 2400 has a 4 pin - * gap in group Y (and only four pins in AB but this is the last group so - * it doesn't matter). - */ - if (agc->gap && pin >= agc->gap) { - pin += GPIO_PIN_GAP_SIZE; - } - - return pin; -} - static bool aspeed_gpio_get_pin_level(AspeedGPIOState *s, uint32_t set_idx, uint32_t pin) { @@ -369,7 +347,7 @@ static uint32_t update_value_control_source(GPIOSets *regs, uint32_t old_value, uint32_t new_value = 0; /* for each group in set */ - for (i = 0; i < GPIOS_PER_REG; i += GPIOS_PER_GROUP) { + for (i = 0; i < ASPEED_GPIOS_PER_SET; i += GPIOS_PER_GROUP) { cmd_source = extract32(regs->cmd_source_0, i, 1) | (extract32(regs->cmd_source_1, i, 1) << 1); @@ -382,7 +360,7 @@ static uint32_t update_value_control_source(GPIOSets *regs, uint32_t old_value, return new_value; } -static const AspeedGPIOReg aspeed_3_6v_gpios[GPIO_3_6V_REG_ARRAY_SIZE] = { +static const AspeedGPIOReg aspeed_3_3v_gpios[GPIO_3_3V_REG_ARRAY_SIZE] = { /* Set ABCD */ [GPIO_ABCD_DATA_VALUE] = { 0, gpio_reg_data_value }, [GPIO_ABCD_DIRECTION] = { 0, gpio_reg_direction }, @@ -639,7 +617,7 @@ static void aspeed_gpio_write(void *opaque, hwaddr offset, uint64_t data, * bidirectional | 1 | 1 | data * input only | 1 | 0 | 0 * output only | 0 | 1 | 1 - * no pin / gap | 0 | 0 | 0 + * no pin | 0 | 0 | 0 * * which is captured by: * data = ( data | ~input) & output; @@ -781,7 +759,7 @@ static void aspeed_gpio_set_pin(Object *obj, Visitor *v, const char *name, } /****************** Setup functions ******************/ -static const GPIOSetProperties ast2400_set_props[] = { +static const GPIOSetProperties ast2400_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"A", "B", "C", "D"} }, [1] = {0xffffffff, 0xffffffff, {"E", "F", "G", "H"} }, [2] = {0xffffffff, 0xffffffff, {"I", "J", "K", "L"} }, @@ -791,28 +769,28 @@ static const GPIOSetProperties ast2400_set_props[] = { [6] = {0x0000000f, 0x0fffff0f, {"Y", "Z", "AA", "AB"} }, 
}; -static const GPIOSetProperties ast2500_set_props[] = { +static const GPIOSetProperties ast2500_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"A", "B", "C", "D"} }, [1] = {0xffffffff, 0xffffffff, {"E", "F", "G", "H"} }, [2] = {0xffffffff, 0xffffffff, {"I", "J", "K", "L"} }, [3] = {0xffffffff, 0xffffffff, {"M", "N", "O", "P"} }, [4] = {0xffffffff, 0xffffffff, {"Q", "R", "S", "T"} }, [5] = {0xffffffff, 0x0000ffff, {"U", "V", "W", "X"} }, - [6] = {0xffffff0f, 0x0fffff0f, {"Y", "Z", "AA", "AB"} }, + [6] = {0x0fffffff, 0x0fffffff, {"Y", "Z", "AA", "AB"} }, [7] = {0x000000ff, 0x000000ff, {"AC"} }, }; -static GPIOSetProperties ast2600_3_6v_set_props[] = { +static GPIOSetProperties ast2600_3_3v_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"A", "B", "C", "D"} }, [1] = {0xffffffff, 0xffffffff, {"E", "F", "G", "H"} }, [2] = {0xffffffff, 0xffffffff, {"I", "J", "K", "L"} }, [3] = {0xffffffff, 0xffffffff, {"M", "N", "O", "P"} }, - [4] = {0xffffffff, 0xffffffff, {"Q", "R", "S", "T"} }, - [5] = {0xffffffff, 0x0000ffff, {"U", "V", "W", "X"} }, - [6] = {0xffff0000, 0x0fff0000, {"Y", "Z", "", ""} }, + [4] = {0xffffffff, 0x00ffffff, {"Q", "R", "S", "T"} }, + [5] = {0xffffffff, 0xffffff00, {"U", "V", "W", "X"} }, + [6] = {0x0000ffff, 0x0000ffff, {"Y", "Z"} }, }; -static GPIOSetProperties ast2600_1_8v_set_props[] = { +static GPIOSetProperties ast2600_1_8v_set_props[ASPEED_GPIO_MAX_NR_SETS] = { [0] = {0xffffffff, 0xffffffff, {"18A", "18B", "18C", "18D"} }, [1] = {0x0000000f, 0x0000000f, {"18E"} }, }; @@ -838,18 +816,24 @@ static void aspeed_gpio_realize(DeviceState *dev, Error **errp) AspeedGPIOState *s = ASPEED_GPIO(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s); - int pin; /* Interrupt parent line */ sysbus_init_irq(sbd, &s->irq); /* Individual GPIOs */ - for (pin = 0; pin < agc->nr_gpio_pins; pin++) { - sysbus_init_irq(sbd, &s->gpios[pin]); + for (int i = 0; i < ASPEED_GPIO_MAX_NR_SETS; i++) { + const GPIOSetProperties *props = &agc->props[i]; + uint32_t skip = ~(props->input | props->output); + for (int j = 0; j < ASPEED_GPIOS_PER_SET; j++) { + if (skip >> j & 1) { + continue; + } + sysbus_init_irq(sbd, &s->gpios[i][j]); + } } memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_gpio_ops, s, - TYPE_ASPEED_GPIO, GPIO_MAX_MEM_SIZE); + TYPE_ASPEED_GPIO, 0x800); sysbus_init_mmio(sbd, &s->iomem); } @@ -858,20 +842,22 @@ static void aspeed_gpio_init(Object *obj) { AspeedGPIOState *s = ASPEED_GPIO(obj); AspeedGPIOClass *agc = ASPEED_GPIO_GET_CLASS(s); - int pin; - - for (pin = 0; pin < agc->nr_gpio_pins; pin++) { - char *name; - int set_idx = pin / GPIOS_PER_SET; - int pin_idx = aspeed_adjust_pin(s, pin) - (set_idx * GPIOS_PER_SET); - int group_idx = pin_idx >> GPIO_GROUP_SHIFT; - const GPIOSetProperties *props = &agc->props[set_idx]; - - name = g_strdup_printf("gpio%s%d", props->group_label[group_idx], - pin_idx % GPIOS_PER_GROUP); - object_property_add(obj, name, "bool", aspeed_gpio_get_pin, - aspeed_gpio_set_pin, NULL, NULL); - g_free(name); + + for (int i = 0; i < ASPEED_GPIO_MAX_NR_SETS; i++) { + const GPIOSetProperties *props = &agc->props[i]; + uint32_t skip = ~(props->input | props->output); + for (int j = 0; j < ASPEED_GPIOS_PER_SET; j++) { + if (skip >> j & 1) { + continue; + } + int group_idx = j / GPIOS_PER_GROUP; + int pin_idx = j % GPIOS_PER_GROUP; + const char *group = &props->group_label[group_idx][0]; + char *name = g_strdup_printf("gpio%s%d", group, pin_idx); + object_property_add(obj, name, 
"bool", aspeed_gpio_get_pin, + aspeed_gpio_set_pin, NULL, NULL); + g_free(name); + } } } @@ -928,8 +914,7 @@ static void aspeed_gpio_ast2400_class_init(ObjectClass *klass, void *data) agc->props = ast2400_set_props; agc->nr_gpio_pins = 216; agc->nr_gpio_sets = 7; - agc->gap = 196; - agc->reg_table = aspeed_3_6v_gpios; + agc->reg_table = aspeed_3_3v_gpios; } static void aspeed_gpio_2500_class_init(ObjectClass *klass, void *data) @@ -939,18 +924,17 @@ static void aspeed_gpio_2500_class_init(ObjectClass *klass, void *data) agc->props = ast2500_set_props; agc->nr_gpio_pins = 228; agc->nr_gpio_sets = 8; - agc->gap = 220; - agc->reg_table = aspeed_3_6v_gpios; + agc->reg_table = aspeed_3_3v_gpios; } -static void aspeed_gpio_ast2600_3_6v_class_init(ObjectClass *klass, void *data) +static void aspeed_gpio_ast2600_3_3v_class_init(ObjectClass *klass, void *data) { AspeedGPIOClass *agc = ASPEED_GPIO_CLASS(klass); - agc->props = ast2600_3_6v_set_props; + agc->props = ast2600_3_3v_set_props; agc->nr_gpio_pins = 208; agc->nr_gpio_sets = 7; - agc->reg_table = aspeed_3_6v_gpios; + agc->reg_table = aspeed_3_3v_gpios; } static void aspeed_gpio_ast2600_1_8v_class_init(ObjectClass *klass, void *data) @@ -986,10 +970,10 @@ static const TypeInfo aspeed_gpio_ast2500_info = { .instance_init = aspeed_gpio_init, }; -static const TypeInfo aspeed_gpio_ast2600_3_6v_info = { +static const TypeInfo aspeed_gpio_ast2600_3_3v_info = { .name = TYPE_ASPEED_GPIO "-ast2600", .parent = TYPE_ASPEED_GPIO, - .class_init = aspeed_gpio_ast2600_3_6v_class_init, + .class_init = aspeed_gpio_ast2600_3_3v_class_init, .instance_init = aspeed_gpio_init, }; @@ -1005,7 +989,7 @@ static void aspeed_gpio_register_types(void) type_register_static(&aspeed_gpio_info); type_register_static(&aspeed_gpio_ast2400_info); type_register_static(&aspeed_gpio_ast2500_info); - type_register_static(&aspeed_gpio_ast2600_3_6v_info); + type_register_static(&aspeed_gpio_ast2600_3_3v_info); type_register_static(&aspeed_gpio_ast2600_1_8v_info); } diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c index abdddbc67c2..c995bba1d9f 100644 --- a/hw/gpio/bcm2835_gpio.c +++ b/hw/gpio/bcm2835_gpio.c @@ -299,8 +299,7 @@ static void bcm2835_gpio_init(Object *obj) DeviceState *dev = DEVICE(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_SD_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, DEVICE(s), "sd-bus"); memory_region_init_io(&s->iomem, obj, &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000); diff --git a/hw/gpio/gpio_pwr.c b/hw/gpio/gpio_pwr.c index 7714fa0dc4d..dbaf1c70c88 100644 --- a/hw/gpio/gpio_pwr.c +++ b/hw/gpio/gpio_pwr.c @@ -43,7 +43,7 @@ static void gpio_pwr_reset(void *opaque, int n, int level) static void gpio_pwr_shutdown(void *opaque, int n, int level) { if (level) { - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); } } diff --git a/hw/gpio/meson.build b/hw/gpio/meson.build index 79568f00ce3..7bd6a57264b 100644 --- a/hw/gpio/meson.build +++ b/hw/gpio/meson.build @@ -3,7 +3,6 @@ softmmu_ss.add(when: 'CONFIG_GPIO_KEY', if_true: files('gpio_key.c')) softmmu_ss.add(when: 'CONFIG_GPIO_PWR', if_true: files('gpio_pwr.c')) softmmu_ss.add(when: 'CONFIG_MAX7310', if_true: files('max7310.c')) softmmu_ss.add(when: 'CONFIG_PL061', if_true: files('pl061.c')) -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_gpio.c')) softmmu_ss.add(when: 'CONFIG_ZAURUS', if_true: files('zaurus.c')) 
softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_gpio.c')) diff --git a/hw/gpio/pl061.c b/hw/gpio/pl061.c index e72e77572a0..899be861cc5 100644 --- a/hw/gpio/pl061.c +++ b/hw/gpio/pl061.c @@ -6,28 +6,39 @@ * Written by Paul Brook * * This code is licensed under the GPL. + * + * QEMU interface: + * + sysbus MMIO region 0: the device registers + * + sysbus IRQ: the GPIOINTR interrupt line + * + unnamed GPIO inputs 0..7: inputs to connect to the emulated GPIO lines + * + unnamed GPIO outputs 0..7: the emulated GPIO lines, considered as + * outputs + * + QOM property "pullups": an integer defining whether non-floating lines + * configured as inputs should be pulled up to logical 1 (ie whether in + * real hardware they have a pullup resistor on the line out of the PL061). + * This should be an 8-bit value, where bit 0 is 1 if GPIO line 0 should + * be pulled high, bit 1 configures line 1, and so on. The default is 0xff, + * indicating that all GPIO lines are pulled up to logical 1. + * + QOM property "pulldowns": an integer defining whether non-floating lines + * configured as inputs should be pulled down to logical 0 (ie whether in + * real hardware they have a pulldown resistor on the line out of the PL061). + * This should be an 8-bit value, where bit 0 is 1 if GPIO line 0 should + * be pulled low, bit 1 configures line 1, and so on. The default is 0x0. + * It is an error to set a bit in both "pullups" and "pulldowns". If a bit + * is 0 in both, then the line is considered to be floating, and it will + * not have qemu_set_irq() called on it when it is configured as an input. */ #include "qemu/osdep.h" #include "hw/irq.h" #include "hw/sysbus.h" +#include "hw/qdev-properties.h" #include "migration/vmstate.h" +#include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" #include "qom/object.h" - -//#define DEBUG_PL061 1 - -#ifdef DEBUG_PL061 -#define DPRINTF(fmt, ...) \ -do { printf("pl061: " fmt , ## __VA_ARGS__); } while (0) -#define BADF(fmt, ...) \ -do { fprintf(stderr, "pl061: error: " fmt , ## __VA_ARGS__); exit(1);} while (0) -#else -#define DPRINTF(fmt, ...) do {} while(0) -#define BADF(fmt, ...) \ -do { fprintf(stderr, "pl061: error: " fmt , ## __VA_ARGS__);} while (0) -#endif +#include "trace.h" static const uint8_t pl061_id[12] = { 0x00, 0x00, 0x00, 0x00, 0x61, 0x10, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1 }; @@ -67,7 +78,9 @@ struct PL061State { qemu_irq irq; qemu_irq out[N_GPIOS]; const unsigned char *id; - uint32_t rsvd_start; /* reserved area: [rsvd_start, 0xfcc] */ + /* Properties, for non-Luminary PL061 */ + uint32_t pullups; + uint32_t pulldowns; }; static const VMStateDescription vmstate_pl061 = { @@ -100,26 +113,75 @@ static const VMStateDescription vmstate_pl061 = { } }; +static uint8_t pl061_floating(PL061State *s) +{ + /* + * Return mask of bits which correspond to pins configured as inputs + * and which are floating (neither pulled up to 1 nor down to 0). + */ + uint8_t floating; + + if (s->id == pl061_id_luminary) { + /* + * If both PUR and PDR bits are clear, there is neither a pullup + * nor a pulldown in place, and the output truly floats. + */ + floating = ~(s->pur | s->pdr); + } else { + floating = ~(s->pullups | s->pulldowns); + } + return floating & ~s->dir; +} + +static uint8_t pl061_pullups(PL061State *s) +{ + /* + * Return mask of bits which correspond to pins configured as inputs + * and which are pulled up to 1. 
+ */ + uint8_t pullups; + + if (s->id == pl061_id_luminary) { + /* + * The Luminary variant of the PL061 has an extra registers which + * the guest can use to configure whether lines should be pullup + * or pulldown. + */ + pullups = s->pur; + } else { + pullups = s->pullups; + } + return pullups & ~s->dir; +} + static void pl061_update(PL061State *s) { uint8_t changed; uint8_t mask; uint8_t out; int i; - - DPRINTF("dir = %d, data = %d\n", s->dir, s->data); - - /* Outputs float high. */ - /* FIXME: This is board dependent. */ - out = (s->data & s->dir) | ~s->dir; + uint8_t pullups = pl061_pullups(s); + uint8_t floating = pl061_floating(s); + + trace_pl061_update(DEVICE(s)->canonical_path, s->dir, s->data, + pullups, floating); + + /* + * Pins configured as output are driven from the data register; + * otherwise if they're pulled up they're 1, and if they're floating + * then we give them the same value they had previously, so we don't + * report any change to the other end. + */ + out = (s->data & s->dir) | pullups | (s->old_out_data & floating); changed = s->old_out_data ^ out; if (changed) { s->old_out_data = out; for (i = 0; i < N_GPIOS; i++) { mask = 1 << i; if (changed & mask) { - DPRINTF("Set output %d = %d\n", i, (out & mask) != 0); - qemu_set_irq(s->out[i], (out & mask) != 0); + int level = (out & mask) != 0; + trace_pl061_set_output(DEVICE(s)->canonical_path, i, level); + qemu_set_irq(s->out[i], level); } } } @@ -131,7 +193,8 @@ static void pl061_update(PL061State *s) for (i = 0; i < N_GPIOS; i++) { mask = 1 << i; if (changed & mask) { - DPRINTF("Changed input %d = %d\n", i, (s->data & mask) != 0); + trace_pl061_input_change(DEVICE(s)->canonical_path, i, + (s->data & mask) != 0); if (!(s->isense & mask)) { /* Edge interrupt */ @@ -150,7 +213,8 @@ static void pl061_update(PL061State *s) /* Level interrupt */ s->istate |= ~(s->data ^ s->iev) & s->isense; - DPRINTF("istate = %02X\n", s->istate); + trace_pl061_update_istate(DEVICE(s)->canonical_path, + s->istate, s->im, (s->istate & s->im) != 0); qemu_set_irq(s->irq, (s->istate & s->im) != 0); } @@ -159,62 +223,114 @@ static uint64_t pl061_read(void *opaque, hwaddr offset, unsigned size) { PL061State *s = (PL061State *)opaque; + uint64_t r = 0; - if (offset < 0x400) { - return s->data & (offset >> 2); - } - if (offset >= s->rsvd_start && offset <= 0xfcc) { - goto err_out; - } - if (offset >= 0xfd0 && offset < 0x1000) { - return s->id[(offset - 0xfd0) >> 2]; - } switch (offset) { + case 0x0 ... 
0x3ff: /* Data */ + r = s->data & (offset >> 2); + break; case 0x400: /* Direction */ - return s->dir; + r = s->dir; + break; case 0x404: /* Interrupt sense */ - return s->isense; + r = s->isense; + break; case 0x408: /* Interrupt both edges */ - return s->ibe; + r = s->ibe; + break; case 0x40c: /* Interrupt event */ - return s->iev; + r = s->iev; + break; case 0x410: /* Interrupt mask */ - return s->im; + r = s->im; + break; case 0x414: /* Raw interrupt status */ - return s->istate; + r = s->istate; + break; case 0x418: /* Masked interrupt status */ - return s->istate & s->im; + r = s->istate & s->im; + break; case 0x420: /* Alternate function select */ - return s->afsel; + r = s->afsel; + break; case 0x500: /* 2mA drive */ - return s->dr2r; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->dr2r; + break; case 0x504: /* 4mA drive */ - return s->dr4r; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->dr4r; + break; case 0x508: /* 8mA drive */ - return s->dr8r; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->dr8r; + break; case 0x50c: /* Open drain */ - return s->odr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->odr; + break; case 0x510: /* Pull-up */ - return s->pur; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->pur; + break; case 0x514: /* Pull-down */ - return s->pdr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->pdr; + break; case 0x518: /* Slew rate control */ - return s->slr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->slr; + break; case 0x51c: /* Digital enable */ - return s->den; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->den; + break; case 0x520: /* Lock */ - return s->locked; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->locked; + break; case 0x524: /* Commit */ - return s->cr; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->cr; + break; case 0x528: /* Analog mode select */ - return s->amsel; + if (s->id != pl061_id_luminary) { + goto bad_offset; + } + r = s->amsel; + break; + case 0xfd0 ... 0xfff: /* ID registers */ + r = s->id[(offset - 0xfd0) >> 2]; + break; default: + bad_offset: + qemu_log_mask(LOG_GUEST_ERROR, + "pl061_read: Bad offset %x\n", (int)offset); break; } -err_out: - qemu_log_mask(LOG_GUEST_ERROR, - "pl061_read: Bad offset %x\n", (int)offset); - return 0; + + trace_pl061_read(DEVICE(s)->canonical_path, offset, r); + return r; } static void pl061_write(void *opaque, hwaddr offset, @@ -223,16 +339,14 @@ static void pl061_write(void *opaque, hwaddr offset, PL061State *s = (PL061State *)opaque; uint8_t mask; - if (offset < 0x400) { + trace_pl061_write(DEVICE(s)->canonical_path, offset, value); + + switch (offset) { + case 0 ... 
0x3ff: mask = (offset >> 2) & s->dir; s->data = (s->data & ~mask) | (value & mask); pl061_update(s); return; - } - if (offset >= s->rsvd_start) { - goto err_out; - } - switch (offset) { case 0x400: /* Direction */ s->dir = value & 0xff; break; @@ -256,56 +370,99 @@ static void pl061_write(void *opaque, hwaddr offset, s->afsel = (s->afsel & ~mask) | (value & mask); break; case 0x500: /* 2mA drive */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->dr2r = value & 0xff; break; case 0x504: /* 4mA drive */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->dr4r = value & 0xff; break; case 0x508: /* 8mA drive */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->dr8r = value & 0xff; break; case 0x50c: /* Open drain */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->odr = value & 0xff; break; case 0x510: /* Pull-up */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->pur = value & 0xff; break; case 0x514: /* Pull-down */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->pdr = value & 0xff; break; case 0x518: /* Slew rate control */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->slr = value & 0xff; break; case 0x51c: /* Digital enable */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->den = value & 0xff; break; case 0x520: /* Lock */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->locked = (value != 0xacce551); break; case 0x524: /* Commit */ + if (s->id != pl061_id_luminary) { + goto bad_offset; + } if (!s->locked) s->cr = value & 0xff; break; case 0x528: + if (s->id != pl061_id_luminary) { + goto bad_offset; + } s->amsel = value & 0xff; break; default: - goto err_out; + bad_offset: + qemu_log_mask(LOG_GUEST_ERROR, + "pl061_write: Bad offset %x\n", (int)offset); + return; } pl061_update(s); return; -err_out: - qemu_log_mask(LOG_GUEST_ERROR, - "pl061_write: Bad offset %x\n", (int)offset); } -static void pl061_reset(DeviceState *dev) +static void pl061_enter_reset(Object *obj, ResetType type) { - PL061State *s = PL061(dev); + PL061State *s = PL061(obj); + + trace_pl061_reset(DEVICE(s)->canonical_path); /* reset values from PL061 TRM, Stellaris LM3S5P31 & LM3S8962 Data Sheet */ + + /* + * FIXME: For the LM3S6965, not all of the PL061 instances have the + * same reset values for GPIOPUR, GPIOAFSEL and GPIODEN, so in theory + * we should allow the board to configure these via properties. + * In practice, we don't wire anything up to the affected GPIO lines + * (PB7, PC0, PC1, PC2, PC3 -- they're used for JTAG), so we can + * get away with this inaccuracy. 
+ */ s->data = 0; - s->old_out_data = 0; s->old_in_data = 0; s->dir = 0; s->isense = 0; @@ -327,6 +484,24 @@ static void pl061_reset(DeviceState *dev) s->amsel = 0; } +static void pl061_hold_reset(Object *obj) +{ + PL061State *s = PL061(obj); + int i, level; + uint8_t floating = pl061_floating(s); + uint8_t pullups = pl061_pullups(s); + + for (i = 0; i < N_GPIOS; i++) { + if (extract32(floating, i, 1)) { + continue; + } + level = extract32(pullups, i, 1); + trace_pl061_set_output(DEVICE(s)->canonical_path, i, level); + qemu_set_irq(s->out[i], level); + } + s->old_out_data = pullups; +} + static void pl061_set_irq(void * opaque, int irq, int level) { PL061State *s = (PL061State *)opaque; @@ -352,7 +527,6 @@ static void pl061_luminary_init(Object *obj) PL061State *s = PL061(obj); s->id = pl061_id_luminary; - s->rsvd_start = 0x52c; } static void pl061_init(Object *obj) @@ -362,7 +536,6 @@ static void pl061_init(Object *obj) SysBusDevice *sbd = SYS_BUS_DEVICE(obj); s->id = pl061_id; - s->rsvd_start = 0x424; memory_region_init_io(&s->iomem, obj, &pl061_ops, s, "pl061", 0x1000); sysbus_init_mmio(sbd, &s->iomem); @@ -371,12 +544,40 @@ static void pl061_init(Object *obj) qdev_init_gpio_out(dev, s->out, N_GPIOS); } +static void pl061_realize(DeviceState *dev, Error **errp) +{ + PL061State *s = PL061(dev); + + if (s->pullups > 0xff) { + error_setg(errp, "pullups property must be between 0 and 0xff"); + return; + } + if (s->pulldowns > 0xff) { + error_setg(errp, "pulldowns property must be between 0 and 0xff"); + return; + } + if (s->pullups & s->pulldowns) { + error_setg(errp, "no bit may be set both in pullups and pulldowns"); + return; + } +} + +static Property pl061_props[] = { + DEFINE_PROP_UINT32("pullups", PL061State, pullups, 0xff), + DEFINE_PROP_UINT32("pulldowns", PL061State, pulldowns, 0x0), + DEFINE_PROP_END_OF_LIST() +}; + static void pl061_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); dc->vmsd = &vmstate_pl061; - dc->reset = &pl061_reset; + dc->realize = pl061_realize; + device_class_set_props(dc, pl061_props); + rc->phases.enter = pl061_enter_reset; + rc->phases.hold = pl061_hold_reset; } static const TypeInfo pl061_info = { diff --git a/hw/gpio/puv3_gpio.c b/hw/gpio/puv3_gpio.c deleted file mode 100644 index e003ae505cf..00000000000 --- a/hw/gpio/puv3_gpio.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * GPIO device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. 
- */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define TYPE_PUV3_GPIO "puv3_gpio" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3GPIOState, PUV3_GPIO) - -struct PUV3GPIOState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - qemu_irq irq[9]; - - uint32_t reg_GPLR; - uint32_t reg_GPDR; - uint32_t reg_GPIR; -}; - -static uint64_t puv3_gpio_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3GPIOState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x00: - ret = s->reg_GPLR; - break; - case 0x04: - ret = s->reg_GPDR; - break; - case 0x20: - ret = s->reg_GPIR; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - - return ret; -} - -static void puv3_gpio_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3GPIOState *s = opaque; - - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); - switch (offset) { - case 0x04: - s->reg_GPDR = value; - break; - case 0x08: - if (s->reg_GPDR & value) { - s->reg_GPLR |= value; - } else { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Write gpio input port\n", - __func__); - } - break; - case 0x0c: - if (s->reg_GPDR & value) { - s->reg_GPLR &= ~value; - } else { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Write gpio input port\n", - __func__); - } - break; - case 0x10: /* GRER */ - case 0x14: /* GFER */ - case 0x18: /* GEDR */ - break; - case 0x20: /* GPIR */ - s->reg_GPIR = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } -} - -static const MemoryRegionOps puv3_gpio_ops = { - .read = puv3_gpio_read, - .write = puv3_gpio_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_gpio_realize(DeviceState *dev, Error **errp) -{ - PUV3GPIOState *s = PUV3_GPIO(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - s->reg_GPLR = 0; - s->reg_GPDR = 0; - - /* FIXME: these irqs not handled yet */ - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW0]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW1]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW2]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW3]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW4]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW5]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW6]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOLOW7]); - sysbus_init_irq(sbd, &s->irq[PUV3_IRQS_GPIOHIGH]); - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_gpio_ops, s, "puv3_gpio", - PUV3_REGS_OFFSET); - sysbus_init_mmio(sbd, &s->iomem); -} - -static void puv3_gpio_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_gpio_realize; -} - -static const TypeInfo puv3_gpio_info = { - .name = TYPE_PUV3_GPIO, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3GPIOState), - .class_init = puv3_gpio_class_init, -}; - -static void puv3_gpio_register_type(void) -{ - type_register_static(&puv3_gpio_info); -} - -type_init(puv3_gpio_register_type) diff --git a/hw/gpio/trace-events b/hw/gpio/trace-events index 46ab9323bd0..1dab99c5604 100644 --- a/hw/gpio/trace-events +++ b/hw/gpio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
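+# Example (not part of this patch): the pl061 trace points added below can be
+# enabled at run time with "-trace 'pl061_*'" on the QEMU command line, or
+# with "trace-event pl061_read on" in the HMP monitor.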
# npcm7xx_gpio.c npcm7xx_gpio_read(const char *id, uint64_t offset, uint64_t value) " %s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 @@ -13,6 +13,15 @@ nrf51_gpio_write(uint64_t offset, uint64_t value) "offset 0x%" PRIx64 " value 0x nrf51_gpio_set(int64_t line, int64_t value) "line %" PRIi64 " value %" PRIi64 nrf51_gpio_update_output_irq(int64_t line, int64_t value) "line %" PRIi64 " value %" PRIi64 +# pl061.c +pl061_update(const char *id, uint32_t dir, uint32_t data, uint32_t pullups, uint32_t floating) "%s GPIODIR 0x%x GPIODATA 0x%x pullups 0x%x floating 0x%x" +pl061_set_output(const char *id, int gpio, int level) "%s setting output %d to %d" +pl061_input_change(const char *id, int gpio, int level) "%s input %d changed to %d" +pl061_update_istate(const char *id, uint32_t istate, uint32_t im, int level) "%s GPIORIS 0x%x GPIOIE 0x%x interrupt level %d" +pl061_read(const char *id, uint64_t offset, uint64_t r) "%s offset 0x%" PRIx64 " value 0x%" PRIx64 +pl061_write(const char *id, uint64_t offset, uint64_t value) "%s offset 0x%" PRIx64 " value 0x%" PRIx64 +pl061_reset(const char *id) "%s reset" + # sifive_gpio.c sifive_gpio_read(uint64_t offset, uint64_t r) "offset 0x%" PRIx64 " value 0x%" PRIx64 sifive_gpio_write(uint64_t offset, uint64_t value) "offset 0x%" PRIx64 " value 0x%" PRIx64 diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c index 5b82c9440d1..eab96dd84ef 100644 --- a/hw/hppa/dino.c +++ b/hw/hppa/dino.c @@ -14,13 +14,11 @@ #include "qemu/module.h" #include "qemu/units.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/irq.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "migration/vmstate.h" #include "hppa_sys.h" -#include "exec/address-spaces.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/hppa/lasi.c b/hw/hppa/lasi.c index 1a856579484..88c3791eb68 100644 --- a/hw/hppa/lasi.c +++ b/hw/hppa/lasi.c @@ -13,9 +13,7 @@ #include "qemu/units.h" #include "qemu/log.h" #include "qapi/error.h" -#include "cpu.h" #include "trace.h" -#include "hw/hw.h" #include "hw/irq.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" @@ -24,7 +22,6 @@ #include "hw/char/parallel.h" #include "hw/char/serial.h" #include "hw/input/lasips2.h" -#include "exec/address-spaces.h" #include "migration/vmstate.h" #include "qom/object.h" diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index f2b71db9bd7..2a46af5bc9b 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -9,7 +9,6 @@ #include "cpu.h" #include "elf.h" #include "hw/loader.h" -#include "hw/boards.h" #include "qemu/error-report.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" diff --git a/hw/hppa/trace-events b/hw/hppa/trace-events index 3ff620319a6..3f42be9056f 100644 --- a/hw/hppa/trace-events +++ b/hw/hppa/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# pci.c hppa_pci_iack_write(void) "" diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c index 984caf898dc..dbce3b35fba 100644 --- a/hw/hyperv/vmbus.c +++ b/hw/hyperv/vmbus.c @@ -2372,6 +2372,14 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp) assert(!qemu_uuid_is_null(&vdev->instanceid)); + if (!qemu_uuid_is_null(&vdc->instanceid)) { + /* Class wants to only have a single instance with a fixed UUID */ + if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) { + error_setg(&err, "instance id can't be changed"); + goto error_out; + } + } + /* Check for instance id collision for this class id */ QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) { VMBusDevice *child_dev = VMBUS_DEVICE(child->child); @@ -2438,18 +2446,22 @@ static void vmbus_dev_unrealize(DeviceState *dev) free_channels(vdev); } +static Property vmbus_dev_props[] = { + DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid), + DEFINE_PROP_END_OF_LIST() +}; + + static void vmbus_dev_class_init(ObjectClass *klass, void *data) { DeviceClass *kdev = DEVICE_CLASS(klass); + device_class_set_props(kdev, vmbus_dev_props); kdev->bus_type = TYPE_VMBUS; kdev->realize = vmbus_dev_realize; kdev->unrealize = vmbus_dev_unrealize; kdev->reset = vmbus_dev_reset; } -static Property vmbus_dev_instanceid = - DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid); - static void vmbus_dev_instance_init(Object *obj) { VMBusDevice *vdev = VMBUS_DEVICE(obj); @@ -2458,8 +2470,6 @@ static void vmbus_dev_instance_init(Object *obj) if (!qemu_uuid_is_null(&vdc->instanceid)) { /* Class wants to only have a single instance with a fixed UUID */ vdev->instanceid = vdc->instanceid; - } else { - qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid); } } @@ -2719,7 +2729,7 @@ static void vmbus_bridge_realize(DeviceState *dev, Error **errp) return; } - bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus")); + bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus")); } static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev) diff --git a/hw/i2c/Kconfig b/hw/i2c/Kconfig index 09642a6dcb7..8217cb50411 100644 --- a/hw/i2c/Kconfig +++ b/hw/i2c/Kconfig @@ -28,3 +28,11 @@ config IMX_I2C config MPC_I2C bool select I2C + +config PCA954X + bool + select I2C + +config PMBUS + bool + select SMBUS diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c index 518a3f5c6f9..03a4f5a9101 100644 --- a/hw/i2c/aspeed_i2c.c +++ b/hw/i2c/aspeed_i2c.c @@ -601,7 +601,7 @@ static void aspeed_i2c_bus_write(void *opaque, hwaddr offset, break; } - bus->dma_addr = value & 0xfffffffc; + bus->dma_addr = value & 0x3ffffffc; break; case I2CD_DMA_LEN: @@ -740,20 +740,20 @@ static const VMStateDescription aspeed_i2c_vmstate = { static void aspeed_i2c_reset(DeviceState *dev) { - int i; AspeedI2CState *s = ASPEED_I2C(dev); - AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(s); s->intr_status = 0; +} + +static void aspeed_i2c_instance_init(Object *obj) +{ + AspeedI2CState *s = ASPEED_I2C(obj); + AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(s); + int i; for (i = 0; i < aic->num_busses; i++) { - s->busses[i].intr_ctrl = 0; - s->busses[i].intr_status = 0; - s->busses[i].cmd = 0; - s->busses[i].buf = 0; - s->busses[i].dma_addr = 0; - s->busses[i].dma_len = 0; - i2c_end_transfer(s->busses[i].bus); + object_initialize_child(obj, "bus[*]", &s->busses[i], + TYPE_ASPEED_I2C_BUS); } } @@ -791,17 +791,21 @@ static void aspeed_i2c_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(sbd, &s->iomem); for (i = 0; i < aic->num_busses; i++) { - char name[32]; + Object *bus = 
OBJECT(&s->busses[i]); int offset = i < aic->gap ? 1 : 5; - sysbus_init_irq(sbd, &s->busses[i].irq); - snprintf(name, sizeof(name), "aspeed.i2c.%d", i); - s->busses[i].controller = s; - s->busses[i].id = i; - s->busses[i].bus = i2c_init_bus(dev, name); - memory_region_init_io(&s->busses[i].mr, OBJECT(dev), - &aspeed_i2c_bus_ops, &s->busses[i], name, - aic->reg_size); + if (!object_property_set_link(bus, "controller", OBJECT(s), errp)) { + return; + } + + if (!object_property_set_uint(bus, "bus-id", i, errp)) { + return; + } + + if (!sysbus_realize(SYS_BUS_DEVICE(bus), errp)) { + return; + } + memory_region_add_subregion(&s->iomem, aic->reg_size * (i + offset), &s->busses[i].mr); } @@ -816,7 +820,8 @@ static void aspeed_i2c_realize(DeviceState *dev, Error **errp) return; } - address_space_init(&s->dram_as, s->dram_mr, "dma-dram"); + address_space_init(&s->dram_as, s->dram_mr, + TYPE_ASPEED_I2C "-dma-dram"); } } @@ -840,12 +845,72 @@ static void aspeed_i2c_class_init(ObjectClass *klass, void *data) static const TypeInfo aspeed_i2c_info = { .name = TYPE_ASPEED_I2C, .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = aspeed_i2c_instance_init, .instance_size = sizeof(AspeedI2CState), .class_init = aspeed_i2c_class_init, .class_size = sizeof(AspeedI2CClass), .abstract = true, }; +static void aspeed_i2c_bus_reset(DeviceState *dev) +{ + AspeedI2CBus *s = ASPEED_I2C_BUS(dev); + + s->intr_ctrl = 0; + s->intr_status = 0; + s->cmd = 0; + s->buf = 0; + s->dma_addr = 0; + s->dma_len = 0; + i2c_end_transfer(s->bus); +} + +static void aspeed_i2c_bus_realize(DeviceState *dev, Error **errp) +{ + AspeedI2CBus *s = ASPEED_I2C_BUS(dev); + AspeedI2CClass *aic; + g_autofree char *name = g_strdup_printf(TYPE_ASPEED_I2C_BUS ".%d", s->id); + + if (!s->controller) { + error_setg(errp, TYPE_ASPEED_I2C_BUS ": 'controller' link not set"); + return; + } + + aic = ASPEED_I2C_GET_CLASS(s->controller); + + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq); + + s->bus = i2c_init_bus(dev, name); + + memory_region_init_io(&s->mr, OBJECT(s), &aspeed_i2c_bus_ops, + s, name, aic->reg_size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mr); +} + +static Property aspeed_i2c_bus_properties[] = { + DEFINE_PROP_UINT8("bus-id", AspeedI2CBus, id, 0), + DEFINE_PROP_LINK("controller", AspeedI2CBus, controller, TYPE_ASPEED_I2C, + AspeedI2CState *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_i2c_bus_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "Aspeed I2C Bus"; + dc->realize = aspeed_i2c_bus_realize; + dc->reset = aspeed_i2c_bus_reset; + device_class_set_props(dc, aspeed_i2c_bus_properties); +} + +static const TypeInfo aspeed_i2c_bus_info = { + .name = TYPE_ASPEED_I2C_BUS, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedI2CBus), + .class_init = aspeed_i2c_bus_class_init, +}; + static qemu_irq aspeed_2400_i2c_bus_get_irq(AspeedI2CBus *bus) { return bus->controller->irq; @@ -950,6 +1015,7 @@ static const TypeInfo aspeed_2600_i2c_info = { static void aspeed_i2c_register_types(void) { + type_register_static(&aspeed_i2c_bus_info); type_register_static(&aspeed_i2c_info); type_register_static(&aspeed_2400_i2c_info); type_register_static(&aspeed_2500_i2c_info); diff --git a/hw/i2c/core.c b/hw/i2c/core.c index 21ec52ac5ad..0e7d2763b9e 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -60,13 +60,13 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) { I2CBus *bus; - bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); + bus = I2C_BUS(qbus_new(TYPE_I2C_BUS, 
parent, name)); QLIST_INIT(&bus->current_devs); vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); return bus; } -void i2c_set_slave_address(I2CSlave *dev, uint8_t address) +void i2c_slave_set_address(I2CSlave *dev, uint8_t address) { dev->address = address; } @@ -77,6 +77,30 @@ int i2c_bus_busy(I2CBus *bus) return !QLIST_EMPTY(&bus->current_devs); } +bool i2c_scan_bus(I2CBus *bus, uint8_t address, bool broadcast, + I2CNodeList *current_devs) +{ + BusChild *kid; + + QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { + DeviceState *qdev = kid->child; + I2CSlave *candidate = I2C_SLAVE(qdev); + I2CSlaveClass *sc = I2C_SLAVE_GET_CLASS(candidate); + + if (sc->match_and_add(candidate, address, broadcast, current_devs)) { + if (!broadcast) { + return true; + } + } + } + + /* + * If broadcast was true, and the list was full or empty, return true. If + * broadcast was false, return false. + */ + return broadcast; +} + /* TODO: Make this handle multiple masters. */ /* * Start or continue an i2c transaction. When this is called for the @@ -90,10 +114,12 @@ int i2c_bus_busy(I2CBus *bus) * protocol uses a start transfer to switch from write to read mode * without releasing the bus. If that fails, the bus is still * in a transaction. + * + * @event must be I2C_START_RECV or I2C_START_SEND. */ -int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) +static int i2c_do_start_transfer(I2CBus *bus, uint8_t address, + enum i2c_event event) { - BusChild *kid; I2CSlaveClass *sc; I2CNode *node; bool bus_scanned = false; @@ -115,18 +141,8 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) * terminating the previous transaction. */ if (QLIST_EMPTY(&bus->current_devs)) { - QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { - DeviceState *qdev = kid->child; - I2CSlave *candidate = I2C_SLAVE(qdev); - if ((candidate->address == address) || (bus->broadcast)) { - node = g_malloc(sizeof(struct I2CNode)); - node->elt = candidate; - QLIST_INSERT_HEAD(&bus->current_devs, node, next); - if (!bus->broadcast) { - break; - } - } - } + /* Disregard whether devices were found. */ + (void)i2c_scan_bus(bus, address, bus->broadcast, &bus->current_devs); bus_scanned = true; } @@ -144,7 +160,7 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) if (sc->event) { trace_i2c_event("start", s->address); - rv = sc->event(s, recv ? I2C_START_RECV : I2C_START_SEND); + rv = sc->event(s, event); if (rv && !bus->broadcast) { if (bus_scanned) { /* First call, terminate the transfer. */ @@ -157,6 +173,23 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) return 0; } +int i2c_start_transfer(I2CBus *bus, uint8_t address, bool is_recv) +{ + return i2c_do_start_transfer(bus, address, is_recv + ? 
I2C_START_RECV + : I2C_START_SEND); +} + +int i2c_start_recv(I2CBus *bus, uint8_t address) +{ + return i2c_do_start_transfer(bus, address, I2C_START_RECV); +} + +int i2c_start_send(I2CBus *bus, uint8_t address) +{ + return i2c_do_start_transfer(bus, address, I2C_START_SEND); +} + void i2c_end_transfer(I2CBus *bus) { I2CSlaveClass *sc; @@ -175,50 +208,42 @@ void i2c_end_transfer(I2CBus *bus) bus->broadcast = false; } -int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send) +int i2c_send(I2CBus *bus, uint8_t data) { I2CSlaveClass *sc; I2CSlave *s; I2CNode *node; int ret = 0; - if (send) { - QLIST_FOREACH(node, &bus->current_devs, next) { - s = node->elt; - sc = I2C_SLAVE_GET_CLASS(s); - if (sc->send) { - trace_i2c_send(s->address, *data); - ret = ret || sc->send(s, *data); - } else { - ret = -1; - } - } - return ret ? -1 : 0; - } else { - ret = 0xff; - if (!QLIST_EMPTY(&bus->current_devs) && !bus->broadcast) { - sc = I2C_SLAVE_GET_CLASS(QLIST_FIRST(&bus->current_devs)->elt); - if (sc->recv) { - s = QLIST_FIRST(&bus->current_devs)->elt; - ret = sc->recv(s); - trace_i2c_recv(s->address, ret); - } + QLIST_FOREACH(node, &bus->current_devs, next) { + s = node->elt; + sc = I2C_SLAVE_GET_CLASS(s); + if (sc->send) { + trace_i2c_send(s->address, data); + ret = ret || sc->send(s, data); + } else { + ret = -1; } - *data = ret; - return 0; } -} -int i2c_send(I2CBus *bus, uint8_t data) -{ - return i2c_send_recv(bus, &data, true); + return ret ? -1 : 0; } uint8_t i2c_recv(I2CBus *bus) { uint8_t data = 0xff; + I2CSlaveClass *sc; + I2CSlave *s; + + if (!QLIST_EMPTY(&bus->current_devs) && !bus->broadcast) { + sc = I2C_SLAVE_GET_CLASS(QLIST_FIRST(&bus->current_devs)->elt); + if (sc->recv) { + s = QLIST_FIRST(&bus->current_devs)->elt; + data = sc->recv(s); + trace_i2c_recv(s->address, data); + } + } - i2c_send_recv(bus, &data, false); return data; } @@ -290,12 +315,28 @@ I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr) return dev; } +static bool i2c_slave_match(I2CSlave *candidate, uint8_t address, + bool broadcast, I2CNodeList *current_devs) +{ + if ((candidate->address == address) || (broadcast)) { + I2CNode *node = g_malloc(sizeof(struct I2CNode)); + node->elt = candidate; + QLIST_INSERT_HEAD(current_devs, node, next); + return true; + } + + /* Not found and not broadcast. */ + return false; +} + static void i2c_slave_class_init(ObjectClass *klass, void *data) { DeviceClass *k = DEVICE_CLASS(klass); + I2CSlaveClass *sc = I2C_SLAVE_CLASS(klass); set_bit(DEVICE_CATEGORY_MISC, k->categories); k->bus_type = TYPE_I2C_BUS; device_class_set_props(k, i2c_props); + sc->match_and_add = i2c_slave_match; } static const TypeInfo i2c_slave_type_info = { diff --git a/hw/i2c/i2c_mux_pca954x.c b/hw/i2c/i2c_mux_pca954x.c new file mode 100644 index 00000000000..847c59921cf --- /dev/null +++ b/hw/i2c/i2c_mux_pca954x.c @@ -0,0 +1,290 @@ +/* + * I2C multiplexer for PCA954x series of I2C multiplexer/switch chips. + * + * Copyright 2021 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/i2c/i2c.h" +#include "hw/i2c/i2c_mux_pca954x.h" +#include "hw/i2c/smbus_slave.h" +#include "hw/qdev-core.h" +#include "hw/sysbus.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/queue.h" +#include "qom/object.h" +#include "trace.h" + +#define PCA9548_CHANNEL_COUNT 8 +#define PCA9546_CHANNEL_COUNT 4 + +/* + * struct Pca954xChannel - The i2c mux device will have N of these states + * that own the i2c channel bus. + * @bus: The owned channel bus. + * @enabled: Is this channel active? + */ +typedef struct Pca954xChannel { + SysBusDevice parent; + + I2CBus *bus; + + bool enabled; +} Pca954xChannel; + +#define TYPE_PCA954X_CHANNEL "pca954x-channel" +#define PCA954X_CHANNEL(obj) \ + OBJECT_CHECK(Pca954xChannel, (obj), TYPE_PCA954X_CHANNEL) + +/* + * struct Pca954xState - The pca954x state object. + * @control: The value written to the mux control. + * @channel: The set of i2c channel buses that act as channels which own the + * i2c children. + */ +typedef struct Pca954xState { + SMBusDevice parent; + + uint8_t control; + + /* The channel i2c buses. */ + Pca954xChannel channel[PCA9548_CHANNEL_COUNT]; +} Pca954xState; + +/* + * struct Pca954xClass - The pca954x class object. + * @nchans: The number of i2c channels this device has. + */ +typedef struct Pca954xClass { + SMBusDeviceClass parent; + + uint8_t nchans; +} Pca954xClass; + +#define TYPE_PCA954X "pca954x" +OBJECT_DECLARE_TYPE(Pca954xState, Pca954xClass, PCA954X) + +/* + * For each channel, if it's enabled, recursively call match on those children. + */ +static bool pca954x_match(I2CSlave *candidate, uint8_t address, + bool broadcast, + I2CNodeList *current_devs) +{ + Pca954xState *mux = PCA954X(candidate); + Pca954xClass *mc = PCA954X_GET_CLASS(mux); + int i; + + /* They are talking to the mux itself (or all devices enabled). */ + if ((candidate->address == address) || broadcast) { + I2CNode *node = g_malloc(sizeof(struct I2CNode)); + node->elt = candidate; + QLIST_INSERT_HEAD(current_devs, node, next); + if (!broadcast) { + return true; + } + } + + for (i = 0; i < mc->nchans; i++) { + if (!mux->channel[i].enabled) { + continue; + } + + if (i2c_scan_bus(mux->channel[i].bus, address, broadcast, + current_devs)) { + if (!broadcast) { + return true; + } + } + } + + /* If we arrived here we didn't find a match, return broadcast. */ + return broadcast; +} + +static void pca954x_enable_channel(Pca954xState *s, uint8_t enable_mask) +{ + Pca954xClass *mc = PCA954X_GET_CLASS(s); + int i; + + /* + * For each channel, check if their bit is set in enable_mask and if yes, + * enable it, otherwise disable, hide it. + */ + for (i = 0; i < mc->nchans; i++) { + if (enable_mask & (1 << i)) { + s->channel[i].enabled = true; + } else { + s->channel[i].enabled = false; + } + } +} + +static void pca954x_write(Pca954xState *s, uint8_t data) +{ + s->control = data; + pca954x_enable_channel(s, data); + + trace_pca954x_write_bytes(data); +} + +static int pca954x_write_data(SMBusDevice *d, uint8_t *buf, uint8_t len) +{ + Pca954xState *s = PCA954X(d); + + if (len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + /* + * len should be 1, because they write one byte to enable/disable channels. 
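+ * As an illustration of the mask decoded by pca954x_enable_channel() above:
+ * a write of 0x05 enables channels 0 and 2 and hides the remaining children,
+ * while a write of 0x00 disconnects every downstream device from the bus.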
+ */ + if (len > 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: extra data after channel selection mask\n", + __func__); + return -1; + } + + pca954x_write(s, buf[0]); + return 0; +} + +static uint8_t pca954x_read_byte(SMBusDevice *d) +{ + Pca954xState *s = PCA954X(d); + uint8_t data = s->control; + trace_pca954x_read_data(data); + return data; +} + +static void pca954x_enter_reset(Object *obj, ResetType type) +{ + Pca954xState *s = PCA954X(obj); + /* Reset will disable all channels. */ + pca954x_write(s, 0); +} + +I2CBus *pca954x_i2c_get_bus(I2CSlave *mux, uint8_t channel) +{ + Pca954xClass *pc = PCA954X_GET_CLASS(mux); + Pca954xState *pca954x = PCA954X(mux); + + g_assert(channel < pc->nchans); + return I2C_BUS(qdev_get_child_bus(DEVICE(&pca954x->channel[channel]), + "i2c-bus")); +} + +static void pca954x_channel_init(Object *obj) +{ + Pca954xChannel *s = PCA954X_CHANNEL(obj); + s->bus = i2c_init_bus(DEVICE(s), "i2c-bus"); + + /* Start all channels as disabled. */ + s->enabled = false; +} + +static void pca954x_channel_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->desc = "Pca954x Channel"; +} + +static void pca9546_class_init(ObjectClass *klass, void *data) +{ + Pca954xClass *s = PCA954X_CLASS(klass); + s->nchans = PCA9546_CHANNEL_COUNT; +} + +static void pca9548_class_init(ObjectClass *klass, void *data) +{ + Pca954xClass *s = PCA954X_CLASS(klass); + s->nchans = PCA9548_CHANNEL_COUNT; +} + +static void pca954x_realize(DeviceState *dev, Error **errp) +{ + Pca954xState *s = PCA954X(dev); + Pca954xClass *c = PCA954X_GET_CLASS(s); + int i; + + /* SMBus modules. Cannot fail. */ + for (i = 0; i < c->nchans; i++) { + sysbus_realize(SYS_BUS_DEVICE(&s->channel[i]), &error_abort); + } +} + +static void pca954x_init(Object *obj) +{ + Pca954xState *s = PCA954X(obj); + Pca954xClass *c = PCA954X_GET_CLASS(obj); + int i; + + /* Only initialize the children we expect. 
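+ * Boards attach devices behind a given channel via pca954x_i2c_get_bus()
+ * above; a hypothetical example (device model and address are only
+ * illustrative):
+ *   i2c_slave_create_simple(pca954x_i2c_get_bus(mux, 2), "tmp105", 0x48);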
*/ + for (i = 0; i < c->nchans; i++) { + object_initialize_child(obj, "channel[*]", &s->channel[i], + TYPE_PCA954X_CHANNEL); + } +} + +static void pca954x_class_init(ObjectClass *klass, void *data) +{ + I2CSlaveClass *sc = I2C_SLAVE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + SMBusDeviceClass *k = SMBUS_DEVICE_CLASS(klass); + + sc->match_and_add = pca954x_match; + + rc->phases.enter = pca954x_enter_reset; + + dc->desc = "Pca954x i2c-mux"; + dc->realize = pca954x_realize; + + k->write_data = pca954x_write_data; + k->receive_byte = pca954x_read_byte; +} + +static const TypeInfo pca954x_info[] = { + { + .name = TYPE_PCA954X, + .parent = TYPE_SMBUS_DEVICE, + .instance_size = sizeof(Pca954xState), + .instance_init = pca954x_init, + .class_size = sizeof(Pca954xClass), + .class_init = pca954x_class_init, + .abstract = true, + }, + { + .name = TYPE_PCA9546, + .parent = TYPE_PCA954X, + .class_init = pca9546_class_init, + }, + { + .name = TYPE_PCA9548, + .parent = TYPE_PCA954X, + .class_init = pca9548_class_init, + }, + { + .name = TYPE_PCA954X_CHANNEL, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = pca954x_channel_class_init, + .instance_size = sizeof(Pca954xChannel), + .instance_init = pca954x_channel_init, + } +}; + +DEFINE_TYPES(pca954x_info) diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c index 2e02e1c4faa..9792583fea7 100644 --- a/hw/i2c/imx_i2c.c +++ b/hw/i2c/imx_i2c.c @@ -171,7 +171,7 @@ static void imx_i2c_write(void *opaque, hwaddr offset, switch (offset) { case IADR_ADDR: s->iadr = value & IADR_MASK; - /* i2c_set_slave_address(s->bus, (uint8_t)s->iadr); */ + /* i2c_slave_set_address(s->bus, (uint8_t)s->iadr); */ break; case IFDR_ADDR: s->ifdr = value & IFDR_MASK; diff --git a/hw/i2c/meson.build b/hw/i2c/meson.build index cdcd694a7fb..d3df273251f 100644 --- a/hw/i2c/meson.build +++ b/hw/i2c/meson.build @@ -14,4 +14,6 @@ i2c_ss.add(when: 'CONFIG_SMBUS_EEPROM', if_true: files('smbus_eeprom.c')) i2c_ss.add(when: 'CONFIG_VERSATILE_I2C', if_true: files('versatile_i2c.c')) i2c_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_i2c.c')) i2c_ss.add(when: 'CONFIG_PPC4XX', if_true: files('ppc4xx_i2c.c')) +i2c_ss.add(when: 'CONFIG_PCA954X', if_true: files('i2c_mux_pca954x.c')) +i2c_ss.add(when: 'CONFIG_PMBUS', if_true: files('pmbus_device.c')) softmmu_ss.add_all(when: 'CONFIG_I2C', if_true: i2c_ss) diff --git a/hw/i2c/mpc_i2c.c b/hw/i2c/mpc_i2c.c index 720d2331e95..845392505ff 100644 --- a/hw/i2c/mpc_i2c.c +++ b/hw/i2c/mpc_i2c.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "hw/i2c/i2c.h" #include "hw/irq.h" -#include "qemu/log.h" #include "qemu/module.h" #include "hw/sysbus.h" #include "migration/vmstate.h" diff --git a/hw/i2c/pm_smbus.c b/hw/i2c/pm_smbus.c index 06e1e5321b9..d7eae548cbc 100644 --- a/hw/i2c/pm_smbus.c +++ b/hw/i2c/pm_smbus.c @@ -128,14 +128,14 @@ static void smb_transaction(PMSMBus *s) * So at least Linux may or may not set the read bit here. * So just ignore the read bit for this command. 
*/ - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { goto error; } ret = i2c_send(bus, s->smb_data1); if (ret) { goto error; } - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { goto error; } s->in_i2c_block_read = true; diff --git a/hw/i2c/pmbus_device.c b/hw/i2c/pmbus_device.c new file mode 100644 index 00000000000..24f8f522d9f --- /dev/null +++ b/hw/i2c/pmbus_device.c @@ -0,0 +1,1612 @@ +/* + * PMBus wrapper over SMBus + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include +#include +#include "hw/i2c/pmbus_device.h" +#include "migration/vmstate.h" +#include "qemu/module.h" +#include "qemu/log.h" + +uint16_t pmbus_data2direct_mode(PMBusCoefficients c, uint32_t value) +{ + /* R is usually negative to fit large readings into 16 bits */ + uint16_t y = (c.m * value + c.b) * pow(10, c.R); + return y; +} + +uint32_t pmbus_direct_mode2data(PMBusCoefficients c, uint16_t value) +{ + /* X = (Y * 10^-R - b) / m */ + uint32_t x = (value / pow(10, c.R) - c.b) / c.m; + return x; +} + +void pmbus_send(PMBusDevice *pmdev, const uint8_t *data, uint16_t len) +{ + if (pmdev->out_buf_len + len > SMBUS_DATA_MAX_LEN) { + qemu_log_mask(LOG_GUEST_ERROR, + "PMBus device tried to send too much data"); + len = 0; + } + + for (int i = len - 1; i >= 0; i--) { + pmdev->out_buf[i + pmdev->out_buf_len] = data[len - i - 1]; + } + pmdev->out_buf_len += len; +} + +/* Internal only, convert unsigned ints to the little endian bus */ +static void pmbus_send_uint(PMBusDevice *pmdev, uint64_t data, uint8_t size) +{ + uint8_t bytes[8]; + g_assert(size <= 8); + + for (int i = 0; i < size; i++) { + bytes[i] = data & 0xFF; + data = data >> 8; + } + pmbus_send(pmdev, bytes, size); +} + +void pmbus_send8(PMBusDevice *pmdev, uint8_t data) +{ + pmbus_send_uint(pmdev, data, 1); +} + +void pmbus_send16(PMBusDevice *pmdev, uint16_t data) +{ + pmbus_send_uint(pmdev, data, 2); +} + +void pmbus_send32(PMBusDevice *pmdev, uint32_t data) +{ + pmbus_send_uint(pmdev, data, 4); +} + +void pmbus_send64(PMBusDevice *pmdev, uint64_t data) +{ + pmbus_send_uint(pmdev, data, 8); +} + +void pmbus_send_string(PMBusDevice *pmdev, const char *data) +{ + size_t len = strlen(data); + g_assert(len > 0); + g_assert(len + pmdev->out_buf_len < SMBUS_DATA_MAX_LEN); + pmdev->out_buf[len + pmdev->out_buf_len] = len; + + for (int i = len - 1; i >= 0; i--) { + pmdev->out_buf[i + pmdev->out_buf_len] = data[len - 1 - i]; + } + pmdev->out_buf_len += len + 1; +} + + +static uint64_t pmbus_receive_uint(const uint8_t *buf, uint8_t len) +{ + uint64_t ret = 0; + + /* Exclude command code from return value */ + buf++; + len--; + + for (int i = len - 1; i >= 0; i--) { + ret = ret << 8 | buf[i]; + } + return ret; +} + +uint8_t pmbus_receive8(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. Expected 1 byte, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +uint16_t pmbus_receive16(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 2) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. Expected 2 bytes, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +uint32_t pmbus_receive32(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 4) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. 
Expected 4 bytes, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +uint64_t pmbus_receive64(PMBusDevice *pmdev) +{ + if (pmdev->in_buf_len - 1 != 8) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: length mismatch. Expected 8 bytes, got %d bytes\n", + __func__, pmdev->in_buf_len - 1); + } + return pmbus_receive_uint(pmdev->in_buf, pmdev->in_buf_len); +} + +static uint8_t pmbus_out_buf_pop(PMBusDevice *pmdev) +{ + if (pmdev->out_buf_len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: tried to read from empty buffer", + __func__); + return 0xFF; + } + uint8_t data = pmdev->out_buf[pmdev->out_buf_len - 1]; + pmdev->out_buf_len--; + return data; +} + +static void pmbus_quick_cmd(SMBusDevice *smd, uint8_t read) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(smd); + PMBusDeviceClass *pmdc = PMBUS_DEVICE_GET_CLASS(pmdev); + + if (pmdc->quick_cmd) { + pmdc->quick_cmd(pmdev, read); + } +} + +static void pmbus_pages_alloc(PMBusDevice *pmdev) +{ + /* some PMBus devices don't use the PAGE command, so they get 1 page */ + PMBusDeviceClass *k = PMBUS_DEVICE_GET_CLASS(pmdev); + if (k->device_num_pages == 0) { + k->device_num_pages = 1; + } + pmdev->num_pages = k->device_num_pages; + pmdev->pages = g_new0(PMBusPage, k->device_num_pages); +} + +void pmbus_check_limits(PMBusDevice *pmdev) +{ + for (int i = 0; i < pmdev->num_pages; i++) { + if ((pmdev->pages[i].operation & PB_OP_ON) == 0) { + continue; /* don't check powered off devices */ + } + + if (pmdev->pages[i].read_vout > pmdev->pages[i].vout_ov_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_OV_FAULT; + } + + if (pmdev->pages[i].read_vout > pmdev->pages[i].vout_ov_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_OV_WARN; + } + + if (pmdev->pages[i].read_vout < pmdev->pages[i].vout_uv_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_UV_WARN; + } + + if (pmdev->pages[i].read_vout < pmdev->pages[i].vout_uv_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_VOUT; + pmdev->pages[i].status_vout |= PB_STATUS_VOUT_UV_FAULT; + } + + if (pmdev->pages[i].read_vin > pmdev->pages[i].vin_ov_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_INPUT; + pmdev->pages[i].status_input |= PB_STATUS_INPUT_VIN_OV_WARN; + } + + if (pmdev->pages[i].read_vin < pmdev->pages[i].vin_uv_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_INPUT; + pmdev->pages[i].status_input |= PB_STATUS_INPUT_VIN_UV_WARN; + } + + if (pmdev->pages[i].read_iout > pmdev->pages[i].iout_oc_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_IOUT_POUT; + pmdev->pages[i].status_iout |= PB_STATUS_IOUT_OC_WARN; + } + + if (pmdev->pages[i].read_iout > pmdev->pages[i].iout_oc_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_IOUT_POUT; + pmdev->pages[i].status_iout |= PB_STATUS_IOUT_OC_FAULT; + } + + if (pmdev->pages[i].read_pin > pmdev->pages[i].pin_op_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_INPUT; + pmdev->pages[i].status_input |= PB_STATUS_INPUT_PIN_OP_WARN; + } + + if (pmdev->pages[i].read_temperature_1 + > pmdev->pages[i].ot_fault_limit) { + pmdev->pages[i].status_word |= PB_STATUS_TEMPERATURE; + pmdev->pages[i].status_temperature |= PB_STATUS_OT_FAULT; + } + + if (pmdev->pages[i].read_temperature_1 + > pmdev->pages[i].ot_warn_limit) { + pmdev->pages[i].status_word |= PB_STATUS_TEMPERATURE; + 
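+            /*
+             * Both the summary bit in STATUS_WORD and the detailed bit in
+             * STATUS_TEMPERATURE are raised here; every limit check in this
+             * function follows the same summary/detail pairing, and the bits
+             * stay set until the guest issues CLEAR_FAULTS.
+             */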
pmdev->pages[i].status_temperature |= PB_STATUS_OT_WARN; + } + } +} + +static uint8_t pmbus_receive_byte(SMBusDevice *smd) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(smd); + PMBusDeviceClass *pmdc = PMBUS_DEVICE_GET_CLASS(pmdev); + uint8_t ret = 0xFF; + uint8_t index = pmdev->page; + + if (pmdev->out_buf_len != 0) { + ret = pmbus_out_buf_pop(pmdev); + return ret; + } + + switch (pmdev->code) { + case PMBUS_PAGE: + pmbus_send8(pmdev, pmdev->page); + break; + + case PMBUS_OPERATION: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].operation); + break; + + case PMBUS_ON_OFF_CONFIG: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].on_off_config); + break; + + case PMBUS_PHASE: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].phase); + break; + + case PMBUS_WRITE_PROTECT: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].write_protect); + break; + + case PMBUS_CAPABILITY: + pmbus_send8(pmdev, pmdev->capability); + break; + + case PMBUS_VOUT_MODE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MODE) { + pmbus_send8(pmdev, pmdev->pages[index].vout_mode); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_COMMAND: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_command); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_TRIM: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_trim); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_CAL_OFFSET: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_cal_offset); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_max); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_MARGIN_HIGH: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmbus_send16(pmdev, pmdev->pages[index].vout_margin_high); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_MARGIN_LOW: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmbus_send16(pmdev, pmdev->pages[index].vout_margin_low); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_TRANSITION_RATE: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_transition_rate); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_DROOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_droop); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_SCALE_LOOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_scale_loop); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_SCALE_MONITOR: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_scale_monitor); + } else { + goto passthough; + } + break; + + /* TODO: implement coefficients support */ + + case PMBUS_POUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].pout_max); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_ON: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, 
pmdev->pages[index].vin_on); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OFF: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_off); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_CAL_GAIN: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT_GAIN) { + pmbus_send16(pmdev, pmdev->pages[index].iout_cal_gain); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_ov_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send8(pmdev, pmdev->pages[index].vout_ov_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_ov_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_uv_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].vout_uv_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VOUT_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send8(pmdev, pmdev->pages[index].vout_uv_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_oc_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].iout_oc_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_oc_lv_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].iout_oc_lv_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_oc_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_UC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].iout_uc_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IOUT_UC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].iout_uc_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_OT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ot_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_OT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & 
PB_HAS_TEMPERATURE) { + pmbus_send8(pmdev, pmdev->pages[index].ot_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_OT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ot_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_UT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ut_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_UT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].ut_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_UT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send8(pmdev, pmdev->pages[index].ut_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_ov_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send8(pmdev, pmdev->pages[index].vin_ov_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_ov_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_uv_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].vin_uv_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_VIN_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send8(pmdev, pmdev->pages[index].vin_uv_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IIN_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send16(pmdev, pmdev->pages[index].iin_oc_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_IIN_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send8(pmdev, pmdev->pages[index].iin_oc_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_IIN_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send16(pmdev, pmdev->pages[index].iin_oc_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_POUT_OP_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].pout_op_fault_limit); + } else { + goto passthough; + } + break; + + case PMBUS_POUT_OP_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send8(pmdev, pmdev->pages[index].pout_op_fault_response); + } else { + goto passthough; + } + break; + + case PMBUS_POUT_OP_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].pout_op_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_PIN_OP_WARN_LIMIT: /* R/W word */ + if 
(pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmbus_send16(pmdev, pmdev->pages[index].pin_op_warn_limit); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_BYTE: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].status_word & 0xFF); + break; + + case PMBUS_STATUS_WORD: /* R/W word */ + pmbus_send16(pmdev, pmdev->pages[index].status_word); + break; + + case PMBUS_STATUS_VOUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send8(pmdev, pmdev->pages[index].status_vout); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_IOUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send8(pmdev, pmdev->pages[index].status_iout); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_INPUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN || + pmdev->pages[index].page_flags & PB_HAS_IIN || + pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmbus_send8(pmdev, pmdev->pages[index].status_input); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_TEMPERATURE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send8(pmdev, pmdev->pages[index].status_temperature); + } else { + goto passthough; + } + break; + + case PMBUS_STATUS_CML: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].status_cml); + break; + + case PMBUS_STATUS_OTHER: /* R/W byte */ + pmbus_send8(pmdev, pmdev->pages[index].status_other); + break; + + case PMBUS_READ_EIN: /* Read-Only block 5 bytes */ + if (pmdev->pages[index].page_flags & PB_HAS_EIN) { + pmbus_send(pmdev, pmdev->pages[index].read_ein, 5); + } else { + goto passthough; + } + break; + + case PMBUS_READ_EOUT: /* Read-Only block 5 bytes */ + if (pmdev->pages[index].page_flags & PB_HAS_EOUT) { + pmbus_send(pmdev, pmdev->pages[index].read_eout, 5); + } else { + goto passthough; + } + break; + + case PMBUS_READ_VIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmbus_send16(pmdev, pmdev->pages[index].read_vin); + } else { + goto passthough; + } + break; + + case PMBUS_READ_IIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmbus_send16(pmdev, pmdev->pages[index].read_iin); + } else { + goto passthough; + } + break; + + case PMBUS_READ_VOUT: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmbus_send16(pmdev, pmdev->pages[index].read_vout); + } else { + goto passthough; + } + break; + + case PMBUS_READ_IOUT: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmbus_send16(pmdev, pmdev->pages[index].read_iout); + } else { + goto passthough; + } + break; + + case PMBUS_READ_TEMPERATURE_1: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmbus_send16(pmdev, pmdev->pages[index].read_temperature_1); + } else { + goto passthough; + } + break; + + case PMBUS_READ_TEMPERATURE_2: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP2) { + pmbus_send16(pmdev, pmdev->pages[index].read_temperature_2); + } else { + goto passthough; + } + break; + + case PMBUS_READ_TEMPERATURE_3: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP3) { + pmbus_send16(pmdev, pmdev->pages[index].read_temperature_3); + } else { + goto passthough; + } + break; + + case PMBUS_READ_POUT: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT) { + pmbus_send16(pmdev, pmdev->pages[index].read_pout); + } else { + goto passthough; + } + break; + + case 
PMBUS_READ_PIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmbus_send16(pmdev, pmdev->pages[index].read_pin); + } else { + goto passthough; + } + break; + + case PMBUS_REVISION: /* Read-Only byte */ + pmbus_send8(pmdev, pmdev->pages[index].revision); + break; + + case PMBUS_MFR_ID: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_id); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MODEL: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_model); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_REVISION: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_revision); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_LOCATION: /* R/W block */ + if (pmdev->pages[index].page_flags & PB_HAS_MFR_INFO) { + pmbus_send_string(pmdev, pmdev->pages[index].mfr_location); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VIN_MIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vin_min); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VIN_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vin_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_IIN_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_iin_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_PIN_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_PIN_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_pin_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VOUT_MIN: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vout_min); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_VOUT_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_vout_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_IOUT_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_iout_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_POUT_MAX: /* Read-Only word */ + if (pmdev->pages[index].page_flags & PB_HAS_POUT_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_pout_max); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MAX_TEMP_1: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_max_temp_1); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MAX_TEMP_2: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_max_temp_2); + } else { + goto passthough; + } + break; + + case PMBUS_MFR_MAX_TEMP_3: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMP_RATING) { + pmbus_send16(pmdev, pmdev->pages[index].mfr_max_temp_3); + } else { + goto passthough; + } + break; + + case PMBUS_CLEAR_FAULTS: /* Send Byte */ + case PMBUS_PAGE_PLUS_WRITE: /* Block Write-only */ + case PMBUS_STORE_DEFAULT_ALL: /* Send Byte 
*/ + case PMBUS_RESTORE_DEFAULT_ALL: /* Send Byte */ + case PMBUS_STORE_DEFAULT_CODE: /* Write-only Byte */ + case PMBUS_RESTORE_DEFAULT_CODE: /* Write-only Byte */ + case PMBUS_STORE_USER_ALL: /* Send Byte */ + case PMBUS_RESTORE_USER_ALL: /* Send Byte */ + case PMBUS_STORE_USER_CODE: /* Write-only Byte */ + case PMBUS_RESTORE_USER_CODE: /* Write-only Byte */ + case PMBUS_QUERY: /* Write-Only */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: reading from write only register 0x%02x\n", + __func__, pmdev->code); + break; + +passthough: + default: + /* Pass through read request if not handled */ + if (pmdc->receive_byte) { + ret = pmdc->receive_byte(pmdev); + } + break; + } + + if (pmdev->out_buf_len != 0) { + ret = pmbus_out_buf_pop(pmdev); + return ret; + } + + return ret; +} + +/* + * PMBus clear faults command applies to all status registers, existing faults + * should separately get re-asserted. + */ +static void pmbus_clear_faults(PMBusDevice *pmdev) +{ + for (uint8_t i = 0; i < pmdev->num_pages; i++) { + pmdev->pages[i].status_word = 0; + pmdev->pages[i].status_vout = 0; + pmdev->pages[i].status_iout = 0; + pmdev->pages[i].status_input = 0; + pmdev->pages[i].status_temperature = 0; + pmdev->pages[i].status_cml = 0; + pmdev->pages[i].status_other = 0; + pmdev->pages[i].status_mfr_specific = 0; + pmdev->pages[i].status_fans_1_2 = 0; + pmdev->pages[i].status_fans_3_4 = 0; + } + +} + +/* + * PMBus operation is used to turn On and Off PSUs + * Therefore, default value for the Operation should be PB_OP_ON or 0x80 + */ +static void pmbus_operation(PMBusDevice *pmdev) +{ + uint8_t index = pmdev->page; + if ((pmdev->pages[index].operation & PB_OP_ON) == 0) { + pmdev->pages[index].read_vout = 0; + pmdev->pages[index].read_iout = 0; + pmdev->pages[index].read_pout = 0; + return; + } + + if (pmdev->pages[index].operation & (PB_OP_ON | PB_OP_MARGIN_HIGH)) { + pmdev->pages[index].read_vout = pmdev->pages[index].vout_margin_high; + } + + if (pmdev->pages[index].operation & (PB_OP_ON | PB_OP_MARGIN_LOW)) { + pmdev->pages[index].read_vout = pmdev->pages[index].vout_margin_low; + } + pmbus_check_limits(pmdev); +} + +static int pmbus_write_data(SMBusDevice *smd, uint8_t *buf, uint8_t len) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(smd); + PMBusDeviceClass *pmdc = PMBUS_DEVICE_GET_CLASS(pmdev); + int ret = 0; + uint8_t index; + + if (len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + if (!pmdev->pages) { /* allocate memory for pages on first use */ + pmbus_pages_alloc(pmdev); + } + + pmdev->in_buf_len = len; + pmdev->in_buf = buf; + + pmdev->code = buf[0]; /* PMBus command code */ + if (len == 1) { /* Single length writes are command codes only */ + return 0; + } + + if (pmdev->code == PMBUS_PAGE) { + pmdev->page = pmbus_receive8(pmdev); + return 0; + } + /* loop through all the pages when 0xFF is received */ + if (pmdev->page == PB_ALL_PAGES) { + for (int i = 0; i < pmdev->num_pages; i++) { + pmdev->page = i; + pmbus_write_data(smd, buf, len); + } + pmdev->page = PB_ALL_PAGES; + return 0; + } + + index = pmdev->page; + + switch (pmdev->code) { + case PMBUS_OPERATION: /* R/W byte */ + pmdev->pages[index].operation = pmbus_receive8(pmdev); + pmbus_operation(pmdev); + break; + + case PMBUS_ON_OFF_CONFIG: /* R/W byte */ + pmdev->pages[index].on_off_config = pmbus_receive8(pmdev); + break; + + case PMBUS_CLEAR_FAULTS: /* Send Byte */ + pmbus_clear_faults(pmdev); + break; + + case PMBUS_PHASE: /* R/W byte */ + pmdev->pages[index].phase = pmbus_receive8(pmdev); + 
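+        /*
+         * Note on the page handling above (illustration, not new behaviour):
+         * a guest that writes PAGE = 0xFF (PB_ALL_PAGES) and then
+         * OPERATION = 0x80 (PB_OP_ON) has the OPERATION write replayed on
+         * every page by the loop at the top of this function, turning all
+         * rails on in a single transaction.
+         */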
break; + + case PMBUS_PAGE_PLUS_WRITE: /* Block Write-only */ + case PMBUS_WRITE_PROTECT: /* R/W byte */ + pmdev->pages[index].write_protect = pmbus_receive8(pmdev); + break; + + case PMBUS_VOUT_MODE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MODE) { + pmdev->pages[index].vout_mode = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_COMMAND: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_command = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_TRIM: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_trim = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_CAL_OFFSET: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_cal_offset = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_max = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_MARGIN_HIGH: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmdev->pages[index].vout_margin_high = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_MARGIN_LOW: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT_MARGIN) { + pmdev->pages[index].vout_margin_low = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_TRANSITION_RATE: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_transition_rate = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_DROOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_droop = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_SCALE_LOOP: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_scale_loop = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_SCALE_MONITOR: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_scale_monitor = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_MAX: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_max = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_ON: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_on = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OFF: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_off = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_CAL_GAIN: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT_GAIN) { + pmdev->pages[index].iout_cal_gain = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_ov_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & 
PB_HAS_VOUT) { + pmdev->pages[index].vout_ov_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_ov_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_uv_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_uv_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VOUT_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].vout_uv_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_lv_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_LV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_lv_fault_response + = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_oc_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_UC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_uc_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IOUT_UC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].iout_uc_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_OT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ot_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_OT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ot_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_OT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ot_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_UT_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ut_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_UT_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ut_fault_limit = 
pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_UT_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].ut_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_ov_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_ov_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_OV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_ov_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_UV_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_uv_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_UV_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_uv_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_VIN_UV_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VIN) { + pmdev->pages[index].vin_uv_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IIN_OC_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmdev->pages[index].iin_oc_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IIN_OC_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmdev->pages[index].iin_oc_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_IIN_OC_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_IIN) { + pmdev->pages[index].iin_oc_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_OP_FAULT_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_op_fault_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_OP_FAULT_RESPONSE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_op_fault_response = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_POUT_OP_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].pout_op_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_PIN_OP_WARN_LIMIT: /* R/W word */ + if (pmdev->pages[index].page_flags & PB_HAS_PIN) { + pmdev->pages[index].pin_op_warn_limit = pmbus_receive16(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_BYTE: /* R/W byte */ + pmdev->pages[index].status_word = pmbus_receive8(pmdev); + break; + + case PMBUS_STATUS_WORD: /* R/W word */ + pmdev->pages[index].status_word = pmbus_receive16(pmdev); + break; + + case PMBUS_STATUS_VOUT: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_VOUT) { + pmdev->pages[index].status_vout = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_IOUT: /* R/W byte */ 
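+        /*
+         * The STATUS_* cases in this group latch whatever value the bus
+         * master wrote into the per-page status field; the same fields are
+         * also updated from the device side by pmbus_check_limits() and
+         * zeroed again by pmbus_clear_faults().
+         */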
+ if (pmdev->pages[index].page_flags & PB_HAS_IOUT) { + pmdev->pages[index].status_iout = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_INPUT: /* R/W byte */ + pmdev->pages[index].status_input = pmbus_receive8(pmdev); + break; + + case PMBUS_STATUS_TEMPERATURE: /* R/W byte */ + if (pmdev->pages[index].page_flags & PB_HAS_TEMPERATURE) { + pmdev->pages[index].status_temperature = pmbus_receive8(pmdev); + } else { + goto passthrough; + } + break; + + case PMBUS_STATUS_CML: /* R/W byte */ + pmdev->pages[index].status_cml = pmbus_receive8(pmdev); + break; + + case PMBUS_STATUS_OTHER: /* R/W byte */ + pmdev->pages[index].status_other = pmbus_receive8(pmdev); + break; + + case PMBUS_PAGE_PLUS_READ: /* Block Read-only */ + case PMBUS_CAPABILITY: /* Read-Only byte */ + case PMBUS_COEFFICIENTS: /* Read-only block 5 bytes */ + case PMBUS_READ_EIN: /* Read-Only block 5 bytes */ + case PMBUS_READ_EOUT: /* Read-Only block 5 bytes */ + case PMBUS_READ_VIN: /* Read-Only word */ + case PMBUS_READ_IIN: /* Read-Only word */ + case PMBUS_READ_VCAP: /* Read-Only word */ + case PMBUS_READ_VOUT: /* Read-Only word */ + case PMBUS_READ_IOUT: /* Read-Only word */ + case PMBUS_READ_TEMPERATURE_1: /* Read-Only word */ + case PMBUS_READ_TEMPERATURE_2: /* Read-Only word */ + case PMBUS_READ_TEMPERATURE_3: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_1: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_2: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_3: /* Read-Only word */ + case PMBUS_READ_FAN_SPEED_4: /* Read-Only word */ + case PMBUS_READ_DUTY_CYCLE: /* Read-Only word */ + case PMBUS_READ_FREQUENCY: /* Read-Only word */ + case PMBUS_READ_POUT: /* Read-Only word */ + case PMBUS_READ_PIN: /* Read-Only word */ + case PMBUS_REVISION: /* Read-Only byte */ + case PMBUS_APP_PROFILE_SUPPORT: /* Read-Only block-read */ + case PMBUS_MFR_VIN_MIN: /* Read-Only word */ + case PMBUS_MFR_VIN_MAX: /* Read-Only word */ + case PMBUS_MFR_IIN_MAX: /* Read-Only word */ + case PMBUS_MFR_PIN_MAX: /* Read-Only word */ + case PMBUS_MFR_VOUT_MIN: /* Read-Only word */ + case PMBUS_MFR_VOUT_MAX: /* Read-Only word */ + case PMBUS_MFR_IOUT_MAX: /* Read-Only word */ + case PMBUS_MFR_POUT_MAX: /* Read-Only word */ + case PMBUS_MFR_TAMBIENT_MAX: /* Read-Only word */ + case PMBUS_MFR_TAMBIENT_MIN: /* Read-Only word */ + case PMBUS_MFR_EFFICIENCY_LL: /* Read-Only block 14 bytes */ + case PMBUS_MFR_EFFICIENCY_HL: /* Read-Only block 14 bytes */ + case PMBUS_MFR_PIN_ACCURACY: /* Read-Only byte */ + case PMBUS_IC_DEVICE_ID: /* Read-Only block-read */ + case PMBUS_IC_DEVICE_REV: /* Read-Only block-read */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to read-only register 0x%02x\n", + __func__, pmdev->code); + break; + +passthrough: + /* Unimplimented registers get passed to the device */ + default: + if (pmdc->write_data) { + ret = pmdc->write_data(pmdev, buf, len); + } + break; + } + pmbus_check_limits(pmdev); + pmdev->in_buf_len = 0; + return ret; +} + +int pmbus_page_config(PMBusDevice *pmdev, uint8_t index, uint64_t flags) +{ + if (!pmdev->pages) { /* allocate memory for pages on first use */ + pmbus_pages_alloc(pmdev); + } + + /* The 0xFF page is special for commands applying to all pages */ + if (index == PB_ALL_PAGES) { + for (int i = 0; i < pmdev->num_pages; i++) { + pmdev->pages[i].page_flags = flags; + } + return 0; + } + + if (index > pmdev->num_pages - 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: index %u is out of range\n", + __func__, index); + return -1; + } + + 
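+    /*
+     * Device models typically call pmbus_page_config() from their init or
+     * realize code to declare which quantities each page exposes.  For
+     * illustration only (a hypothetical two-rail regulator; the flag
+     * combination is an example, not taken from a real device):
+     *
+     *     pmbus_page_config(pmdev, 0,
+     *                       PB_HAS_VOUT | PB_HAS_VOUT_MODE | PB_HAS_IOUT);
+     *     pmbus_page_config(pmdev, 1, PB_HAS_VOUT | PB_HAS_TEMPERATURE);
+     *
+     * Commands whose feature flag is not set fall through to the subclass
+     * receive_byte()/write_data() callbacks in the read and write paths
+     * above.
+     */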
pmdev->pages[index].page_flags = flags; + + return 0; +} + +/* TODO: include pmbus page info in vmstate */ +const VMStateDescription vmstate_pmbus_device = { + .name = TYPE_PMBUS_DEVICE, + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_SMBUS_DEVICE(smb, PMBusDevice), + VMSTATE_UINT8(num_pages, PMBusDevice), + VMSTATE_UINT8(code, PMBusDevice), + VMSTATE_UINT8(page, PMBusDevice), + VMSTATE_UINT8(capability, PMBusDevice), + VMSTATE_END_OF_LIST() + } +}; + +static void pmbus_device_finalize(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + g_free(pmdev->pages); +} + +static void pmbus_device_class_init(ObjectClass *klass, void *data) +{ + SMBusDeviceClass *k = SMBUS_DEVICE_CLASS(klass); + + k->quick_cmd = pmbus_quick_cmd; + k->write_data = pmbus_write_data; + k->receive_byte = pmbus_receive_byte; +} + +static const TypeInfo pmbus_device_type_info = { + .name = TYPE_PMBUS_DEVICE, + .parent = TYPE_SMBUS_DEVICE, + .instance_size = sizeof(PMBusDevice), + .instance_finalize = pmbus_device_finalize, + .abstract = true, + .class_size = sizeof(PMBusDeviceClass), + .class_init = pmbus_device_class_init, +}; + +static void pmbus_device_register_types(void) +{ + type_register_static(&pmbus_device_type_info); +} + +type_init(pmbus_device_register_types) diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c index c0a8e045670..75d50f15158 100644 --- a/hw/i2c/ppc4xx_i2c.c +++ b/hw/i2c/ppc4xx_i2c.c @@ -1,6 +1,8 @@ /* * PPC4xx I2C controller emulation * + * Documentation: PPC405GP User's Manual, Chapter 22. IIC Bus Interface + * * Copyright (c) 2007 Jocelyn Mayer * Copyright (c) 2012 François Revol * Copyright (c) 2016-2018 BALATON Zoltan @@ -238,11 +240,14 @@ static void ppc4xx_i2c_writeb(void *opaque, hwaddr addr, uint64_t value, i2c->sts &= ~IIC_STS_ERR; } } - if (!(i2c->sts & IIC_STS_ERR) && - i2c_send_recv(i2c->bus, &i2c->mdata[i], !recv)) { - i2c->sts |= IIC_STS_ERR; - i2c->extsts |= IIC_EXTSTS_XFRA; - break; + if (!(i2c->sts & IIC_STS_ERR)) { + if (recv) { + i2c->mdata[i] = i2c_recv(i2c->bus); + } else if (i2c_send(i2c->bus, i2c->mdata[i]) < 0) { + i2c->sts |= IIC_STS_ERR; + i2c->extsts |= IIC_EXTSTS_XFRA; + break; + } } if (value & IIC_CNTL_RPST || !(value & IIC_CNTL_CHT)) { i2c_end_transfer(i2c->bus); diff --git a/hw/i2c/smbus_eeprom.c b/hw/i2c/smbus_eeprom.c index 4d2bf99207a..12c5741f388 100644 --- a/hw/i2c/smbus_eeprom.c +++ b/hw/i2c/smbus_eeprom.c @@ -276,7 +276,7 @@ uint8_t *spd_data_generate(enum sdram_type type, ram_addr_t ram_size) spd[18] = 12; /* ~CAS latencies supported */ spd[19] = (type == DDR2 ? 0 : 1); /* reserved / ~CS latencies supported */ spd[20] = 2; /* DIMM type / ~WE latencies */ - /* module features */ + spd[21] = (type < DDR2 ? 
0x20 : 0); /* module features */ /* memory chip features */ spd[23] = 0x12; /* clock cycle time @ medium CAS latency */ /* data access time */ diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c index dc43b8637d1..6a53c34e70b 100644 --- a/hw/i2c/smbus_master.c +++ b/hw/i2c/smbus_master.c @@ -29,7 +29,7 @@ int smbus_receive_byte(I2CBus *bus, uint8_t addr) { uint8_t data; - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { return -1; } data = i2c_recv(bus); @@ -40,7 +40,7 @@ int smbus_receive_byte(I2CBus *bus, uint8_t addr) int smbus_send_byte(I2CBus *bus, uint8_t addr, uint8_t data) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, data); @@ -51,11 +51,11 @@ int smbus_send_byte(I2CBus *bus, uint8_t addr, uint8_t data) int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command) { uint8_t data; - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { i2c_end_transfer(bus); return -1; } @@ -67,7 +67,7 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command) int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); @@ -79,11 +79,11 @@ int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data) int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command) { uint16_t data; - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { i2c_end_transfer(bus); return -1; } @@ -96,7 +96,7 @@ int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command) int smbus_write_word(I2CBus *bus, uint8_t addr, uint8_t command, uint16_t data) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); @@ -113,12 +113,12 @@ int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data, int i; if (send_cmd) { - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); } - if (i2c_start_transfer(bus, addr, 1)) { + if (i2c_start_recv(bus, addr)) { if (send_cmd) { i2c_end_transfer(bus); } @@ -149,7 +149,7 @@ int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data, len = 32; } - if (i2c_start_transfer(bus, addr, 0)) { + if (i2c_start_send(bus, addr)) { return -1; } i2c_send(bus, command); diff --git a/hw/i2c/trace-events b/hw/i2c/trace-events index 82fe6f965f4..7d8907c1eed 100644 --- a/hw/i2c/trace-events +++ b/hw/i2c/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
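+# Each event below follows the usual trace-events syntax,
+#   <event-name>(<C argument list>) "<printf-style format string>",
+# and is emitted from C via the generated trace_<event-name>() call, e.g.
+# trace_pca954x_read_data(data) for the pca954x events added further down.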
# core.c @@ -26,3 +26,8 @@ npcm7xx_smbus_recv_byte(const char *id, uint8_t value) "%s recv byte: 0x%02x" npcm7xx_smbus_stop(const char *id) "%s stopping" npcm7xx_smbus_nack(const char *id) "%s nacking" npcm7xx_smbus_recv_fifo(const char *id, uint8_t received, uint8_t expected) "%s recv fifo: received %u, expected %u" + +# i2c-mux-pca954x.c + +pca954x_write_bytes(uint8_t value) "PCA954X write data: 0x%02x" +pca954x_read_data(uint8_t value) "PCA954X read data: 0x%02x" diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 7f91f30877f..d22ac4a4b95 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -1,4 +1,12 @@ +config X86_FW_OVMF + bool + config SEV + bool + select X86_FW_OVMF + depends on KVM + +config SGX bool depends on KVM @@ -17,13 +25,15 @@ config PC imply PVPANIC_ISA imply QXL imply SEV + imply SGX imply SGA imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA imply VGA_PCI imply VIRTIO_VGA - select FDC + imply NVDIMM + select FDC_ISA select I8259 select I8254 select PCKBD @@ -49,6 +59,7 @@ config PC_ACPI select ACPI_X86 select ACPI_CPU_HOTPLUG select ACPI_MEMORY_HOTPLUG + select ACPI_VIOT select SMBUS_EEPROM select PFLASH_CFI01 depends on ACPI_SMBUS @@ -106,6 +117,7 @@ config MICROVM select ACPI_HW_REDUCED select PCI_EXPRESS_GENERIC_BRIDGE select USB_XHCI_SYSBUS + select I8254 config X86_IOMMU bool diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index de98750aeff..a99c6e4fe3f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,7 +43,6 @@ #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" -#include "hw/boards.h" #include "sysemu/tpm_backend.h" #include "hw/rtc/mc146818rtc_regs.h" #include "migration/vmstate.h" @@ -69,9 +68,11 @@ #include "qom/qom-qobject.h" #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" +#include "hw/virtio/virtio-iommu.h" #include "hw/acpi/ipmi.h" #include "hw/acpi/hmat.h" +#include "hw/acpi/viot.h" /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and * -M pc-i440fx-2.0. Even if the actual amount of AML generated grows @@ -108,7 +109,9 @@ typedef struct AcpiPmInfo { typedef struct AcpiMiscInfo { bool is_piix4; bool has_hpet; +#ifdef CONFIG_TPM TPMVersion tpm_version; +#endif const unsigned char *dsdt_code; unsigned dsdt_size; uint16_t pvpanic_port; @@ -218,10 +221,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ pm->fadt.rev = 1; pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; - pm->pcihp_io_base = - object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); - pm->pcihp_io_len = - object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); } if (lpc) { uint64_t smi_features = object_property_get_uint(lpc, @@ -237,6 +236,10 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->smi_on_cpu_unplug = !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)); } + pm->pcihp_io_base = + object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); + pm->pcihp_io_len = + object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); /* The above need not be conditional on machine type because the reset port * happens to be the same on PIIX (pc) and ICH9 (q35). 
*/ @@ -266,10 +269,10 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) qobject_unref(o); pm->pcihp_bridge_en = - object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", + object_property_get_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); pm->pcihp_root_en = - object_property_get_bool(obj, "acpi-root-pci-hotplug", + object_property_get_bool(obj, ACPI_PM_PROP_ACPI_PCI_ROOTHP, NULL); } @@ -287,7 +290,9 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) } info->has_hpet = hpet_find(); +#ifdef CONFIG_TPM info->tpm_version = tpm_get_version(tpm_find()); +#endif info->pvpanic_port = pvpanic_port(); info->applesmc_io_base = applesmc_port(); } @@ -296,17 +301,13 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. * On i386 arch we only have two pci hosts, so we can look only for them. */ -static Object *acpi_get_i386_pci_host(void) +Object *acpi_get_i386_pci_host(void) { PCIHostState *host; - host = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/i440fx", NULL), - TYPE_PCI_HOST_BRIDGE); + host = PCI_HOST_BRIDGE(object_resolve_path("/machine/i440fx", NULL)); if (!host) { - host = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/q35", NULL), - TYPE_PCI_HOST_BRIDGE); + host = PCI_HOST_BRIDGE(object_resolve_path("/machine/q35", NULL)); } return OBJECT(host); @@ -317,7 +318,10 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64) Object *pci_host; pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); + + if (!pci_host) { + return; + } range_set_bounds1(hole, object_property_get_uint(pci_host, @@ -343,13 +347,23 @@ static void acpi_align_size(GArray *blob, unsigned align) g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); } -/* FACS */ +/* + * ACPI spec 1.0b, + * 5.2.6 Firmware ACPI Control Structure + */ static void build_facs(GArray *table_data) { - AcpiFacsDescriptorRev1 *facs = acpi_data_push(table_data, sizeof *facs); - memcpy(&facs->signature, "FACS", 4); - facs->length = cpu_to_le32(sizeof(*facs)); + const char *sig = "FACS"; + const uint8_t reserved[40] = {}; + + g_array_append_vals(table_data, sig, 4); /* Signature */ + build_append_int_noprefix(table_data, 64, 4); /* Length */ + build_append_int_noprefix(table_data, 0, 4); /* Hardware Signature */ + build_append_int_noprefix(table_data, 0, 4); /* Firmware Waking Vector */ + build_append_int_noprefix(table_data, 0, 4); /* Global Lock */ + build_append_int_noprefix(table_data, 0, 4); /* Flags */ + g_array_append_vals(table_data, reserved, 40); /* Reserved */ } static void build_append_pcihp_notify_entry(Aml *method, int slot) @@ -368,7 +382,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, Aml *dev, *notify_method = NULL, *method; QObject *bsel; PCIBus *sec; - int i; + int devfn; bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL); if (bsel) { @@ -378,20 +392,31 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED); } - for (i = 0; i < ARRAY_SIZE(bus->devices); i += PCI_FUNC_MAX) { + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { DeviceClass *dc; PCIDeviceClass *pc; - PCIDevice *pdev = bus->devices[i]; - int slot = PCI_SLOT(i); + PCIDevice *pdev = bus->devices[devfn]; + int slot = PCI_SLOT(devfn); + int func = PCI_FUNC(devfn); + /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */ + int adr = slot << 16 | func; bool 
hotplug_enabled_dev; bool bridge_in_acpi; bool cold_plugged_bridge; if (!pdev) { - if (bsel) { /* add hotplug slots for non present devices */ - dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); + /* + * add hotplug slots for non present devices. + * hotplug is supported only for non-multifunction device + * so generate device description only for function 0 + */ + if (bsel && !func) { + if (pci_bus_is_express(bus) && slot > 0) { + break; + } + dev = aml_device("S%.02X", devfn); aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); aml_append(method, aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN")) @@ -427,16 +452,29 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, continue; } - /* start to compose PCI slot descriptor */ - dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + /* + * allow describing coldplugged bridges in ACPI even if they are not + * on function 0, as they are not unpluggable, for all other devices + * generate description only for function 0 per slot + */ + if (func && !bridge_in_acpi) { + continue; + } + + /* start to compose PCI device descriptor */ + dev = aml_device("S%.02X", devfn); + aml_append(dev, aml_name_decl("_ADR", aml_int(adr))); if (bsel) { - aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + /* + * Can't declare _SUN here for every device as it changes 'slot' + * enumeration order in linux kernel, so use another variable for it + */ + aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); method = aml_method("_DSM", 4, AML_SERIALIZED); aml_append(method, aml_return( aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2), - aml_arg(3), aml_name("BSEL"), aml_name("_SUN")) + aml_arg(3), aml_name("BSEL"), aml_name("ASUN")) )); aml_append(dev, method); } @@ -463,6 +501,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(method, aml_return(aml_int(s3d))); aml_append(dev, method); } else if (hotplug_enabled_dev) { + aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); /* add _EJ0 to make slot hotpluggable */ method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); aml_append(method, @@ -482,7 +521,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, build_append_pci_bus_devices(dev, sec_bus, pcihp_bridge_en); } - /* slot descriptor has been composed, add it into parent context */ + /* device descriptor has been composed, add it into parent context */ aml_append(parent_scope, dev); } @@ -511,13 +550,12 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, /* Notify about child bus events in any case */ if (pcihp_bridge_en) { QLIST_FOREACH(sec, &bus->child, sibling) { - int32_t devfn = sec->parent_dev->devfn; - - if (pci_bus_is_root(sec) || pci_bus_is_express(sec)) { + if (pci_bus_is_root(sec)) { continue; } - aml_append(method, aml_name("^S%.02X.PCNT", devfn)); + aml_append(method, aml_name("^S%.02X.PCNT", + sec->parent_dev->devfn)); } } @@ -1243,7 +1281,7 @@ static void build_piix4_isa_bridge(Aml *table) aml_append(table, scope); } -static void build_piix4_pci_hotplug(Aml *table) +static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) { Aml *scope; Aml *field; @@ -1252,20 +1290,22 @@ static void build_piix4_pci_hotplug(Aml *table) scope = aml_scope("_SB.PCI0"); aml_append(scope, - aml_operation_region("PCST", 
AML_SYSTEM_IO, aml_int(0xae00), 0x08)); + aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(pcihp_addr), 0x08)); field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("PCIU", 32)); aml_append(field, aml_named_field("PCID", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, aml_int(0xae08), 0x04)); + aml_operation_region("SEJ", AML_SYSTEM_IO, + aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04)); field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("B0EJ", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x08)); + aml_operation_region("BNMR", AML_SYSTEM_IO, + aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08)); field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("BNUM", 32)); aml_append(field, aml_named_field("PIDX", 32)); @@ -1297,7 +1337,7 @@ static void build_piix4_pci_hotplug(Aml *table) aml_append(table, scope); } -static Aml *build_q35_osc_method(void) +static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) { Aml *if_ctx; Aml *if_ctx2; @@ -1319,8 +1359,10 @@ static Aml *build_q35_osc_method(void) /* * Always allow native PME, AER (no dependencies) * Allow SHPC (PCI bridges can have SHPC controller) + * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. */ - aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + aml_append(if_ctx, aml_and(a_ctrl, + aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl)); if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); /* Unknown revision */ @@ -1372,15 +1414,17 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, uint32_t nr_mem = machine->ram_slots; int root_bus_limit = 0xFF; PCIBus *bus = NULL; +#ifdef CONFIG_TPM TPMIf *tpm = tpm_find(); +#endif int i; VMBusBridge *vmbus_bridge = vmbus_bridge_find(); + AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, + .oem_table_id = x86ms->oem_table_id }; + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); - build_dbg_aml(dsdt); if (misc->is_piix4) { sb_scope = aml_scope("_SB"); @@ -1397,7 +1441,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_piix4_isa_bridge(dsdt); build_isa_devices_aml(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - build_piix4_pci_hotplug(dsdt); + build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); } else { @@ -1407,7 +1451,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method()); + aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1445,6 +1489,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } build_q35_isa_bridge(dsdt); build_isa_devices_aml(dsdt); + if (pm->pcihp_bridge_en) { + build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + } build_q35_pci0_int(dsdt); if (pcms->smbus && !pcmc->do_not_add_smb_acpi) { build_smb0(dsdt, pcms->smbus, ICH9_SMB_DEV, ICH9_SMB_FUNC); @@ -1479,7 +1526,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, { 
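+        /*
+         * \_GPE._E01 below is the handler for the GPE bit QEMU raises on
+         * ACPI PCI hotplug events: it serializes on \_SB.PCI0.BLCK and
+         * rescans the slots via the PCNT notify methods.  It is now
+         * generated whenever ACPI PCI hotplug is enabled, on q35 as well
+         * as piix4.
+         */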
aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - if (misc->is_piix4 && (pm->pcihp_bridge_en || pm->pcihp_root_en)) { + if (pm->pcihp_bridge_en || pm->pcihp_root_en) { method = aml_method("_E01", 0, AML_NOTSERIALIZED); aml_append(method, aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); @@ -1520,7 +1567,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, build_q35_osc_method()); + + /* Expander bridges do not have ACPI PCI Hot-plug enabled */ + aml_append(dev, build_q35_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } @@ -1605,10 +1654,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } } +#ifdef CONFIG_TPM if (TPM_IS_TIS_ISA(tpm_find())) { aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); } +#endif aml_append(scope, aml_name_decl("_CRS", crs)); /* reserve GPE0 block resources */ @@ -1745,6 +1796,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, PCIBus *bus = NULL; pci_host = acpi_get_i386_pci_host(); + if (pci_host) { bus = PCI_HOST_BRIDGE(pci_host)->bus; } @@ -1754,6 +1806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, /* Scan all PCI buses. Generate tables to support hotplug. */ build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en); +#ifdef CONFIG_TPM if (TPM_IS_TIS_ISA(tpm)) { if (misc->tpm_version == TPM_VERSION_2_0) { dev = aml_device("TPM"); @@ -1781,11 +1834,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(scope, dev); } +#endif aml_append(sb_scope, scope); } } +#ifdef CONFIG_TPM if (TPM_IS_CRB(tpm)) { dev = aml_device("TPM"); aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101"))); @@ -1800,120 +1855,176 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(sb_scope, dev); } +#endif + + if (pcms->sgx_epc.size != 0) { + uint64_t epc_base = pcms->sgx_epc.base; + uint64_t epc_size = pcms->sgx_epc.size; + dev = aml_device("EPC"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("INT0E0C"))); + aml_append(dev, aml_name_decl("_STR", + aml_unicode("Enclave Page Cache 1.0"))); + crs = aml_resource_template(); + aml_append(crs, + aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, 0, epc_base, + epc_base + epc_size - 1, 0, epc_size)); + aml_append(dev, aml_name_decl("_CRS", crs)); + + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0x0f))); + aml_append(dev, method); + + aml_append(sb_scope, dev); + } aml_append(dsdt, sb_scope); /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 1, x86ms->oem_id, x86ms->oem_table_id); + acpi_table_end(linker, &table); free_aml_allocator(); } +/* + * IA-PC HPET (High Precision Event Timers) Specification (Revision: 1.0a) + * 3.2.4The ACPI 2.0 HPET Description Table (HPET) + */ static void build_hpet(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - Acpi20Hpet *hpet; + AcpiTable table = { .sig = "HPET", .rev = 1, + .oem_id = oem_id, .oem_table_id = oem_table_id }; - hpet = acpi_data_push(table_data, sizeof(*hpet)); + acpi_table_begin(&table, table_data); /* Note timer_block_id 
value must be kept in sync with value advertised by * emulated hpet */ - hpet->timer_block_id = cpu_to_le32(0x8086a201); - hpet->addr.address = cpu_to_le64(HPET_BASE); - build_header(linker, table_data, - (void *)hpet, "HPET", sizeof(*hpet), 1, oem_id, oem_table_id); + /* Event Timer Block ID */ + build_append_int_noprefix(table_data, 0x8086a201, 4); + /* BASE_ADDRESS */ + build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0, 0, 0, HPET_BASE); + /* HPET Number */ + build_append_int_noprefix(table_data, 0, 1); + /* Main Counter Minimum Clock_tick in Periodic Mode */ + build_append_int_noprefix(table_data, 0, 2); + /* Page Protection And OEM Attribute */ + build_append_int_noprefix(table_data, 0, 1); + acpi_table_end(linker, &table); } +#ifdef CONFIG_TPM +/* + * TCPA Description Table + * + * Following Level 00, Rev 00.37 of specs: + * http://www.trustedcomputinggroup.org/resources/tcg_acpi_specification + * 7.1.2 ACPI Table Layout + */ static void build_tpm_tcpa(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, const char *oem_id, const char *oem_table_id) { - Acpi20Tcpa *tcpa = acpi_data_push(table_data, sizeof *tcpa); - unsigned log_addr_size = sizeof(tcpa->log_area_start_address); - unsigned log_addr_offset = - (char *)&tcpa->log_area_start_address - table_data->data; - - tcpa->platform_class = cpu_to_le16(TPM_TCPA_ACPI_CLASS_CLIENT); - tcpa->log_area_minimum_length = cpu_to_le32(TPM_LOG_AREA_MINIMUM_SIZE); - acpi_data_push(tcpalog, le32_to_cpu(tcpa->log_area_minimum_length)); + unsigned log_addr_offset; + AcpiTable table = { .sig = "TCPA", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Platform Class */ + build_append_int_noprefix(table_data, TPM_TCPA_ACPI_CLASS_CLIENT, 2); + /* Log Area Minimum Length (LAML) */ + build_append_int_noprefix(table_data, TPM_LOG_AREA_MINIMUM_SIZE, 4); + /* Log Area Start Address (LASA) */ + log_addr_offset = table_data->len; + build_append_int_noprefix(table_data, 0, 8); + /* allocate/reserve space for TPM log area */ + acpi_data_push(tcpalog, TPM_LOG_AREA_MINIMUM_SIZE); bios_linker_loader_alloc(linker, ACPI_BUILD_TPMLOG_FILE, tcpalog, 1, false /* high memory */); - /* log area start address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_TABLE_FILE, log_addr_offset, log_addr_size, - ACPI_BUILD_TPMLOG_FILE, 0); + bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, + log_addr_offset, 8, ACPI_BUILD_TPMLOG_FILE, 0); - build_header(linker, table_data, - (void *)tcpa, "TCPA", sizeof(*tcpa), 2, oem_id, oem_table_id); + acpi_table_end(linker, &table); } +#endif #define HOLE_640K_START (640 * KiB) #define HOLE_640K_END (1 * MiB) +/* + * ACPI spec, Revision 3.0 + * 5.2.15 System Resource Affinity Table (SRAT) + */ static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { - AcpiSystemResourceAffinityTable *srat; - AcpiSratMemoryAffinity *numamem; - int i; - int srat_start, numa_start, slots; + int numa_mem_start, slots; uint64_t mem_len, mem_base, next_base; MachineClass *mc = MACHINE_GET_CLASS(machine); X86MachineState *x86ms = X86_MACHINE(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); PCMachineState *pcms = PC_MACHINE(machine); - ram_addr_t hotplugabble_address_space_size = + int nb_numa_nodes = machine->numa_state->num_nodes; + NodeInfo *numa_info = machine->numa_state->nodes; + ram_addr_t hotpluggable_address_space_size = object_property_get_int(OBJECT(pcms), 
PC_MACHINE_DEVMEM_REGION_SIZE, NULL); + AcpiTable table = { .sig = "SRAT", .rev = 1, .oem_id = x86ms->oem_id, + .oem_table_id = x86ms->oem_table_id }; - srat_start = table_data->len; - - srat = acpi_data_push(table_data, sizeof *srat); - srat->reserved1 = cpu_to_le32(1); + acpi_table_begin(&table, table_data); + build_append_int_noprefix(table_data, 1, 4); /* Reserved */ + build_append_int_noprefix(table_data, 0, 8); /* Reserved */ for (i = 0; i < apic_ids->len; i++) { int node_id = apic_ids->cpus[i].props.node_id; uint32_t apic_id = apic_ids->cpus[i].arch_id; if (apic_id < 255) { - AcpiSratProcessorAffinity *core; - - core = acpi_data_push(table_data, sizeof *core); - core->type = ACPI_SRAT_PROCESSOR_APIC; - core->length = sizeof(*core); - core->local_apic_id = apic_id; - core->proximity_lo = node_id; - memset(core->proximity_hi, 0, 3); - core->local_sapic_eid = 0; - core->flags = cpu_to_le32(1); + /* 5.2.15.1 Processor Local APIC/SAPIC Affinity Structure */ + build_append_int_noprefix(table_data, 0, 1); /* Type */ + build_append_int_noprefix(table_data, 16, 1); /* Length */ + /* Proximity Domain [7:0] */ + build_append_int_noprefix(table_data, node_id, 1); + build_append_int_noprefix(table_data, apic_id, 1); /* APIC ID */ + /* Flags, Table 5-36 */ + build_append_int_noprefix(table_data, 1, 4); + build_append_int_noprefix(table_data, 0, 1); /* Local SAPIC EID */ + /* Proximity Domain [31:8] */ + build_append_int_noprefix(table_data, 0, 3); + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ } else { - AcpiSratProcessorX2ApicAffinity *core; - - core = acpi_data_push(table_data, sizeof *core); - core->type = ACPI_SRAT_PROCESSOR_x2APIC; - core->length = sizeof(*core); - core->x2apic_id = cpu_to_le32(apic_id); - core->proximity_domain = cpu_to_le32(node_id); - core->flags = cpu_to_le32(1); + /* + * ACPI spec, Revision 4.0 + * 5.2.16.3 Processor Local x2APIC Affinity Structure + */ + build_append_int_noprefix(table_data, 2, 1); /* Type */ + build_append_int_noprefix(table_data, 24, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Proximity Domain */ + build_append_int_noprefix(table_data, node_id, 4); + build_append_int_noprefix(table_data, apic_id, 4); /* X2APIC ID */ + /* Flags, Table 5-39 */ + build_append_int_noprefix(table_data, 1 /* Enabled */, 4); + build_append_int_noprefix(table_data, 0, 4); /* Clock Domain */ + build_append_int_noprefix(table_data, 0, 4); /* Reserved */ } } - /* the memory map is a bit tricky, it contains at least one hole * from 640k-1M and possibly another one from 3.5G-4G. 
*/ next_base = 0; - numa_start = table_data->len; + numa_mem_start = table_data->len; - for (i = 1; i < pcms->numa_nodes + 1; ++i) { + for (i = 1; i < nb_numa_nodes + 1; ++i) { mem_base = next_base; - mem_len = pcms->node_mem[i - 1]; + mem_len = numa_info[i - 1].node_mem; next_base = mem_base + mem_len; /* Cut out the 640K hole */ @@ -1921,8 +2032,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) next_base > HOLE_640K_START) { mem_len -= next_base - HOLE_640K_START; if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } @@ -1940,8 +2050,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) next_base > x86ms->below_4g_mem_size) { mem_len -= next_base - x86ms->below_4g_mem_size; if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } mem_base = 1ULL << 32; @@ -1950,8 +2059,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, + build_srat_memory(table_data, mem_base, mem_len, i - 1, MEM_AFFINITY_ENABLED); } } @@ -1960,10 +2068,15 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) nvdimm_build_srat(table_data); } - slots = (table_data->len - numa_start) / sizeof *numamem; - for (; slots < pcms->numa_nodes + 2; slots++) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS); + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. But I recall there were issues the last time I + * tried to remove it with some ancient guest OS, however I can't remember + * what that was so keep this around for now + */ + slots = (table_data->len - numa_mem_start) / 40 /* mem affinity len */; + for (; slots < nb_numa_nodes + 2; slots++) { + build_srat_memory(table_data, 0, 0, 0, MEM_AFFINITY_NOFLAGS); } /* @@ -1974,77 +2087,144 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * Memory devices may override proximity set by this entry, * providing _PXM method if necessary. */ - if (hotplugabble_address_space_size) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, machine->device_memory->base, - hotplugabble_address_space_size, pcms->numa_nodes - 1, + if (hotpluggable_address_space_size) { + build_srat_memory(table_data, machine->device_memory->base, + hotpluggable_address_space_size, nb_numa_nodes - 1, MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); } - build_header(linker, table_data, - (void *)(table_data->data + srat_start), - "SRAT", - table_data->len - srat_start, 1, x86ms->oem_id, - x86ms->oem_table_id); + acpi_table_end(linker, &table); } /* - * VT-d spec 8.1 DMA Remapping Reporting Structure - * (version Oct. 
2014 or later) + * Insert DMAR scope for PCI bridges and endpoint devcie + */ +static void +insert_scope(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + const size_t device_scope_size = 6 /* device scope structure */ + + 2 /* 1 path entry */; + GArray *scope_blob = opaque; + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + /* Dmar Scope Type: 0x02 for PCI Bridge */ + build_append_int_noprefix(scope_blob, 0x02, 1); + } else { + /* Dmar Scope Type: 0x01 for PCI Endpoint Device */ + build_append_int_noprefix(scope_blob, 0x01, 1); + } + + /* length */ + build_append_int_noprefix(scope_blob, device_scope_size, 1); + /* reserved */ + build_append_int_noprefix(scope_blob, 0, 2); + /* enumeration_id */ + build_append_int_noprefix(scope_blob, 0, 1); + /* bus */ + build_append_int_noprefix(scope_blob, pci_bus_num(bus), 1); + /* device */ + build_append_int_noprefix(scope_blob, PCI_SLOT(dev->devfn), 1); + /* function */ + build_append_int_noprefix(scope_blob, PCI_FUNC(dev->devfn), 1); +} + +/* For a given PCI host bridge, walk and insert DMAR scope */ +static int +dmar_host_bridges(Object *obj, void *opaque) +{ + GArray *scope_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + pci_for_each_device_under_bus(bus, insert_scope, scope_blob); + } + } + + return 0; +} + +/* + * Intel ® Virtualization Technology for Directed I/O + * Architecture Specification. Revision 3.3 + * 8.1 DMA Remapping Reporting Structure */ static void build_dmar_q35(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - int dmar_start = table_data->len; - - AcpiTableDmar *dmar; - AcpiDmarHardwareUnit *drhd; - AcpiDmarRootPortATS *atsr; uint8_t dmar_flags = 0; + uint8_t rsvd10[10] = {}; + /* Root complex IOAPIC uses one path only */ + const size_t ioapic_scope_size = 6 /* device scope structure */ + + 2 /* 1 path entry */; X86IOMMUState *iommu = x86_iommu_get_default(); - AcpiDmarDeviceScope *scope = NULL; - /* Root complex IOAPIC use one path[0] only */ - size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); + GArray *scope_blob = g_array_new(false, true, 1); + + AcpiTable table = { .sig = "DMAR", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; + + /* + * A PCI bus walk, for each PCI host bridge. + * Insert scope for each PCI bridge and endpoint device which + * is attached to a bus with iommu enabled. 
+ */ + object_child_foreach_recursive(object_get_root(), + dmar_host_bridges, scope_blob); assert(iommu); if (x86_iommu_ir_supported(iommu)) { dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */ } - dmar = acpi_data_push(table_data, sizeof(*dmar)); - dmar->host_address_width = intel_iommu->aw_bits - 1; - dmar->flags = dmar_flags; + acpi_table_begin(&table, table_data); + /* Host Address Width */ + build_append_int_noprefix(table_data, intel_iommu->aw_bits - 1, 1); + build_append_int_noprefix(table_data, dmar_flags, 1); /* Flags */ + g_array_append_vals(table_data, rsvd10, sizeof(rsvd10)); /* Reserved */ - /* DMAR Remapping Hardware Unit Definition structure */ - drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size); - drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size); - drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; - drhd->pci_segment = cpu_to_le16(0); - drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + /* 8.3 DMAR Remapping Hardware Unit Definition structure */ + build_append_int_noprefix(table_data, 0, 2); /* Type */ + /* Length */ + build_append_int_noprefix(table_data, + 16 + ioapic_scope_size + scope_blob->len, 2); + /* Flags */ + build_append_int_noprefix(table_data, 0 /* Don't include all pci device */ , + 1); + build_append_int_noprefix(table_data, 0 , 1); /* Reserved */ + build_append_int_noprefix(table_data, 0 , 2); /* Segment Number */ + /* Register Base Address */ + build_append_int_noprefix(table_data, Q35_HOST_BRIDGE_IOMMU_ADDR , 8); /* Scope definition for the root-complex IOAPIC. See VT-d spec * 8.3.1 (version Oct. 2014 or later). */ - scope = &drhd->scope[0]; - scope->entry_type = 0x03; /* Type: 0x03 for IOAPIC */ - scope->length = ioapic_scope_size; - scope->enumeration_id = ACPI_BUILD_IOAPIC_ID; - scope->bus = Q35_PSEUDO_BUS_PLATFORM; - scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC); - scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC); + build_append_int_noprefix(table_data, 0x03 /* IOAPIC */, 1); /* Type */ + build_append_int_noprefix(table_data, ioapic_scope_size, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Reserved */ + /* Enumeration ID */ + build_append_int_noprefix(table_data, ACPI_BUILD_IOAPIC_ID, 1); + /* Start Bus Number */ + build_append_int_noprefix(table_data, Q35_PSEUDO_BUS_PLATFORM, 1); + /* Path, {Device, Function} pair */ + build_append_int_noprefix(table_data, PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC), 1); + build_append_int_noprefix(table_data, PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC), 1); + + /* Add scope found above */ + g_array_append_vals(table_data, scope_blob->data, scope_blob->len); + g_array_free(scope_blob, true); if (iommu->dt_supported) { - atsr = acpi_data_push(table_data, sizeof(*atsr)); - atsr->type = cpu_to_le16(ACPI_DMAR_TYPE_ATSR); - atsr->length = cpu_to_le16(sizeof(*atsr)); - atsr->flags = ACPI_DMAR_ATSR_ALL_PORTS; - atsr->pci_segment = cpu_to_le16(0); + /* 8.5 Root Port ATS Capability Reporting Structure */ + build_append_int_noprefix(table_data, 2, 2); /* Type */ + build_append_int_noprefix(table_data, 8, 2); /* Length */ + build_append_int_noprefix(table_data, 1 /* ALL_PORTS */, 1); /* Flags */ + build_append_int_noprefix(table_data, 0, 1); /* Reserved */ + build_append_int_noprefix(table_data, 0, 2); /* Segment Number */ } - build_header(linker, table_data, (void *)(table_data->data + dmar_start), - "DMAR", table_data->len - dmar_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } /* @@ -2058,10 
+2238,10 @@ static void build_waet(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { - int waet_start = table_data->len; + AcpiTable table = { .sig = "WAET", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; - /* WAET header */ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* * Set "ACPI PM timer good" flag. * @@ -2070,9 +2250,7 @@ build_waet(GArray *table_data, BIOSLinker *linker, const char *oem_id, * Which avoids costly VMExits caused by guest re-reading it unnecessarily. */ build_append_int_noprefix(table_data, 1 << 1 /* ACPI PM timer good */, 4); - - build_header(linker, table_data, (void *)(table_data->data + waet_start), - "WAET", table_data->len - waet_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } /* @@ -2165,8 +2343,8 @@ ivrs_host_bridges(Object *obj, void *opaque) if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; - if (bus) { - pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob); + if (bus && !pci_bus_bypass_iommu(bus)) { + pci_for_each_device_under_bus(bus, insert_ivhd, ivhd_blob); } } @@ -2178,12 +2356,12 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { int ivhd_table_len = 24; - int iommu_start = table_data->len; AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default()); GArray *ivhd_blob = g_array_new(false, true, 1); + AcpiTable table = { .sig = "IVRS", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; - /* IVRS header */ - acpi_data_push(table_data, sizeof(AcpiTableHeader)); + acpi_table_begin(&table, table_data); /* IVinfo - IO virtualization information common to all * IOMMU units in a system */ @@ -2268,10 +2446,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, 0x48, /* special device */ 8); } - - build_header(linker, table_data, (void *)(table_data->data + iommu_start), - "IVRS", table_data->len - iommu_start, 1, oem_id, - oem_table_id); + acpi_table_end(linker, &table); } typedef @@ -2291,7 +2466,9 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) QObject *o; pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); + if (!pci_host) { + return false; + } o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); if (!o) { @@ -2316,12 +2493,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(machine); + DeviceState *iommu = pcms->iommu; GArray *table_offsets; unsigned facs, dsdt, rsdt, fadt; AcpiPmInfo pm; AcpiMiscInfo misc; AcpiMcfgInfo mcfg; - Range pci_hole, pci_hole64; + Range pci_hole = {}, pci_hole64 = {}; uint8_t *u; size_t aml_len = 0; GArray *tables_blob = tables->table_data; @@ -2400,6 +2578,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_hpet(tables_blob, tables->linker, x86ms->oem_id, x86ms->oem_table_id); } +#ifdef CONFIG_TPM if (misc.tpm_version != TPM_VERSION_UNSPEC) { if (misc.tpm_version == TPM_VERSION_1_2) { acpi_add_table(table_offsets, tables_blob); @@ -2411,7 +2590,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) x86ms->oem_id, x86ms->oem_table_id); } } - if (pcms->numa_nodes) { +#endif + if (machine->numa_state->num_nodes) { acpi_add_table(table_offsets, tables_blob); build_srat(tables_blob, tables->linker, machine); if (machine->numa_state->have_numa_distance) { @@ 
-2430,17 +2610,20 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) build_mcfg(tables_blob, tables->linker, &mcfg, x86ms->oem_id, x86ms->oem_table_id); } - if (x86_iommu_get_default()) { - IommuType IOMMUType = x86_iommu_get_type(); - if (IOMMUType == TYPE_AMD) { - acpi_add_table(table_offsets, tables_blob); - build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, - x86ms->oem_table_id); - } else if (IOMMUType == TYPE_INTEL) { - acpi_add_table(table_offsets, tables_blob); - build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, - x86ms->oem_table_id); - } + if (object_dynamic_cast(OBJECT(iommu), TYPE_AMD_IOMMU_DEVICE)) { + acpi_add_table(table_offsets, tables_blob); + build_amd_iommu(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); + } else if (object_dynamic_cast(OBJECT(iommu), TYPE_INTEL_IOMMU_DEVICE)) { + acpi_add_table(table_offsets, tables_blob); + build_dmar_q35(tables_blob, tables->linker, x86ms->oem_id, + x86ms->oem_table_id); + } else if (object_dynamic_cast(OBJECT(iommu), TYPE_VIRTIO_IOMMU_PCI)) { + PCIDevice *pdev = PCI_DEVICE(iommu); + + acpi_add_table(table_offsets, tables_blob); + build_viot(machine, tables_blob, tables->linker, pci_get_bdf(pdev), + x86ms->oem_id, x86ms->oem_table_id); } if (machine->nvdimms_state->is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, @@ -2602,8 +2785,10 @@ void acpi_setup(void) AcpiBuildTables tables; AcpiBuildState *build_state; Object *vmgenid_dev; +#ifdef CONFIG_TPM TPMIf *tpm; static FwCfgTPMConfig tpm_config; +#endif if (!x86ms->fw_cfg) { ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); @@ -2635,6 +2820,7 @@ void acpi_setup(void) acpi_add_rom_blob(acpi_build_update, build_state, tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE); +#ifdef CONFIG_TPM fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); @@ -2648,6 +2834,7 @@ void acpi_setup(void) fw_cfg_add_file(x86ms->fw_cfg, "etc/tpm/config", &tpm_config, sizeof tpm_config); } +#endif vmgenid_dev = find_vmgenid_dev(); if (vmgenid_dev) { diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 74df5fc6128..0dce155c8cc 100644 --- a/hw/i386/acpi-build.h +++ b/hw/i386/acpi-build.h @@ -5,6 +5,11 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; +/* PCI Hot-plug registers bases. See docs/spec/acpi_pci_hotplug.txt */ +#define ACPI_PCIHP_SEJ_BASE 0x8 +#define ACPI_PCIHP_BNMR_BASE 0x10 + void acpi_setup(void); +Object *acpi_get_i386_pci_host(void); #endif diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 1f5947fcf96..4aaafbdd7b5 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -34,9 +34,13 @@ #include "acpi-common.h" void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry) + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled) { uint32_t apic_id = apic_ids->cpus[uid].arch_id; + /* Flags – Local APIC Flags */ + uint32_t flags = apic_ids->cpus[uid].cpu != NULL || force_enabled ? + 1 /* Enabled */ : 0; /* ACPI spec says that LAPIC entry for non present * CPU may be omitted from MADT or it must be marked @@ -45,82 +49,84 @@ void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, * should be put in MADT but kept disabled. 
*/ if (apic_id < 255) { - AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = uid; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } + /* Rev 1.0b, Table 5-13 Processor Local APIC Structure */ + build_append_int_noprefix(entry, 0, 1); /* Type */ + build_append_int_noprefix(entry, 8, 1); /* Length */ + build_append_int_noprefix(entry, uid, 1); /* ACPI Processor ID */ + build_append_int_noprefix(entry, apic_id, 1); /* APIC ID */ + build_append_int_noprefix(entry, flags, 4); /* Flags */ } else { - AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); - - apic->type = ACPI_APIC_LOCAL_X2APIC; - apic->length = sizeof(*apic); - apic->uid = cpu_to_le32(uid); - apic->x2apic_id = cpu_to_le32(apic_id); - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); - } else { - apic->flags = cpu_to_le32(0); - } + /* Rev 4.0, 5.2.12.12 Processor Local x2APIC Structure */ + build_append_int_noprefix(entry, 9, 1); /* Type */ + build_append_int_noprefix(entry, 16, 1); /* Length */ + build_append_int_noprefix(entry, 0, 2); /* Reserved */ + build_append_int_noprefix(entry, apic_id, 4); /* X2APIC ID */ + build_append_int_noprefix(entry, flags, 4); /* Flags */ + build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */ } } +static void build_ioapic(GArray *entry, uint8_t id, uint32_t addr, uint32_t irq) +{ + /* Rev 1.0b, 5.2.8.2 IO APIC */ + build_append_int_noprefix(entry, 1, 1); /* Type */ + build_append_int_noprefix(entry, 12, 1); /* Length */ + build_append_int_noprefix(entry, id, 1); /* IO APIC ID */ + build_append_int_noprefix(entry, 0, 1); /* Reserved */ + build_append_int_noprefix(entry, addr, 4); /* IO APIC Address */ + build_append_int_noprefix(entry, irq, 4); /* System Vector Base */ +} + +static void +build_xrupt_override(GArray *entry, uint8_t src, uint32_t gsi, uint16_t flags) +{ + /* Rev 1.0b, 5.2.8.3.1 Interrupt Source Overrides */ + build_append_int_noprefix(entry, 2, 1); /* Type */ + build_append_int_noprefix(entry, 10, 1); /* Length */ + build_append_int_noprefix(entry, 0, 1); /* Bus */ + build_append_int_noprefix(entry, src, 1); /* Source */ + /* Global System Interrupt Vector */ + build_append_int_noprefix(entry, gsi, 4); + build_append_int_noprefix(entry, flags, 2); /* Flags */ +} + +/* + * ACPI spec, Revision 1.0b + * 5.2.8 Multiple APIC Description Table + */ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, X86MachineState *x86ms, AcpiDeviceIf *adev, const char *oem_id, const char *oem_table_id) { + int i; + bool x2apic_mode = false; MachineClass *mc = MACHINE_GET_CLASS(x86ms); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); - int madt_start = table_data->len; AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); - bool x2apic_mode = false; + AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = oem_id, + .oem_table_id = oem_table_id }; - AcpiMultipleApicTable *madt; - AcpiMadtIoApic *io_apic; - AcpiMadtIntsrcovr *intsrcovr; - int i; - - madt = acpi_data_push(table_data, sizeof *madt); - madt->local_apic_address = cpu_to_le32(APIC_DEFAULT_ADDRESS); - madt->flags = cpu_to_le32(1); + acpi_table_begin(&table, table_data); + /* Local APIC Address */ + build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); + build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ for (i = 0; i < 
apic_ids->len; i++) { - adevc->madt_cpu(adev, i, apic_ids, table_data); + adevc->madt_cpu(adev, i, apic_ids, table_data, false); if (apic_ids->cpus[i].arch_id > 254) { x2apic_mode = true; } } - io_apic = acpi_data_push(table_data, sizeof *io_apic); - io_apic->type = ACPI_APIC_IO; - io_apic->length = sizeof(*io_apic); - io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; - io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); - io_apic->interrupt = cpu_to_le32(0); - + build_ioapic(table_data, ACPI_BUILD_IOAPIC_ID, IO_APIC_DEFAULT_ADDRESS, 0); if (x86ms->ioapic2) { - AcpiMadtIoApic *io_apic2; - io_apic2 = acpi_data_push(table_data, sizeof *io_apic); - io_apic2->type = ACPI_APIC_IO; - io_apic2->length = sizeof(*io_apic); - io_apic2->io_apic_id = ACPI_BUILD_IOAPIC_ID + 1; - io_apic2->address = cpu_to_le32(IO_APIC_SECONDARY_ADDRESS); - io_apic2->interrupt = cpu_to_le32(IO_APIC_SECONDARY_IRQBASE); + build_ioapic(table_data, ACPI_BUILD_IOAPIC_ID + 1, + IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE); } if (x86ms->apic_xrupt_override) { - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = 0; - intsrcovr->gsi = cpu_to_le32(2); - intsrcovr->flags = cpu_to_le16(0); /* conforms to bus specifications */ + build_xrupt_override(table_data, 0, 2, + 0 /* Flags: Conforms to the specifications of the bus */); } for (i = 1; i < 16; i++) { @@ -128,36 +134,32 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, /* No need for a INT source override structure. */ continue; } - intsrcovr = acpi_data_push(table_data, sizeof *intsrcovr); - intsrcovr->type = ACPI_APIC_XRUPT_OVERRIDE; - intsrcovr->length = sizeof(*intsrcovr); - intsrcovr->source = i; - intsrcovr->gsi = cpu_to_le32(i); - intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ + build_xrupt_override(table_data, i, i, + 0xd /* Flags: Active high, Level Triggered */); } if (x2apic_mode) { - AcpiMadtLocalX2ApicNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->uid = 0xFFFFFFFF; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ + /* Rev 4.0, 5.2.12.13 Local x2APIC NMI Structure*/ + build_append_int_noprefix(table_data, 0xA, 1); /* Type */ + build_append_int_noprefix(table_data, 12, 1); /* Length */ + build_append_int_noprefix(table_data, 0, 2); /* Flags */ + /* ACPI Processor UID */ + build_append_int_noprefix(table_data, 0xFFFFFFFF /* all processors */, + 4); + /* Local x2APIC LINT# */ + build_append_int_noprefix(table_data, 1 /* ACPI_LINT1 */, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ } else { - AcpiMadtLocalNmi *local_nmi; - - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->processor_id = 0xff; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ + /* Rev 1.0b, 5.2.8.3.3 Local APIC NMI */ + build_append_int_noprefix(table_data, 4, 1); /* Type */ + build_append_int_noprefix(table_data, 6, 1); /* Length */ + /* ACPI Processor ID */ + build_append_int_noprefix(table_data, 0xFF /* all processors */, 1); + build_append_int_noprefix(table_data, 0, 2); /* Flags */ + /* Local APIC INTI# */ + build_append_int_noprefix(table_data, 1 /* ACPI_LINT1 */, 1); } - 
build_header(linker, table_data, - (void *)(table_data->data + madt_start), "APIC", - table_data->len - madt_start, 1, oem_id, oem_table_id); + acpi_table_end(linker, &table); } diff --git a/hw/i386/acpi-common.h b/hw/i386/acpi-common.h index b12cd73ea5d..a68825acf50 100644 --- a/hw/i386/acpi-common.h +++ b/hw/i386/acpi-common.h @@ -1,9 +1,9 @@ #ifndef HW_I386_ACPI_COMMON_H #define HW_I386_ACPI_COMMON_H -#include "include/hw/acpi/acpi_dev_interface.h" -#include "include/hw/acpi/bios-linker-loader.h" -#include "include/hw/i386/x86.h" +#include "hw/acpi/acpi_dev_interface.h" +#include "hw/acpi/bios-linker-loader.h" +#include "hw/i386/x86.h" /* Default IOAPIC ID */ #define ACPI_BUILD_IOAPIC_ID 0x0 diff --git a/hw/i386/acpi-microvm.c b/hw/i386/acpi-microvm.c index ccd3303aaca..196d3184995 100644 --- a/hw/i386/acpi-microvm.c +++ b/hw/i386/acpi-microvm.c @@ -30,7 +30,6 @@ #include "hw/acpi/bios-linker-loader.h" #include "hw/acpi/generic_event_device.h" #include "hw/acpi/utils.h" -#include "hw/boards.h" #include "hw/i386/fw_cfg.h" #include "hw/i386/microvm.h" #include "hw/pci/pci.h" @@ -114,16 +113,16 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker, Aml *dsdt, *sb_scope, *scope, *pkg; bool ambiguous; Object *isabus; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = x86ms->oem_id, + .oem_table_id = x86ms->oem_table_id }; isabus = object_resolve_path_type("", TYPE_ISA_BUS, &ambiguous); assert(isabus); assert(!ambiguous); + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); - /* Reserve space for header */ - acpi_data_push(dsdt->buf, sizeof(AcpiTableHeader)); - sb_scope = aml_scope("_SB"); fw_cfg_add_acpi_dsdt(sb_scope, x86ms->fw_cfg); isa_build_aml(ISA_BUS(isabus), sb_scope); @@ -145,11 +144,10 @@ build_dsdt_microvm(GArray *table_data, BIOSLinker *linker, aml_append(scope, aml_name_decl("_S5", pkg)); aml_append(dsdt, scope); - /* copy AML table into ACPI tables blob and patch header there */ + /* copy AML bytecode into ACPI tables blob */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); - build_header(linker, table_data, - (void *)(table_data->data + table_data->len - dsdt->buf->len), - "DSDT", dsdt->buf->len, 2, x86ms->oem_id, x86ms->oem_table_id); + + acpi_table_end(linker, &table); free_aml_allocator(); } diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 74a93a5d93f..91fe34ae589 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -99,7 +99,7 @@ static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr) } /* internal write */ -static void amdvi_writeq_raw(AMDVIState *s, uint64_t val, hwaddr addr) +static void amdvi_writeq_raw(AMDVIState *s, hwaddr addr, uint64_t val) { stq_le_p(&s->mmior[addr], val); } @@ -382,7 +382,7 @@ static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd) } /* set completion interrupt */ if (extract64(cmd[0], 1, 1)) { - amdvi_test_mask(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); /* generate interrupt */ amdvi_generate_msi_interrupt(s); } @@ -553,7 +553,7 @@ static void amdvi_cmdbuf_run(AMDVIState *s) trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf); amdvi_cmdbuf_exec(s); s->cmdbuf_head += AMDVI_COMMAND_SIZE; - amdvi_writeq_raw(s, s->cmdbuf_head, AMDVI_MMIO_COMMAND_HEAD); + amdvi_writeq_raw(s, AMDVI_MMIO_COMMAND_HEAD, s->cmdbuf_head); /* wrap head pointer */ if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) { @@ -860,8 +860,8 @@ static inline uint8_t get_pte_translation_mode(uint64_t pte) static 
inline uint64_t pte_override_page_mask(uint64_t pte) { - uint8_t page_mask = 12; - uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK; + uint8_t page_mask = 13; + uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12; /* find the first zero bit */ while (addr & 1) { page_mask++; @@ -1526,7 +1526,7 @@ static void amdvi_init(AMDVIState *s) AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); } -static void amdvi_reset(DeviceState *dev) +static void amdvi_sysbus_reset(DeviceState *dev) { AMDVIState *s = AMD_IOMMU_DEVICE(dev); @@ -1534,11 +1534,10 @@ static void amdvi_reset(DeviceState *dev) amdvi_init(s); } -static void amdvi_realize(DeviceState *dev, Error **errp) +static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) { int ret = 0; AMDVIState *s = AMD_IOMMU_DEVICE(dev); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); X86MachineState *x86ms = X86_MACHINE(ms); @@ -1548,7 +1547,6 @@ static void amdvi_realize(DeviceState *dev, Error **errp) amdvi_uint64_equal, g_free, g_free); /* This device should take care of IOMMU PCI properties */ - x86_iommu->type = TYPE_AMD; if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { return; } @@ -1585,27 +1583,27 @@ static void amdvi_realize(DeviceState *dev, Error **errp) amdvi_init(s); } -static const VMStateDescription vmstate_amdvi = { +static const VMStateDescription vmstate_amdvi_sysbus = { .name = "amd-iommu", .unmigratable = 1 }; -static void amdvi_instance_init(Object *klass) +static void amdvi_sysbus_instance_init(Object *klass) { AMDVIState *s = AMD_IOMMU_DEVICE(klass); object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); } -static void amdvi_class_init(ObjectClass *klass, void* data) +static void amdvi_sysbus_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); X86IOMMUClass *dc_class = X86_IOMMU_DEVICE_CLASS(klass); - dc->reset = amdvi_reset; - dc->vmsd = &vmstate_amdvi; + dc->reset = amdvi_sysbus_reset; + dc->vmsd = &vmstate_amdvi_sysbus; dc->hotpluggable = false; - dc_class->realize = amdvi_realize; + dc_class->realize = amdvi_sysbus_realize; dc_class->int_remap = amdvi_int_remap; /* Supported by the pc-q35-* machine types */ dc->user_creatable = true; @@ -1613,18 +1611,27 @@ static void amdvi_class_init(ObjectClass *klass, void* data) dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; } -static const TypeInfo amdvi = { +static const TypeInfo amdvi_sysbus = { .name = TYPE_AMD_IOMMU_DEVICE, .parent = TYPE_X86_IOMMU_DEVICE, .instance_size = sizeof(AMDVIState), - .instance_init = amdvi_instance_init, - .class_init = amdvi_class_init + .instance_init = amdvi_sysbus_instance_init, + .class_init = amdvi_sysbus_class_init }; -static const TypeInfo amdviPCI = { +static void amdvi_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; +} + +static const TypeInfo amdvi_pci = { .name = TYPE_AMD_IOMMU_PCI, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(AMDVIPCIState), + .class_init = amdvi_pci_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { }, @@ -1645,11 +1652,11 @@ static const TypeInfo amdvi_iommu_memory_region_info = { .class_init = amdvi_iommu_memory_region_class_init, }; -static void amdviPCI_register_types(void) +static void amdvi_register_types(void) { - type_register_static(&amdviPCI); - 
type_register_static(&amdvi); + type_register_static(&amdvi_pci); + type_register_static(&amdvi_sysbus); type_register_static(&amdvi_iommu_memory_region_info); } -type_init(amdviPCI_register_types); +type_init(amdvi_register_types); diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index e48a54fa364..a283785a8de 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -22,6 +22,7 @@ #include "hw/nvram/fw_cfg.h" #include "e820_memory_layout.h" #include "kvm/kvm_i386.h" +#include "qapi/error.h" #include CONFIG_DEVICES struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; @@ -78,7 +79,8 @@ void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) } smbios_get_tables(ms, mem_array, array_count, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); + &smbios_anchor, &smbios_anchor_len, + &error_fatal); g_free(mem_array); if (smbios_anchor) { @@ -157,7 +159,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) { X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); CPUX86State *env = &cpu->env; - uint32_t unused, ecx, edx; + uint32_t unused, ebx, ecx, edx; uint64_t feature_control_bits = 0; uint64_t *val; @@ -172,6 +174,16 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) feature_control_bits |= FEATURE_CONTROL_LMCE; } + if (env->cpuid_level >= 7) { + cpu_x86_cpuid(env, 0x7, 0, &unused, &ebx, &ecx, &unused); + if (ebx & CPUID_7_0_EBX_SGX) { + feature_control_bits |= FEATURE_CONTROL_SGX; + } + if (ecx & CPUID_7_0_ECX_SGX_LC) { + feature_control_bits |= FEATURE_CONTROL_SGX_LC; + } + } + if (!feature_control_bits) { return; } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6be8f329185..f584449d8d1 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -24,14 +24,12 @@ #include "qemu/main-loop.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "intel_iommu_internal.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/i386/pc.h" #include "hw/i386/apic-msidef.h" -#include "hw/boards.h" #include "hw/i386/x86-iommu.h" #include "hw/pci-host/q35.h" #include "sysemu/kvm.h" @@ -681,7 +679,7 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, } break; default: - /* Unknwon type */ + /* Unknown type */ return false; } return true; @@ -694,7 +692,7 @@ static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) /** * Caller of this function should check present bit if wants - * to use pdir entry for futher usage except for fpd bit check. + * to use pdir entry for further usage except for fpd bit check. */ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid, @@ -748,7 +746,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, /** * Caller of this function should check present bit if wants - * to use pasid entry for futher usage except for fpd bit check. + * to use pasid entry for further usage except for fpd bit check. 
*/ static int vtd_get_pe_from_pdire(IntelIOMMUState *s, uint32_t pasid, @@ -1107,7 +1105,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) .translated_addr = entry->translated_addr, .perm = entry->perm, }; - DMAMap *mapped = iova_tree_find(as->iova_tree, &target); + const DMAMap *mapped = iova_tree_find(as->iova_tree, &target); if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) { trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); @@ -1509,7 +1507,7 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) } /* - * Check if specific device is configed to bypass address + * Check if specific device is configured to bypass address * translation for DMA requests. In Scalable Mode, bypass * 1st-level translation or 2nd-level translation, it depends * on PGTT setting. @@ -3631,6 +3629,12 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, x86_iommu->dt_supported); + if (s->scalable_mode) { + vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; + } + if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -3808,9 +3812,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); - - x86_iommu->type = TYPE_INTEL; if (!vtd_decide_config(s, errp)) { return; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 3d5487fe2ce..a6c788049ba 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -388,6 +388,8 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8 /* Rsvd field masks for spte */ +#define VTD_SPTE_SNP 0x800ULL + #define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, dt_supported) \ dt_supported ? 
\ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 3dbff2be2e2..1e89ca0899c 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/i386/apic_internal.h" #include "hw/pci/msi.h" #include "sysemu/hw_accel.h" @@ -146,7 +145,7 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_LAPIC, &kapic); if (ret < 0) { - fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(-ret)); abort(); } } diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 51872dd84c0..df70b4a0338 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -14,7 +14,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/host-utils.h" #include "qemu/module.h" #include "sysemu/kvm.h" @@ -106,7 +105,7 @@ static void kvm_update_clock(KVMClockState *s) ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret)); abort(); } s->clock = data.clock; @@ -190,7 +189,7 @@ static void kvmclock_vm_state_change(void *opaque, bool running, data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { - fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(-ret)); abort(); } diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c index c558893961b..191a26fa57e 100644 --- a/hw/i386/kvm/i8254.c +++ b/hw/i386/kvm/i8254.c @@ -59,11 +59,6 @@ struct KVMPITClass { DeviceRealize parent_realize; }; -static int64_t abs64(int64_t v) -{ - return v < 0 ? -v : v; -} - static void kvm_pit_update_clock_offset(KVMPITState *s) { int64_t offset, clock_offset; @@ -81,7 +76,7 @@ static void kvm_pit_update_clock_offset(KVMPITState *s) clock_gettime(CLOCK_MONOTONIC, &ts); offset -= ts.tv_nsec; offset -= (int64_t)ts.tv_sec * 1000000000; - if (abs64(offset) < abs64(clock_offset)) { + if (uabs64(offset) < uabs64(clock_offset)) { clock_offset = offset; } } @@ -104,7 +99,7 @@ static void kvm_pit_get(PITCommonState *pit) if (kvm_has_pit_state2()) { ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, &kpit); if (ret < 0) { - fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(-ret)); abort(); } pit->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY; @@ -115,7 +110,7 @@ static void kvm_pit_get(PITCommonState *pit) */ ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT, &kpit); if (ret < 0) { - fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(-ret)); abort(); } } @@ -180,7 +175,7 @@ static void kvm_pit_put(PITCommonState *pit) if (ret < 0) { fprintf(stderr, "%s failed: %s\n", kvm_has_pit_state2() ? 
"KVM_SET_PIT2" : "KVM_SET_PIT", - strerror(ret)); + strerror(-ret)); abort(); } } @@ -272,7 +267,7 @@ static void kvm_pit_realizefn(DeviceState *dev, Error **errp) } if (ret < 0) { error_setg(errp, "Create kernel PIC irqchip failed: %s", - strerror(ret)); + strerror(-ret)); return; } switch (s->lost_tick_policy) { @@ -286,7 +281,7 @@ static void kvm_pit_realizefn(DeviceState *dev, Error **errp) if (ret < 0) { error_setg(errp, "Can't disable in-kernel PIT reinjection: %s", - strerror(ret)); + strerror(-ret)); return; } } diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c index 3f8bf69e9ca..d61bae4dc35 100644 --- a/hw/i386/kvm/i8259.c +++ b/hw/i386/kvm/i8259.c @@ -43,7 +43,7 @@ static void kvm_pic_get(PICCommonState *s) chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } @@ -96,7 +96,7 @@ static void kvm_pic_put(PICCommonState *s) ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } } diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c index dfc3c980057..ee7c8ef68be 100644 --- a/hw/i386/kvm/ioapic.c +++ b/hw/i386/kvm/ioapic.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "monitor/monitor.h" #include "hw/i386/x86.h" -#include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/i386/ioapic_internal.h" #include "hw/i386/apic_internal.h" @@ -63,7 +62,7 @@ static void kvm_ioapic_get(IOAPICCommonState *s) chip.chip_id = KVM_IRQCHIP_IOAPIC; ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } @@ -96,7 +95,7 @@ static void kvm_ioapic_put(IOAPICCommonState *s) ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); abort(); } } diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index 46315445d22..43f8a8f679e 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "cpu.h" #include "sysemu/sysemu.h" #include "sysemu/cpus.h" #include "sysemu/hw_accel.h" diff --git a/hw/i386/meson.build b/hw/i386/meson.build index e5d109f5c64..213e2e82b3d 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -11,11 +11,13 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), if_false: files('x86-iommu-stub.c')) i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c')) i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) -i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c')) +i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c')) +i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), + if_false: files('sgx-stub.c')) i386_ss.add(when: 'CONFIG_ACPI', 
if_true: files('acpi-common.c')) i386_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device_x86.c')) @@ -24,6 +26,8 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'pc_sysfw.c', 'acpi-build.c', 'port92.c')) +i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), + if_false: files('pc_sysfw_ovmf-stubs.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/microvm-dt.c b/hw/i386/microvm-dt.c new file mode 100644 index 00000000000..9c3c4995b41 --- /dev/null +++ b/hw/i386/microvm-dt.c @@ -0,0 +1,348 @@ +/* + * microvm device tree support + * + * This generates an device tree for microvm and exports it via fw_cfg + * as "etc/fdt" to the firmware (edk2 specifically). + * + * The use case is to allow edk2 find the pcie ecam and the virtio + * devices, without adding an ACPI parser, reusing the fdt parser + * which is needed anyway for the arm platform. + * + * Note 1: The device tree is incomplete. CPUs and memory is missing + * for example, those can be detected using other fw_cfg files. + * Also pci ecam irq routing is not there, edk2 doesn't use + * interrupts. + * + * Note 2: This is for firmware only. OSes should use the more + * complete ACPI tables for hardware discovery. + * + * ---------------------------------------------------------------------- + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "sysemu/device_tree.h" +#include "hw/char/serial.h" +#include "hw/i386/fw_cfg.h" +#include "hw/rtc/mc146818rtc.h" +#include "hw/sysbus.h" +#include "hw/virtio/virtio-mmio.h" +#include "hw/usb/xhci.h" + +#include "microvm-dt.h" + +static bool debug; + +static void dt_add_microvm_irq(MicrovmMachineState *mms, + const char *nodename, uint32_t irq) +{ + int index = 0; + + if (irq >= IO_APIC_SECONDARY_IRQBASE) { + irq -= IO_APIC_SECONDARY_IRQBASE; + index++; + } + + qemu_fdt_setprop_cell(mms->fdt, nodename, "interrupt-parent", + mms->ioapic_phandle[index]); + qemu_fdt_setprop_cells(mms->fdt, nodename, "interrupts", irq, 0); +} + +static void dt_add_virtio(MicrovmMachineState *mms, VirtIOMMIOProxy *mmio) +{ + SysBusDevice *dev = SYS_BUS_DEVICE(mmio); + VirtioBusState *mmio_virtio_bus = &mmio->bus; + BusState *mmio_bus = &mmio_virtio_bus->parent_obj; + char *nodename; + + if (QTAILQ_EMPTY(&mmio_bus->children)) { + return; + } + + hwaddr base = dev->mmio[0].addr; + hwaddr size = 512; + unsigned index = (base - VIRTIO_MMIO_BASE) / size; + uint32_t irq = mms->virtio_irq_base + index; + + nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop_string(mms->fdt, nodename, "compatible", "virtio,mmio"); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + qemu_fdt_setprop(mms->fdt, nodename, "dma-coherent", NULL, 0); + dt_add_microvm_irq(mms, nodename, irq); + g_free(nodename); +} + +static void dt_add_xhci(MicrovmMachineState *mms) +{ + const char compat[] = "generic-xhci"; + uint32_t irq = MICROVM_XHCI_IRQ; + hwaddr base = MICROVM_XHCI_BASE; + hwaddr size = XHCI_LEN_REGS; + char *nodename; + + nodename = g_strdup_printf("/usb@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop(mms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + qemu_fdt_setprop(mms->fdt, nodename, "dma-coherent", NULL, 0); + dt_add_microvm_irq(mms, nodename, irq); + g_free(nodename); +} + +static void dt_add_pcie(MicrovmMachineState *mms) +{ + hwaddr base = PCIE_MMIO_BASE; + int nr_pcie_buses; + char *nodename; + + nodename = g_strdup_printf("/pcie@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop_string(mms->fdt, nodename, + "compatible", "pci-host-ecam-generic"); + qemu_fdt_setprop_string(mms->fdt, nodename, "device_type", "pci"); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#address-cells", 3); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#size-cells", 2); + qemu_fdt_setprop_cell(mms->fdt, nodename, "linux,pci-domain", 0); + qemu_fdt_setprop(mms->fdt, nodename, "dma-coherent", NULL, 0); + + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", + 2, PCIE_ECAM_BASE, 2, PCIE_ECAM_SIZE); + if (mms->gpex.mmio64.size) { + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "ranges", + + 1, FDT_PCI_RANGE_MMIO, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.size, + + 1, FDT_PCI_RANGE_MMIO_64BIT, + 2, mms->gpex.mmio64.base, + 2, mms->gpex.mmio64.base, + 2, mms->gpex.mmio64.size); + } else { + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "ranges", + + 1, FDT_PCI_RANGE_MMIO, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.base, + 2, mms->gpex.mmio32.size); + } + + nr_pcie_buses = PCIE_ECAM_SIZE / PCIE_MMCFG_SIZE_MIN; + qemu_fdt_setprop_cells(mms->fdt, nodename, "bus-range", 0, + nr_pcie_buses - 1); + + 
g_free(nodename); +} + +static void dt_add_ioapic(MicrovmMachineState *mms, SysBusDevice *dev) +{ + hwaddr base = dev->mmio[0].addr; + char *nodename; + uint32_t ph; + int index; + + switch (base) { + case IO_APIC_DEFAULT_ADDRESS: + index = 0; + break; + case IO_APIC_SECONDARY_ADDRESS: + index = 1; + break; + default: + fprintf(stderr, "unknown ioapic @ %" PRIx64 "\n", base); + return; + } + + nodename = g_strdup_printf("/ioapic%d@%" PRIx64, index + 1, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop_string(mms->fdt, nodename, + "compatible", "intel,ce4100-ioapic"); + qemu_fdt_setprop(mms->fdt, nodename, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#interrupt-cells", 0x2); + qemu_fdt_setprop_cell(mms->fdt, nodename, "#address-cells", 0x2); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", + 2, base, 2, 0x1000); + + ph = qemu_fdt_alloc_phandle(mms->fdt); + qemu_fdt_setprop_cell(mms->fdt, nodename, "phandle", ph); + qemu_fdt_setprop_cell(mms->fdt, nodename, "linux,phandle", ph); + mms->ioapic_phandle[index] = ph; + + g_free(nodename); +} + +static void dt_add_isa_serial(MicrovmMachineState *mms, ISADevice *dev) +{ + const char compat[] = "ns16550"; + uint32_t irq = object_property_get_int(OBJECT(dev), "irq", NULL); + hwaddr base = object_property_get_int(OBJECT(dev), "iobase", NULL); + hwaddr size = 8; + char *nodename; + + nodename = g_strdup_printf("/serial@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop(mms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + dt_add_microvm_irq(mms, nodename, irq); + + if (base == 0x3f8 /* com1 */) { + qemu_fdt_setprop_string(mms->fdt, "/chosen", "stdout-path", nodename); + } + + g_free(nodename); +} + +static void dt_add_isa_rtc(MicrovmMachineState *mms, ISADevice *dev) +{ + const char compat[] = "motorola,mc146818"; + uint32_t irq = RTC_ISA_IRQ; + hwaddr base = RTC_ISA_BASE; + hwaddr size = 8; + char *nodename; + + nodename = g_strdup_printf("/rtc@%" PRIx64, base); + qemu_fdt_add_subnode(mms->fdt, nodename); + qemu_fdt_setprop(mms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_sized_cells(mms->fdt, nodename, "reg", 2, base, 2, size); + dt_add_microvm_irq(mms, nodename, irq); + g_free(nodename); +} + +static void dt_setup_isa_bus(MicrovmMachineState *mms, DeviceState *bridge) +{ + BusState *bus = qdev_get_child_bus(bridge, "isa.0"); + BusChild *kid; + Object *obj; + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + + /* serial */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL); + if (obj) { + dt_add_isa_serial(mms, ISA_DEVICE(obj)); + continue; + } + + /* rtc */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC); + if (obj) { + dt_add_isa_rtc(mms, ISA_DEVICE(obj)); + continue; + } + + if (debug) { + fprintf(stderr, "%s: unhandled: %s\n", __func__, + object_get_typename(OBJECT(dev))); + } + } +} + +static void dt_setup_sys_bus(MicrovmMachineState *mms) +{ + BusState *bus; + BusChild *kid; + Object *obj; + + /* sysbus devices */ + bus = sysbus_get_default(); + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + + /* ioapic */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_IOAPIC); + if (obj) { + dt_add_ioapic(mms, SYS_BUS_DEVICE(obj)); + continue; + } + } + + QTAILQ_FOREACH(kid, &bus->children, sibling) { + DeviceState *dev = kid->child; + + /* virtio */ + obj = 
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MMIO); + if (obj) { + dt_add_virtio(mms, VIRTIO_MMIO(obj)); + continue; + } + + /* xhci */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_XHCI_SYSBUS); + if (obj) { + dt_add_xhci(mms); + continue; + } + + /* pcie */ + obj = object_dynamic_cast(OBJECT(dev), TYPE_GPEX_HOST); + if (obj) { + dt_add_pcie(mms); + continue; + } + + /* isa */ + obj = object_dynamic_cast(OBJECT(dev), "isabus-bridge"); + if (obj) { + dt_setup_isa_bus(mms, DEVICE(obj)); + continue; + } + + if (debug) { + obj = object_dynamic_cast(OBJECT(dev), TYPE_IOAPIC); + if (obj) { + /* ioapic already added in first pass */ + continue; + } + fprintf(stderr, "%s: unhandled: %s\n", __func__, + object_get_typename(OBJECT(dev))); + } + } +} + +void dt_setup_microvm(MicrovmMachineState *mms) +{ + X86MachineState *x86ms = X86_MACHINE(mms); + int size = 0; + + mms->fdt = create_device_tree(&size); + + /* root node */ + qemu_fdt_setprop_string(mms->fdt, "/", "compatible", "linux,microvm"); + qemu_fdt_setprop_cell(mms->fdt, "/", "#address-cells", 0x2); + qemu_fdt_setprop_cell(mms->fdt, "/", "#size-cells", 0x2); + + qemu_fdt_add_subnode(mms->fdt, "/chosen"); + dt_setup_sys_bus(mms); + + /* add to fw_cfg */ + if (debug) { + fprintf(stderr, "%s: add etc/fdt to fw_cfg\n", __func__); + } + fw_cfg_add_file(x86ms->fw_cfg, "etc/fdt", mms->fdt, size); + + if (debug) { + fprintf(stderr, "%s: writing microvm.fdt\n", __func__); + if (!g_file_set_contents("microvm.fdt", mms->fdt, size, NULL)) { + fprintf(stderr, "%s: writing microvm.fdt failed\n", __func__); + return; + } + int ret = system("dtc -I dtb -O dts microvm.fdt"); + if (ret != 0) { + fprintf(stderr, "%s: oops, dtc not installed?\n", __func__); + } + } +} diff --git a/hw/i386/microvm-dt.h b/hw/i386/microvm-dt.h new file mode 100644 index 00000000000..77c79cbdd9f --- /dev/null +++ b/hw/i386/microvm-dt.h @@ -0,0 +1,8 @@ +#ifndef HW_I386_MICROVM_DT_H +#define HW_I386_MICROVM_DT_H + +#include "hw/i386/microvm.h" + +void dt_setup_microvm(MicrovmMachineState *mms); + +#endif diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index edf2b0f0618..4b3b1dd262f 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -28,6 +28,7 @@ #include "sysemu/reset.h" #include "sysemu/runstate.h" #include "acpi-microvm.h" +#include "microvm-dt.h" #include "hw/loader.h" #include "hw/irq.h" @@ -49,7 +50,6 @@ #include "hw/pci-host/gpex.h" #include "hw/usb/xhci.h" -#include "cpu.h" #include "elf.h" #include "kvm/kvm_i386.h" #include "hw/xen/start_info.h" @@ -332,7 +332,7 @@ static void microvm_memory_init(MicrovmMachineState *mms) rom_set_fw(fw_cfg); if (machine->kernel_filename != NULL) { - x86_load_linux(x86ms, fw_cfg, 0, true, true); + x86_load_linux(x86ms, fw_cfg, 0, true); } if (mms->option_roms) { @@ -459,15 +459,10 @@ static void microvm_machine_state_init(MachineState *machine) { MicrovmMachineState *mms = MICROVM_MACHINE(machine); X86MachineState *x86ms = X86_MACHINE(machine); - Error *local_err = NULL; microvm_memory_init(mms); x86_cpus_init(x86ms, CPU_VERSION_LATEST); - if (local_err) { - error_report_err(local_err); - exit(1); - } microvm_devices_init(mms); } @@ -632,6 +627,7 @@ static void microvm_machine_done(Notifier *notifier, void *data) machine_done); acpi_setup_microvm(mms); + dt_setup_microvm(mms); } static void microvm_powerdown_req(Notifier *notifier, void *data) @@ -673,6 +669,7 @@ static void microvm_machine_initfn(Object *obj) static void microvm_class_init(ObjectClass *oc, void *data) { + X86MachineClass *x86mc = X86_MACHINE_CLASS(oc); MachineClass 
*mc = MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); @@ -703,6 +700,8 @@ static void microvm_class_init(ObjectClass *oc, void *data) hc->unplug_request = microvm_device_unplug_request_cb; hc->unplug = microvm_device_unplug_cb; + x86mc->fwcfg_dma_enabled = true; + object_class_property_add(oc, MICROVM_MACHINE_PIC, "OnOffAuto", microvm_machine_get_pic, microvm_machine_set_pic, diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c index 9e7d69d4705..0a10089f14b 100644 --- a/hw/i386/multiboot.c +++ b/hw/i386/multiboot.c @@ -143,7 +143,8 @@ static void mb_add_mod(MultibootState *s, s->mb_mods_count++; } -int load_multiboot(FWCfgState *fw_cfg, +int load_multiboot(X86MachineState *x86ms, + FWCfgState *fw_cfg, FILE *f, const char *kernel_filename, const char *initrd_filename, @@ -151,6 +152,7 @@ int load_multiboot(FWCfgState *fw_cfg, int kernel_file_size, uint8_t *header) { + bool multiboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; int i, is_multiboot = 0; uint32_t flags = 0; uint32_t mh_entry_addr; @@ -401,7 +403,11 @@ int load_multiboot(FWCfgState *fw_cfg, fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, mb_bootinfo_data, sizeof(bootinfo)); - option_rom[nb_option_roms].name = "multiboot.bin"; + if (multiboot_dma_enabled) { + option_rom[nb_option_roms].name = "multiboot_dma.bin"; + } else { + option_rom[nb_option_roms].name = "multiboot.bin"; + } option_rom[nb_option_roms].bootindex = 0; nb_option_roms++; diff --git a/hw/i386/multiboot.h b/hw/i386/multiboot.h index 60de309cd13..2b9182a8ea8 100644 --- a/hw/i386/multiboot.h +++ b/hw/i386/multiboot.h @@ -2,8 +2,10 @@ #define QEMU_MULTIBOOT_H #include "hw/nvram/fw_cfg.h" +#include "hw/i386/x86.h" -int load_multiboot(FWCfgState *fw_cfg, +int load_multiboot(X86MachineState *x86ms, + FWCfgState *fw_cfg, FILE *f, const char *kernel_filename, const char *initrd_filename, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8a84b25a031..a2ef40ecbc2 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -65,8 +65,6 @@ #include "hw/xen/start_info.h" #include "ui/qemu-spice.h" #include "exec/memory.h" -#include "exec/address-spaces.h" -#include "sysemu/arch_init.h" #include "qemu/bitmap.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -74,7 +72,6 @@ #include "qemu/cutils.h" #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" -#include "hw/boards.h" #include "acpi-build.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" @@ -86,6 +83,7 @@ #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" +#include "hw/virtio/virtio-iommu.h" #include "hw/virtio/virtio-pmem-pci.h" #include "hw/virtio/virtio-mem-pci.h" #include "hw/mem/memory-device.h" @@ -96,6 +94,24 @@ #include "trace.h" #include CONFIG_DEVICES +GlobalProperty pc_compat_6_1[] = { + { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, + { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, + { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "false" }, +}; +const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1); + +GlobalProperty pc_compat_6_0[] = { + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, + { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, + { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, + { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, + { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "true" }, +}; +const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); + GlobalProperty pc_compat_5_2[] = { { 
"ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, }; @@ -304,7 +320,7 @@ const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); GlobalProperty pc_compat_1_7[] = { PC_CPU_MODEL_IDS("1.7.0") { TYPE_USB_DEVICE, "msos-desc", "no" }, - { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, + { "PIIX4_PM", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, { "hpet", HPET_INTCAP, "4" }, }; const size_t pc_compat_1_7_len = G_N_ELEMENTS(pc_compat_1_7); @@ -701,79 +717,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -/* - * This function is very similar to smp_parse() - * in hw/core/machine.c but includes CPU die support. - */ -void pc_smp_parse(MachineState *ms, QemuOpts *opts) -{ - X86MachineState *x86ms = X86_MACHINE(ms); - - if (opts) { - unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); - unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); - unsigned dies = qemu_opt_get_number(opts, "dies", 1); - unsigned cores = qemu_opt_get_number(opts, "cores", 0); - unsigned threads = qemu_opt_get_number(opts, "threads", 0); - - /* compute missing values, prefer sockets over cores over threads */ - if (cpus == 0 || sockets == 0) { - cores = cores > 0 ? cores : 1; - threads = threads > 0 ? threads : 1; - if (cpus == 0) { - sockets = sockets > 0 ? sockets : 1; - cpus = cores * threads * dies * sockets; - } else { - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - sockets = ms->smp.max_cpus / (cores * threads * dies); - } - } else if (cores == 0) { - threads = threads > 0 ? threads : 1; - cores = cpus / (sockets * dies * threads); - cores = cores > 0 ? cores : 1; - } else if (threads == 0) { - threads = cpus / (cores * dies * sockets); - threads = threads > 0 ? threads : 1; - } else if (sockets * dies * cores * threads < cpus) { - error_report("cpu topology: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " - "smp_cpus (%u)", - sockets, dies, cores, threads, cpus); - exit(1); - } - - ms->smp.max_cpus = - qemu_opt_get_number(opts, "maxcpus", cpus); - - if (ms->smp.max_cpus < cpus) { - error_report("maxcpus must be equal to or greater than smp"); - exit(1); - } - - if (sockets * dies * cores * threads != ms->smp.max_cpus) { - error_report("Invalid CPU topology deprecated: " - "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " - "!= maxcpus (%u)", - sockets, dies, cores, threads, - ms->smp.max_cpus); - exit(1); - } - - ms->smp.cpus = cpus; - ms->smp.cores = cores; - ms->smp.threads = threads; - ms->smp.sockets = sockets; - x86ms->smp_dies = dies; - } - - if (ms->smp.cpus > 1) { - Error *blocker = NULL; - error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); - replay_add_blocker(blocker); - } -} - static void pc_machine_done(Notifier *notifier, void *data) { @@ -805,18 +748,9 @@ void pc_machine_done(Notifier *notifier, void *data) void pc_guest_info_init(PCMachineState *pcms) { - int i; - MachineState *ms = MACHINE(pcms); X86MachineState *x86ms = X86_MACHINE(pcms); x86ms->apic_xrupt_override = true; - pcms->numa_nodes = ms->numa_state->num_nodes; - pcms->node_mem = g_malloc0(pcms->numa_nodes * - sizeof *pcms->node_mem); - for (i = 0; i < ms->numa_state->num_nodes; i++) { - pcms->node_mem[i] = ms->numa_state->nodes[i].node_mem; - } - pcms->machine_done.notify = pc_machine_done; qemu_add_machine_init_done_notifier(&pcms->machine_done); } @@ -844,17 +778,24 @@ void xen_load_linux(PCMachineState *pcms) rom_set_fw(fw_cfg); x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->linuxboot_dma_enabled); + pcmc->pvh_enabled); for 
(i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || !strcmp(option_rom[i].name, "pvh.bin") || - !strcmp(option_rom[i].name, "multiboot.bin")); + !strcmp(option_rom[i].name, "multiboot.bin") || + !strcmp(option_rom[i].name, "multiboot_dma.bin")); rom_add_option(option_rom[i].name, option_rom[i].bootindex); } x86ms->fw_cfg = fw_cfg; } +#define PC_ROM_MIN_VGA 0xc0000 +#define PC_ROM_MIN_OPTION 0xc8000 +#define PC_ROM_MAX 0xe0000 +#define PC_ROM_ALIGN 0x800 +#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA) + void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, @@ -895,6 +836,10 @@ void pc_memory_init(PCMachineState *pcms, e820_add_entry(0x100000000ULL, x86ms->above_4g_mem_size, E820_RAM); } + if (pcms->sgx_epc.size != 0) { + e820_add_entry(pcms->sgx_epc.base, pcms->sgx_epc.size, E820_RESERVED); + } + if (!pcmc->has_reserved_memory && (machine->ram_slots || (machine->maxram_size > machine->ram_size))) { @@ -925,8 +870,15 @@ void pc_memory_init(PCMachineState *pcms, exit(EXIT_FAILURE); } + if (pcms->sgx_epc.size != 0) { + machine->device_memory->base = sgx_epc_above_4g_end(&pcms->sgx_epc); + } else { + machine->device_memory->base = + 0x100000000ULL + x86ms->above_4g_mem_size; + } + machine->device_memory->base = - ROUND_UP(0x100000000ULL + x86ms->above_4g_mem_size, 1 * GiB); + ROUND_UP(machine->device_memory->base, 1 * GiB); if (pcmc->enforce_aligned_dimm) { /* size device region assuming 1G page max alignment per slot */ @@ -979,7 +931,7 @@ void pc_memory_init(PCMachineState *pcms, if (linux_boot) { x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->linuxboot_dma_enabled); + pcmc->pvh_enabled); } for (i = 0; i < nb_option_roms; i++) { @@ -1011,6 +963,8 @@ uint64_t pc_pci_hole64_start(void) if (!pcmc->broken_reserved_end) { hole64_start += memory_region_size(&ms->device_memory->mr); } + } else if (pcms->sgx_epc.size != 0) { + hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc); } else { hole64_start = 0x100000000ULL + x86ms->above_4g_mem_size; } @@ -1380,6 +1334,27 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { pc_virtio_md_pci_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + /* Declare the APIC range as the reserved MSI region */ + char *resv_prop_str = g_strdup_printf("0xfee00000:0xfeefffff:%d", + VIRTIO_IOMMU_RESV_MEM_T_MSI); + + object_property_set_uint(OBJECT(dev), "len-reserved-regions", 1, errp); + object_property_set_str(OBJECT(dev), "reserved-regions[0]", + resv_prop_str, errp); + g_free(resv_prop_str); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + PCMachineState *pcms = PC_MACHINE(hotplug_dev); + + if (pcms->iommu) { + error_setg(errp, "QEMU does not support multiple vIOMMUs " + "for x86 yet."); + return; + } + pcms->iommu = dev; } } @@ -1434,7 +1409,9 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || object_dynamic_cast(OBJECT(dev), TYPE_CPU) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) { + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || + object_dynamic_cast(OBJECT(dev), 
TYPE_VIRTIO_IOMMU_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { return HOTPLUG_HANDLER(machine); } @@ -1529,6 +1506,21 @@ static void pc_machine_set_hpet(Object *obj, bool value, Error **errp) pcms->hpet_enabled = value; } +static bool pc_machine_get_default_bus_bypass_iommu(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->default_bus_bypass_iommu; +} + +static void pc_machine_set_default_bus_bypass_iommu(Object *obj, bool value, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->default_bus_bypass_iommu = value; +} + static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -1628,6 +1620,7 @@ static void pc_machine_initfn(Object *obj) #ifdef CONFIG_HPET pcms->hpet_enabled = true; #endif + pcms->default_bus_bypass_iommu = false; pc_system_flash_create(pcms); pcms->pcspk = isa_new(TYPE_PC_SPEAKER); @@ -1698,7 +1691,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; - pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -1711,7 +1703,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->auto_enable_numa_with_memdev = true; mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; - mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; mc->max_cpus = 255; mc->reset = pc_machine_reset; @@ -1722,6 +1713,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) hc->unplug = pc_machine_device_unplug_cb; mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; mc->nvdimm_supported = true; + mc->smp_props.dies_supported = true; mc->default_ram_id = "pc.ram"; object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", @@ -1752,6 +1744,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "hpet", pc_machine_get_hpet, pc_machine_set_hpet); + object_class_property_add_bool(oc, "default-bus-bypass-iommu", + pc_machine_get_default_bus_bypass_iommu, + pc_machine_set_default_bus_bypass_iommu); + object_class_property_add(oc, PC_MACHINE_MAX_FW_SIZE, "size", pc_machine_get_max_fw_size, pc_machine_set_max_fw_size, NULL, NULL); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 46cc951073b..223dd3e05d1 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -42,15 +42,11 @@ #include "hw/irq.h" #include "sysemu/kvm.h" #include "hw/kvm/clock.h" -#include "sysemu/sysemu.h" #include "hw/sysbus.h" -#include "sysemu/arch_init.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/xen/xen-x86.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "hw/acpi/acpi.h" -#include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -64,6 +60,7 @@ #include "hw/hyperv/vmbus-bridge.h" #include "hw/mem/nvdimm.h" #include "hw/i386/acpi-build.h" +#include "kvm/kvm-cpu.h" #define MAX_IDE_BUS 2 @@ -156,6 +153,7 @@ static void pc_init1(MachineState *machine, } } + pc_machine_init_sgx_epc(pcms); x86_cpus_init(x86ms, pcmc->default_cpu_version); if (pcmc->kvmclock_enabled) { @@ -415,7 +413,7 @@ static void pc_i440fx_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_i440fx_6_0_machine_options(MachineClass *m) 
+static void pc_i440fx_6_2_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_machine_options(m); @@ -424,6 +422,31 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) pcmc->default_cpu_version = 1; } +DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL, + pc_i440fx_6_2_machine_options); + +static void pc_i440fx_6_1_machine_options(MachineClass *m) +{ + pc_i440fx_6_2_machine_options(m); + m->alias = NULL; + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len); + compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len); + m->smp_props.prefer_sockets = true; +} + +DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); + +static void pc_i440fx_6_0_machine_options(MachineClass *m) +{ + pc_i440fx_6_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len); + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); +} + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, pc_i440fx_6_0_machine_options); @@ -597,11 +620,12 @@ DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL, static void pc_i440fx_2_6_machine_options(MachineClass *m) { + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_7_machine_options(m); pcmc->legacy_cpu_hotplug = true; - pcmc->linuxboot_dma_enabled = false; + x86mc->fwcfg_dma_enabled = false; compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len); compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 53450190f54..e1e100316d9 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -31,14 +31,13 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "hw/loader.h" -#include "sysemu/arch_init.h" #include "hw/i2c/smbus_eeprom.h" #include "hw/rtc/mc146818rtc.h" #include "sysemu/kvm.h" #include "hw/kvm/clock.h" #include "hw/pci-host/q35.h" +#include "hw/pci/pcie_port.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "hw/i386/x86.h" #include "hw/i386/pc.h" #include "hw/i386/ich9.h" @@ -137,6 +136,8 @@ static void pc_q35_init(MachineState *machine) ram_addr_t lowmem; DriveInfo *hd[MAX_SATA_PORTS]; MachineClass *mc = MACHINE_GET_CLASS(machine); + bool acpi_pcihp; + bool keep_pci_slot_hpc; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -177,6 +178,7 @@ static void pc_q35_init(MachineState *machine) x86ms->below_4g_mem_size = machine->ram_size; } + pc_machine_init_sgx_epc(pcms); x86_cpus_init(x86ms, pcmc->default_cpu_version); kvmclock_create(pcmc->kvmclock_create_always); @@ -237,6 +239,19 @@ static void pc_q35_init(MachineState *machine) object_property_set_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, OBJECT(lpc), &error_abort); + acpi_pcihp = object_property_get_bool(OBJECT(lpc), + ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, + NULL); + + keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc), + "x-keep-pci-slot-hpc", + NULL); + + if (!keep_pci_slot_hpc && acpi_pcihp) { + object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", + "false", true); + } + /* irq lines */ gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); @@ -345,7 +360,7 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_6_0_machine_options(MachineClass *m) +static void 
pc_q35_6_2_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_options(m); @@ -353,6 +368,29 @@ static void pc_q35_6_0_machine_options(MachineClass *m) pcmc->default_cpu_version = 1; } +DEFINE_Q35_MACHINE(v6_2, "pc-q35-6.2", NULL, + pc_q35_6_2_machine_options); + +static void pc_q35_6_1_machine_options(MachineClass *m) +{ + pc_q35_6_2_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len); + compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len); + m->smp_props.prefer_sockets = true; +} + +DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL, + pc_q35_6_1_machine_options); + +static void pc_q35_6_0_machine_options(MachineClass *m) +{ + pc_q35_6_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len); + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); +} + DEFINE_Q35_MACHINE(v6_0, "pc-q35-6.0", NULL, pc_q35_6_0_machine_options); @@ -540,11 +578,12 @@ DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL, static void pc_q35_2_6_machine_options(MachineClass *m) { + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_7_machine_options(m); pcmc->legacy_cpu_hotplug = true; - pcmc->linuxboot_dma_enabled = false; + x86mc->fwcfg_dma_enabled = false; compat_props_add(m->compat_props, hw_compat_2_6, hw_compat_2_6_len); compat_props_add(m->compat_props, pc_compat_2_6, pc_compat_2_6_len); } diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 9fe72b370e8..c8b17af9535 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -35,10 +35,9 @@ #include "hw/i386/pc.h" #include "hw/loader.h" #include "hw/qdev-properties.h" -#include "sysemu/sysemu.h" #include "hw/block/flash.h" #include "sysemu/kvm.h" -#include "sysemu/sev.h" +#include "sev.h" #define FLASH_SECTOR_SIZE 4096 @@ -125,113 +124,6 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) } } -#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d" - -static uint8_t *ovmf_table; -static int ovmf_table_len; - -static void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) -{ - uint8_t *ptr; - QemuUUID guid; - int tot_len; - - /* should only be called once */ - if (ovmf_table) { - return; - } - - if (flash_size < TARGET_PAGE_SIZE) { - return; - } - - /* - * if this is OVMF there will be a table footer - * guid 48 bytes before the end of the flash file. If it's - * not found, silently abort the flash parsing. 
- */ - qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid); - guid = qemu_uuid_bswap(guid); /* guids are LE */ - ptr = flash_ptr + flash_size - 48; - if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) { - return; - } - - /* if found, just before is two byte table length */ - ptr -= sizeof(uint16_t); - tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) - sizeof(uint16_t); - - if (tot_len <= 0) { - return; - } - - ovmf_table = g_malloc(tot_len); - ovmf_table_len = tot_len; - - /* - * ptr is the foot of the table, so copy it all to the newly - * allocated ovmf_table and then set the ovmf_table pointer - * to the table foot - */ - memcpy(ovmf_table, ptr - tot_len, tot_len); - ovmf_table += tot_len; -} - -bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, - int *data_len) -{ - uint8_t *ptr = ovmf_table; - int tot_len = ovmf_table_len; - QemuUUID entry_guid; - - if (qemu_uuid_parse(entry, &entry_guid) < 0) { - return false; - } - - if (!ptr) { - return false; - } - - entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */ - while (tot_len >= sizeof(QemuUUID) + sizeof(uint16_t)) { - int len; - QemuUUID *guid; - - /* - * The data structure is - * arbitrary length data - * 2 byte length of entire entry - * 16 byte guid - */ - guid = (QemuUUID *)(ptr - sizeof(QemuUUID)); - len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) - - sizeof(uint16_t))); - - /* - * just in case the table is corrupt, wouldn't want to spin in - * the zero case - */ - if (len < sizeof(QemuUUID) + sizeof(uint16_t)) { - return false; - } else if (len > tot_len) { - return false; - } - - ptr -= len; - tot_len -= len; - if (qemu_uuid_is_equal(guid, &entry_guid)) { - if (data) { - *data = ptr; - } - if (data_len) { - *data_len = len - sizeof(QemuUUID) - sizeof(uint16_t); - } - return true; - } - } - return false; -} - /* * Map the pcms->flash[] from 4GiB downward, and realize. * Map them in descending order, i.e. pcms->flash[0] at the top, diff --git a/hw/i386/pc_sysfw_ovmf-stubs.c b/hw/i386/pc_sysfw_ovmf-stubs.c new file mode 100644 index 00000000000..aabe78b2710 --- /dev/null +++ b/hw/i386/pc_sysfw_ovmf-stubs.c @@ -0,0 +1,26 @@ +/* + * QEMU PC System Firmware (OVMF stubs) + * + * Copyright (c) 2021 Red Hat, Inc. + * + * Author: + * Philippe Mathieu-Daudé + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/i386/pc.h" + +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, int *data_len) +{ + g_assert_not_reached(); +} + +void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) +{ + g_assert_not_reached(); +} diff --git a/hw/i386/pc_sysfw_ovmf.c b/hw/i386/pc_sysfw_ovmf.c new file mode 100644 index 00000000000..f4dd92c5882 --- /dev/null +++ b/hw/i386/pc_sysfw_ovmf.c @@ -0,0 +1,151 @@ +/* + * QEMU PC System Firmware (OVMF specific) + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2011-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/i386/pc.h" +#include "cpu.h" + +#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-a366c55a082d" + +static bool ovmf_flash_parsed; +static uint8_t *ovmf_table; +static int ovmf_table_len; + +void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size) +{ + uint8_t *ptr; + QemuUUID guid; + int tot_len; + + /* should only be called once */ + if (ovmf_flash_parsed) { + return; + } + + ovmf_flash_parsed = true; + + if (flash_size < TARGET_PAGE_SIZE) { + return; + } + + /* + * if this is OVMF there will be a table footer + * guid 48 bytes before the end of the flash file. If it's + * not found, silently abort the flash parsing. + */ + qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid); + guid = qemu_uuid_bswap(guid); /* guids are LE */ + ptr = flash_ptr + flash_size - 48; + if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) { + return; + } + + /* if found, just before is two byte table length */ + ptr -= sizeof(uint16_t); + tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) - sizeof(uint16_t); + + if (tot_len <= 0) { + return; + } + + ovmf_table = g_malloc(tot_len); + ovmf_table_len = tot_len; + + /* + * ptr is the foot of the table, so copy it all to the newly + * allocated ovmf_table and then set the ovmf_table pointer + * to the table foot + */ + memcpy(ovmf_table, ptr - tot_len, tot_len); + ovmf_table += tot_len; +} + +/** + * pc_system_ovmf_table_find - Find the data associated with an entry in OVMF's + * reset vector GUIDed table. + * + * @entry: GUID string of the entry to lookup + * @data: Filled with a pointer to the entry's value (if not NULL) + * @data_len: Filled with the length of the entry's value (if not NULL). Pass + * NULL here if the length of data is known. + * + * Return: true if the entry was found in the OVMF table; false otherwise. 
+ */ +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data, + int *data_len) +{ + uint8_t *ptr = ovmf_table; + int tot_len = ovmf_table_len; + QemuUUID entry_guid; + + assert(ovmf_flash_parsed); + + if (qemu_uuid_parse(entry, &entry_guid) < 0) { + return false; + } + + if (!ptr) { + return false; + } + + entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */ + while (tot_len >= sizeof(QemuUUID) + sizeof(uint16_t)) { + int len; + QemuUUID *guid; + + /* + * The data structure is + * arbitrary length data + * 2 byte length of entire entry + * 16 byte guid + */ + guid = (QemuUUID *)(ptr - sizeof(QemuUUID)); + len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) - + sizeof(uint16_t))); + + /* + * just in case the table is corrupt, wouldn't want to spin in + * the zero case + */ + if (len < sizeof(QemuUUID) + sizeof(uint16_t)) { + return false; + } else if (len > tot_len) { + return false; + } + + ptr -= len; + tot_len -= len; + if (qemu_uuid_is_equal(guid, &entry_guid)) { + if (data) { + *data = ptr; + } + if (data_len) { + *data_len = len - sizeof(QemuUUID) - sizeof(uint16_t); + } + return true; + } + } + return false; +} diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c new file mode 100644 index 00000000000..e508827e787 --- /dev/null +++ b/hw/i386/sgx-epc.c @@ -0,0 +1,185 @@ +/* + * SGX EPC device + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "hw/i386/pc.h" +#include "hw/i386/sgx-epc.h" +#include "hw/mem/memory-device.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "target/i386/cpu.h" +#include "exec/address-spaces.h" + +static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sgx_epc_get_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + Error *local_err = NULL; + uint64_t value; + + value = memory_device_get_region_size(MEMORY_DEVICE(obj), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + visit_type_uint64(v, name, &value, errp); +} + +static void sgx_epc_init(Object *obj) +{ + object_property_add(obj, SGX_EPC_SIZE_PROP, "uint64", sgx_epc_get_size, + NULL, NULL, NULL); +} + +static void sgx_epc_realize(DeviceState *dev, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(pcms); + MemoryDeviceState *md = MEMORY_DEVICE(dev); + SGXEPCState *sgx_epc = &pcms->sgx_epc; + SGXEPCDevice *epc = SGX_EPC(dev); + HostMemoryBackend *hostmem; + const char *path; + + if (x86ms->boot_cpus != 0) { + error_setg(errp, "'" TYPE_SGX_EPC "' can't be created after vCPUs," + "e.g. 
via -device"); + return; + } + + if (!epc->hostmem) { + error_setg(errp, "'" SGX_EPC_MEMDEV_PROP "' property is not set"); + return; + } + hostmem = MEMORY_BACKEND(epc->hostmem); + if (host_memory_backend_is_mapped(hostmem)) { + path = object_get_canonical_path_component(OBJECT(hostmem)); + error_setg(errp, "can't use already busy memdev: %s", path); + return; + } + + epc->addr = sgx_epc->base + sgx_epc->size; + + memory_region_add_subregion(&sgx_epc->mr, epc->addr - sgx_epc->base, + host_memory_backend_get_memory(hostmem)); + + host_memory_backend_set_mapped(hostmem, true); + + sgx_epc->sections = g_renew(SGXEPCDevice *, sgx_epc->sections, + sgx_epc->nr_sections + 1); + sgx_epc->sections[sgx_epc->nr_sections++] = epc; + + sgx_epc->size += memory_device_get_region_size(md, errp); +} + +static void sgx_epc_unrealize(DeviceState *dev) +{ + SGXEPCDevice *epc = SGX_EPC(dev); + HostMemoryBackend *hostmem = MEMORY_BACKEND(epc->hostmem); + + host_memory_backend_set_mapped(hostmem, false); +} + +static uint64_t sgx_epc_md_get_addr(const MemoryDeviceState *md) +{ + const SGXEPCDevice *epc = SGX_EPC(md); + + return epc->addr; +} + +static void sgx_epc_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), SGX_EPC_ADDR_PROP, addr, errp); +} + +static uint64_t sgx_epc_md_get_plugged_size(const MemoryDeviceState *md, + Error **errp) +{ + return 0; +} + +static MemoryRegion *sgx_epc_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + SGXEPCDevice *epc = SGX_EPC(md); + HostMemoryBackend *hostmem; + + if (!epc->hostmem) { + error_setg(errp, "'" SGX_EPC_MEMDEV_PROP "' property must be set"); + return NULL; + } + + hostmem = MEMORY_BACKEND(epc->hostmem); + return host_memory_backend_get_memory(hostmem); +} + +static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + SgxEPCDeviceInfo *se = g_new0(SgxEPCDeviceInfo, 1); + SGXEPCDevice *epc = SGX_EPC(md); + + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; + info->type = MEMORY_DEVICE_INFO_KIND_SGX_EPC; +} + +static void sgx_epc_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + + dc->hotpluggable = false; + dc->realize = sgx_epc_realize; + dc->unrealize = sgx_epc_unrealize; + dc->desc = "SGX EPC section"; + dc->user_creatable = false; + device_class_set_props(dc, sgx_epc_properties); + + mdc->get_addr = sgx_epc_md_get_addr; + mdc->set_addr = sgx_epc_md_set_addr; + mdc->get_plugged_size = sgx_epc_md_get_plugged_size; + mdc->get_memory_region = sgx_epc_md_get_memory_region; + mdc->fill_device_info = sgx_epc_md_fill_device_info; +} + +static TypeInfo sgx_epc_info = { + .name = TYPE_SGX_EPC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SGXEPCDevice), + .instance_init = sgx_epc_init, + .class_init = sgx_epc_class_init, + .class_size = sizeof(DeviceClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void sgx_epc_register_types(void) +{ + type_register_static(&sgx_epc_info); +} + +type_init(sgx_epc_register_types) diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c new file mode 100644 index 00000000000..c9b379e6651 --- /dev/null +++ b/hw/i386/sgx-stub.c @@ -0,0 +1,34 @@ +#include "qemu/osdep.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "hw/i386/pc.h" 
+#include "hw/i386/sgx-epc.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-target.h" + +SGXInfo *qmp_query_sgx(Error **errp) +{ + error_setg(errp, "SGX support is not compiled in"); + return NULL; +} + +SGXInfo *qmp_query_sgx_capabilities(Error **errp) +{ + error_setg(errp, "SGX support is not compiled in"); + return NULL; +} + +void hmp_info_sgx(Monitor *mon, const QDict *qdict) +{ + monitor_printf(mon, "SGX is not available in this QEMU\n"); +} + +void pc_machine_init_sgx_epc(PCMachineState *pcms) +{ + memset(&pcms->sgx_epc, 0, sizeof(SGXEPCState)); +} + +bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +{ + g_assert_not_reached(); +} diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c new file mode 100644 index 00000000000..8fef3dd8fad --- /dev/null +++ b/hw/i386/sgx.c @@ -0,0 +1,243 @@ +/* + * SGX common code + * + * Copyright (C) 2021 Intel Corporation + * + * Authors: + * Yang Zhong + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "hw/i386/pc.h" +#include "hw/i386/sgx-epc.h" +#include "hw/mem/memory-device.h" +#include "monitor/qdev.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-target.h" +#include "exec/address-spaces.h" +#include "sysemu/hw_accel.h" +#include "sysemu/reset.h" +#include + +#define SGX_MAX_EPC_SECTIONS 8 +#define SGX_CPUID_EPC_INVALID 0x0 + +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +#define SGX_CPUID_EPC_MASK 0xF + +#define SGX_MAGIC 0xA4 +#define SGX_IOC_VEPC_REMOVE_ALL _IO(SGX_MAGIC, 0x04) + +#define RETRY_NUM 2 + +static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) +{ + return (low & MAKE_64BIT_MASK(12, 20)) + + ((high & MAKE_64BIT_MASK(0, 20)) << 32); +} + +static uint64_t sgx_calc_host_epc_section_size(void) +{ + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; + uint64_t size = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); + + type = eax & SGX_CPUID_EPC_MASK; + if (type == SGX_CPUID_EPC_INVALID) { + break; + } + + if (type != SGX_CPUID_EPC_SECTION) { + break; + } + + size += sgx_calc_section_metric(ecx, edx); + } + + return size; +} + +static void sgx_epc_reset(void *opaque) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + HostMemoryBackend *hostmem; + SGXEPCDevice *epc; + int failures; + int fd, i, j, r; + static bool warned = false; + + /* + * The second pass is needed to remove SECS pages that could not + * be removed during the first. 
+ */ + for (i = 0; i < RETRY_NUM; i++) { + failures = 0; + for (j = 0; j < pcms->sgx_epc.nr_sections; j++) { + epc = pcms->sgx_epc.sections[j]; + hostmem = MEMORY_BACKEND(epc->hostmem); + fd = memory_region_get_fd(host_memory_backend_get_memory(hostmem)); + + r = ioctl(fd, SGX_IOC_VEPC_REMOVE_ALL); + if (r == -ENOTTY && !warned) { + warned = true; + warn_report("kernel does not support SGX_IOC_VEPC_REMOVE_ALL"); + warn_report("SGX might operate incorrectly in the guest after reset"); + break; + } else if (r > 0) { + /* SECS pages remain */ + failures++; + if (i == 1) { + error_report("cannot reset vEPC section %d", j); + } + } + } + if (!failures) { + break; + } + } +} + +SGXInfo *qmp_query_sgx_capabilities(Error **errp) +{ + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { + error_setg(errp, "SGX is not enabled in KVM"); + return NULL; + } + + info = g_new0(SGXInfo, 1); + host_cpuid(0x7, 0, &eax, &ebx, &ecx, &edx); + + info->sgx = ebx & (1U << 2) ? true : false; + info->flc = ecx & (1U << 30) ? true : false; + + host_cpuid(0x12, 0, &eax, &ebx, &ecx, &edx); + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + + info->section_size = sgx_calc_host_epc_section_size(); + + close(fd); + + return info; +} + +SGXInfo *qmp_query_sgx(Error **errp) +{ + SGXInfo *info = NULL; + X86MachineState *x86ms; + PCMachineState *pcms = + (PCMachineState *)object_dynamic_cast(qdev_get_machine(), + TYPE_PC_MACHINE); + if (!pcms) { + error_setg(errp, "SGX is only supported on PC machines"); + return NULL; + } + + x86ms = X86_MACHINE(pcms); + if (!x86ms->sgx_epc_list) { + error_setg(errp, "No EPC regions defined, SGX not available"); + return NULL; + } + + SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; + info->section_size = sgx_epc->size; + + return info; +} + +void hmp_info_sgx(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { + error_report_err(err); + return; + } + monitor_printf(mon, "SGX support: %s\n", + info->sgx ? "enabled" : "disabled"); + monitor_printf(mon, "SGX1 support: %s\n", + info->sgx1 ? "enabled" : "disabled"); + monitor_printf(mon, "SGX2 support: %s\n", + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); + monitor_printf(mon, "size: %" PRIu64 "\n", + info->section_size); +} + +bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + SGXEPCDevice *epc; + + if (pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) { + return true; + } + + epc = pcms->sgx_epc.sections[section_nr]; + + *addr = epc->addr; + *size = memory_device_get_region_size(MEMORY_DEVICE(epc), &error_fatal); + + return false; +} + +void pc_machine_init_sgx_epc(PCMachineState *pcms) +{ + SGXEPCState *sgx_epc = &pcms->sgx_epc; + X86MachineState *x86ms = X86_MACHINE(pcms); + SgxEPCList *list = NULL; + Object *obj; + + memset(sgx_epc, 0, sizeof(SGXEPCState)); + if (!x86ms->sgx_epc_list) { + return; + } + + sgx_epc->base = 0x100000000ULL + x86ms->above_4g_mem_size; + + memory_region_init(&sgx_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX); + memory_region_add_subregion(get_system_memory(), sgx_epc->base, + &sgx_epc->mr); + + for (list = x86ms->sgx_epc_list; list; list = list->next) { + obj = object_new("sgx-epc"); + + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } + + if ((sgx_epc->base + sgx_epc->size) < sgx_epc->base) { + error_report("Size of all 'sgx-epc' =0x%"PRIu64" causes EPC to wrap", + sgx_epc->size); + exit(EXIT_FAILURE); + } + + memory_region_set_size(&sgx_epc->mr, sgx_epc->size); + + /* register the reset callback for sgx epc */ + qemu_register_reset(sgx_epc_reset, NULL); +} diff --git a/hw/i386/trace-events b/hw/i386/trace-events index e48bef2b0d6..5bf7e52bf52 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# x86-iommu.c x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 diff --git a/hw/i386/vmmouse.c b/hw/i386/vmmouse.c index df4798f5028..3d663682863 100644 --- a/hw/i386/vmmouse.c +++ b/hw/i386/vmmouse.c @@ -158,6 +158,7 @@ static void vmmouse_read_id(VMMouseState *s) s->queue[s->nb_queue++] = VMMOUSE_VERSION; s->status = 0; + vmmouse_update_handler(s, s->absolute); } static void vmmouse_request_relative(VMMouseState *s) diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index 490a57f52c8..7cc75dbc6da 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -37,7 +37,6 @@ #include "sysemu/hw_accel.h" #include "sysemu/qtest.h" #include "qemu/log.h" -#include "cpu.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/i386/x86-iommu-stub.c b/hw/i386/x86-iommu-stub.c index c5ba077f9d1..781b5ff9222 100644 --- a/hw/i386/x86-iommu-stub.c +++ b/hw/i386/x86-iommu-stub.c @@ -36,8 +36,3 @@ bool x86_iommu_ir_supported(X86IOMMUState *s) { return false; } - -IommuType x86_iommu_get_type(void) -{ - abort(); -} diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 5f4301639c5..01d11325a69 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "hw/i386/x86-iommu.h" #include "hw/qdev-properties.h" #include "hw/i386/pc.h" @@ -78,30 +77,17 @@ void x86_iommu_irq_to_msi_message(X86IOMMUIrq *irq, MSIMessage *msg_out) msg_out->data = msg.msi_data; } -/* Default X86 IOMMU device */ -static X86IOMMUState *x86_iommu_default = NULL; - -static void x86_iommu_set_default(X86IOMMUState *x86_iommu) -{ - assert(x86_iommu); - - if (x86_iommu_default) { - error_report("QEMU does not support multiple vIOMMUs " - "for x86 yet."); - exit(1); - } - - x86_iommu_default = x86_iommu; -} - X86IOMMUState *x86_iommu_get_default(void) { - return x86_iommu_default; -} + MachineState *ms = MACHINE(qdev_get_machine()); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); -IommuType x86_iommu_get_type(void) -{ - return x86_iommu_default->type; + if (pcms && + object_dynamic_cast(OBJECT(pcms->iommu), TYPE_X86_IOMMU_DEVICE)) { + return X86_IOMMU_DEVICE(pcms->iommu); + } + return NULL; } static void x86_iommu_realize(DeviceState *dev, Error **errp) @@ -137,8 +123,6 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) if (x86_class->realize) { x86_class->realize(dev, errp); } - - x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } static Property x86_iommu_properties[] = { diff --git a/hw/i386/x86.c b/hw/i386/x86.c index ed796fe6bad..b84840a1bb9 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -30,6 +30,8 @@ #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qapi/qapi-visit-common.h" +#include "qapi/clone-visitor.h" +#include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" #include "sysemu/qtest.h" #include "sysemu/whpx.h" @@ -45,6 +47,7 @@ #include "hw/i386/fw_cfg.h" #include "hw/intc/i8259.h" #include "hw/rtc/mc146818rtc.h" +#include "target/i386/sev.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/irq.h" @@ -64,7 +67,7 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info, { MachineState *ms = MACHINE(x86ms); - topo_info->dies_per_pkg = x86ms->smp_dies; + topo_info->dies_per_pkg = ms->smp.dies; topo_info->cores_per_die = ms->smp.cores; topo_info->threads_per_core = ms->smp.threads; } @@ -293,7 +296,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, init_topo_info(&topo_info, 
x86ms); - env->nr_dies = x86ms->smp_dies; + env->nr_dies = ms->smp.dies; /* * If APIC ID is not set, @@ -301,13 +304,13 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, */ if (cpu->apic_id == UNASSIGNED_APIC_ID) { int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / x86ms->smp_dies; + smp_threads / smp_cores / ms->smp.dies; /* * die-id was optional in QEMU 4.0 and older, so keep it optional * if there's only one die per socket. */ - if (cpu->die_id < 0 && x86ms->smp_dies == 1) { + if (cpu->die_id < 0 && ms->smp.dies == 1) { cpu->die_id = 0; } @@ -322,9 +325,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, if (cpu->die_id < 0) { error_setg(errp, "CPU die-id is not set"); return; - } else if (cpu->die_id > x86ms->smp_dies - 1) { + } else if (cpu->die_id > ms->smp.dies - 1) { error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", - cpu->die_id, x86ms->smp_dies - 1); + cpu->die_id, ms->smp.dies - 1); return; } if (cpu->core_id < 0) { @@ -477,7 +480,7 @@ const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) &topo_info, &topo_ids); ms->possible_cpus->cpus[i].props.has_socket_id = true; ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id; - if (x86ms->smp_dies > 1) { + if (ms->smp.dies > 1) { ms->possible_cpus->cpus[i].props.has_die_id = true; ms->possible_cpus->cpus[i].props.die_id = topo_ids.die_id; } @@ -761,9 +764,9 @@ static bool load_elfboot(const char *kernel_filename, void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool linuxboot_dma_enabled) + bool pvh_enabled) { + bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; uint16_t protocol; int setup_size, kernel_size, cmdline_size; int dtb_size, setup_data_offset; @@ -778,6 +781,7 @@ void x86_load_linux(X86MachineState *x86ms, const char *initrd_filename = machine->initrd_filename; const char *dtb_filename = machine->dtb; const char *kernel_cmdline = machine->kernel_cmdline; + SevKernelLoaderContext sev_load_ctx = {}; /* Align to 16 bytes as a paranoia measure */ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; @@ -810,7 +814,7 @@ void x86_load_linux(X86MachineState *x86ms, * PVH), so we try multiboot first since we check the multiboot magic * header before to load it. 
*/ - if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, + if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, kernel_cmdline, kernel_size, header)) { return; } @@ -924,6 +928,8 @@ void x86_load_linux(X86MachineState *x86ms, fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + sev_load_ctx.cmdline_data = (char *)kernel_cmdline; + sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; if (protocol >= 0x202) { stl_p(header + 0x228, cmdline_addr); @@ -1005,6 +1011,8 @@ void x86_load_linux(X86MachineState *x86ms, fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); + sev_load_ctx.initrd_data = initrd_data; + sev_load_ctx.initrd_size = initrd_size; stl_p(header + 0x218, initrd_addr); stl_p(header + 0x21c, initrd_size); @@ -1063,15 +1071,32 @@ void x86_load_linux(X86MachineState *x86ms, load_image_size(dtb_filename, setup_data->data, dtb_size); } - memcpy(setup, header, MIN(sizeof(header), setup_size)); + /* + * If we're starting an encrypted VM, it will be OVMF based, which uses the + * efi stub for booting and doesn't require any values to be placed in the + * kernel header. We therefore don't update the header so the hash of the + * kernel on the other side of the fw_cfg interface matches the hash of the + * file the user passed in. + */ + if (!sev_enabled()) { + memcpy(setup, header, MIN(sizeof(header), setup_size)); + } fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + sev_load_ctx.kernel_data = (char *)kernel; + sev_load_ctx.kernel_size = kernel_size; fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); + sev_load_ctx.setup_data = (char *)setup; + sev_load_ctx.setup_size = setup_size; + + if (sev_enabled()) { + sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); + } option_rom[nb_option_roms].bootindex = 0; option_rom[nb_option_roms].name = "linuxboot.bin"; @@ -1246,16 +1271,54 @@ static void x86_machine_set_oem_table_id(Object *obj, const char *value, strncpy(x86ms->oem_table_id, value, 8); } +static void x86_machine_get_bus_lock_ratelimit(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + uint64_t bus_lock_ratelimit = x86ms->bus_lock_ratelimit; + + visit_type_uint64(v, name, &bus_lock_ratelimit, errp); +} + +static void x86_machine_set_bus_lock_ratelimit(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + + visit_type_uint64(v, name, &x86ms->bus_lock_ratelimit, errp); +} + +static void machine_get_sgx_epc(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + SgxEPCList *list = x86ms->sgx_epc_list; + + visit_type_SgxEPCList(v, name, &list, errp); +} + +static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + X86MachineState *x86ms = X86_MACHINE(obj); + SgxEPCList *list; + + list = x86ms->sgx_epc_list; + visit_type_SgxEPCList(v, name, &x86ms->sgx_epc_list, errp); + + 
qapi_free_SgxEPCList(list); +} + static void x86_machine_initfn(Object *obj) { X86MachineState *x86ms = X86_MACHINE(obj); x86ms->smm = ON_OFF_AUTO_AUTO; x86ms->acpi = ON_OFF_AUTO_AUTO; - x86ms->smp_dies = 1; x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + x86ms->bus_lock_ratelimit = 0; } static void x86_machine_class_init(ObjectClass *oc, void *data) @@ -1269,6 +1332,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; x86mc->compat_apic_id_mode = false; x86mc->save_tsc_khz = true; + x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; object_class_property_add(oc, X86_MACHINE_SMM, "OnOffAuto", @@ -1299,6 +1363,18 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) "Override the default value of field OEM Table ID " "in ACPI table header." "The string may be up to 8 bytes in size"); + + object_class_property_add(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, "uint64_t", + x86_machine_get_bus_lock_ratelimit, + x86_machine_set_bus_lock_ratelimit, NULL, NULL); + object_class_property_set_description(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, + "Set the ratelimit for the bus locks acquired in VMs"); + + object_class_property_add(oc, "sgx-epc", "SgxEPC", + machine_get_sgx_epc, machine_set_sgx_epc, + NULL, NULL); + object_class_property_set_description(oc, "sgx-epc", + "SGX EPC device"); } static const TypeInfo x86_machine_info = { diff --git a/hw/i386/xen/trace-events b/hw/i386/xen/trace-events index ca3a4948baa..5d6be610908 100644 --- a/hw/i386/xen/trace-events +++ b/hw/i386/xen/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
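A minimal worked example of the socket/die bounds that x86_cpu_pre_plug() above derives from the new ms->smp.dies field, assuming a hypothetical -smp 32,sockets=2,dies=2,cores=4,threads=2 topology (a sketch, not QEMU code):

#include <assert.h>

int main(void)
{
    int max_cpus = 32, dies = 2, cores = 4, threads = 2;

    /* highest valid socket-id, as computed in x86_cpu_pre_plug() */
    int max_socket = (max_cpus - 1) / threads / cores / dies;   /* 31/2/4/2 = 1 */
    assert(max_socket == 1);

    /* die-id must stay within 0..dies-1, i.e. 0..1 for this topology */
    assert(dies - 1 == 1);
    return 0;
}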
# xen_platform.c xen_platform_log(char *s) "xen platform: %s" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 7ce672e5a5c..482be95415e 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -33,7 +33,6 @@ #include "sysemu/xen.h" #include "sysemu/xen-mapcache.h" #include "trace.h" -#include "exec/address-spaces.h" #include #include @@ -109,6 +108,7 @@ typedef struct XenIOState { shared_iopage_t *shared_page; shared_vmport_iopage_t *shared_vmport_page; buffered_iopage_t *buffered_io_page; + xenforeignmemory_resource_handle *fres; QEMUTimer *buffered_io_timer; CPUState **cpu_by_vcpu_id; /* the evtchn port for polling the notification, */ @@ -721,6 +721,7 @@ static void xen_log_global_stop(MemoryListener *listener) } static MemoryListener xen_memory_listener = { + .name = "xen-memory", .region_add = xen_region_add, .region_del = xen_region_del, .log_start = xen_log_start, @@ -732,6 +733,7 @@ static MemoryListener xen_memory_listener = { }; static MemoryListener xen_io_listener = { + .name = "xen-io", .region_add = xen_io_add, .region_del = xen_io_del, .priority = 10, @@ -1254,6 +1256,9 @@ static void xen_exit_notifier(Notifier *n, void *data) XenIOState *state = container_of(n, XenIOState, exit); xen_destroy_ioreq_server(xen_domid, state->ioservid); + if (state->fres != NULL) { + xenforeignmemory_unmap_resource(xen_fmem, state->fres); + } xenevtchn_close(state->xce_handle); xs_daemon_close(state->xenstore); @@ -1321,7 +1326,6 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data) static int xen_map_ioreq_server(XenIOState *state) { void *addr = NULL; - xenforeignmemory_resource_handle *fres; xen_pfn_t ioreq_pfn; xen_pfn_t bufioreq_pfn; evtchn_port_t bufioreq_evtchn; @@ -1333,12 +1337,12 @@ static int xen_map_ioreq_server(XenIOState *state) */ QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0); QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1); - fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, + state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid, XENMEM_resource_ioreq_server, state->ioservid, 0, 2, &addr, PROT_READ | PROT_WRITE, 0); - if (fres != NULL) { + if (state->fres != NULL) { trace_xen_map_resource_ioreq(state->ioservid, addr); state->buffered_io_page = addr; state->shared_page = addr + TARGET_PAGE_SIZE; @@ -1609,8 +1613,8 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length) void qmp_xen_set_global_dirty_log(bool enable, Error **errp) { if (enable) { - memory_global_dirty_log_start(); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); } else { - memory_global_dirty_log_stop(); + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); } } diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 5b120ed44b1..bd47c3d672f 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -169,9 +169,23 @@ static void xen_remap_bucket(MapCacheEntry *entry, if (entry->vaddr_base != NULL) { if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_remove(entry->vaddr_base, entry->size); + ram_block_notify_remove(entry->vaddr_base, entry->size, + entry->size); } - if (munmap(entry->vaddr_base, entry->size) != 0) { + + /* + * If an entry is being replaced by another mapping and we're using + * MAP_FIXED flag for it - there is possibility of a race for vaddr + * address with another thread doing an mmap call itself + * (see man 2 mmap). 
To avoid that we skip explicit unmapping here + * and allow the kernel to destroy the previous mappings by replacing + * them in mmap call later. + * + * Non-identical replacements are not allowed therefore. + */ + assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size)); + + if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); } @@ -211,7 +225,7 @@ static void xen_remap_bucket(MapCacheEntry *entry, } if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) { - ram_block_notify_add(vaddr_base, size); + ram_block_notify_add(vaddr_base, size, size); } entry->vaddr_base = vaddr_base; @@ -452,7 +466,7 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) } pentry->next = entry->next; - ram_block_notify_remove(entry->vaddr_base, entry->size); + ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size); if (munmap(entry->vaddr_base, entry->size) != 0) { perror("unmap fails"); exit(-1); diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 01ae1fb1618..72028449bae 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -27,12 +27,10 @@ #include "qapi/error.h" #include "hw/ide.h" #include "hw/pci/pci.h" -#include "hw/irq.h" #include "hw/xen/xen_common.h" #include "migration/vmstate.h" #include "hw/xen/xen-legacy-backend.h" #include "trace.h" -#include "exec/address-spaces.h" #include "sysemu/xen.h" #include "sysemu/block-backend.h" #include "qemu/error-report.h" diff --git a/hw/ide/Kconfig b/hw/ide/Kconfig index 5d9106b1ac2..dd85fa3619f 100644 --- a/hw/ide/Kconfig +++ b/hw/ide/Kconfig @@ -8,7 +8,7 @@ config IDE_QDEV config IDE_PCI bool depends on PCI - select IDE_CORE + select IDE_QDEV config IDE_ISA bool @@ -41,6 +41,7 @@ config IDE_VIA config MICRODRIVE bool select IDE_QDEV + depends on PCMCIA config AHCI bool diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f2c51574839..a94c6e26fb0 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1548,7 +1548,7 @@ void ahci_realize(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports) for (i = 0; i < s->ports; i++) { AHCIDevice *ad = &s->dev[i]; - ide_bus_new(&ad->port, sizeof(ad->port), qdev, i, 1); + ide_bus_init(&ad->port, sizeof(ad->port), qdev, i, 1); ide_init2(&ad->port, irqs[i]); ad->hba = s; diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h index 7f32e87731d..109de9e2d11 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci_internal.h @@ -26,7 +26,6 @@ #include "hw/ide/ahci.h" #include "hw/ide/internal.h" -#include "hw/sysbus.h" #include "hw/pci/pci.h" #define AHCI_MEM_BAR_SIZE 0x1000 diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c index c2546314855..94c576262c1 100644 --- a/hw/ide/cmd646.c +++ b/hw/ide/cmd646.c @@ -293,7 +293,7 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_in(ds, cmd646_set_irq, 2); for (i = 0; i < 2; i++) { - ide_bus_new(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); + ide_bus_init(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); ide_init2(&d->bus[i], qdev_get_gpio_in(ds, i)); bmdma_init(&d->bus[i], &d->bmdma[i], d); diff --git a/hw/ide/core.c b/hw/ide/core.c index fd69ca3167d..e28f8aad611 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -98,8 +98,12 @@ static void put_le16(uint16_t *p, unsigned int v) static void ide_identify_size(IDEState *s) { uint16_t *p = (uint16_t *)s->identify_data; - put_le16(p + 60, s->nb_sectors); - put_le16(p + 61, s->nb_sectors >> 16); + int64_t nb_sectors_lba28 = s->nb_sectors; + if (nb_sectors_lba28 >= 1 << 28) { + nb_sectors_lba28 = (1 << 28) - 
1; + } + put_le16(p + 60, nb_sectors_lba28); + put_le16(p + 61, nb_sectors_lba28 >> 16); put_le16(p + 100, s->nb_sectors); put_le16(p + 101, s->nb_sectors >> 16); put_le16(p + 102, s->nb_sectors >> 32); diff --git a/hw/ide/ioport.c b/hw/ide/ioport.c index b613ff3bbaf..e6caa537fa8 100644 --- a/hw/ide/ioport.c +++ b/hw/ide/ioport.c @@ -50,15 +50,19 @@ static const MemoryRegionPortio ide_portio2_list[] = { PORTIO_END_OF_LIST(), }; -void ide_init_ioport(IDEBus *bus, ISADevice *dev, int iobase, int iobase2) +int ide_init_ioport(IDEBus *bus, ISADevice *dev, int iobase, int iobase2) { + int ret; + /* ??? Assume only ISA and PCI configurations, and that the PCI-ISA bridge has been setup properly to always register with ISA. */ - isa_register_portio_list(dev, &bus->portio_list, - iobase, ide_portio_list, bus, "ide"); + ret = isa_register_portio_list(dev, &bus->portio_list, + iobase, ide_portio_list, bus, "ide"); - if (iobase2) { - isa_register_portio_list(dev, &bus->portio2_list, - iobase2, ide_portio2_list, bus, "ide"); + if (ret == 0 && iobase2) { + ret = isa_register_portio_list(dev, &bus->portio2_list, + iobase2, ide_portio2_list, bus, "ide"); } + + return ret; } diff --git a/hw/ide/isa.c b/hw/ide/isa.c index 6bc19de2265..24bbde24c2b 100644 --- a/hw/ide/isa.c +++ b/hw/ide/isa.c @@ -73,7 +73,7 @@ static void isa_ide_realizefn(DeviceState *dev, Error **errp) ISADevice *isadev = ISA_DEVICE(dev); ISAIDEState *s = ISA_IDE(dev); - ide_bus_new(&s->bus, sizeof(s->bus), dev, 0, 2); + ide_bus_init(&s->bus, sizeof(s->bus), dev, 0, 2); ide_init_ioport(&s->bus, isadev, s->iobase, s->iobase2); isa_init_irq(isadev, &s->irq, s->isairq); ide_init2(&s->bus, s->irq); diff --git a/hw/ide/macio.c b/hw/ide/macio.c index b270a101632..b03d401ceb5 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -449,7 +449,7 @@ static void macio_ide_initfn(Object *obj) SysBusDevice *d = SYS_BUS_DEVICE(obj); MACIOIDEState *s = MACIO_IDE(obj); - ide_bus_new(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); + ide_bus_init(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); memory_region_init_io(&s->mem, obj, &pmac_ide_ops, s, "pmac-ide", 0x1000); sysbus_init_mmio(d, &s->mem); sysbus_init_irq(d, &s->real_ide_irq); diff --git a/hw/ide/microdrive.c b/hw/ide/microdrive.c index 58a14fea363..6df9b4cbbe1 100644 --- a/hw/ide/microdrive.c +++ b/hw/ide/microdrive.c @@ -605,7 +605,7 @@ static void microdrive_init(Object *obj) { MicroDriveState *md = MICRODRIVE(obj); - ide_bus_new(&md->bus, sizeof(md->bus), DEVICE(obj), 0, 1); + ide_bus_init(&md->bus, sizeof(md->bus), DEVICE(obj), 0, 1); } static void microdrive_class_init(ObjectClass *oc, void *data) diff --git a/hw/ide/mmio.c b/hw/ide/mmio.c index 36e2f4790ab..fb2ebd4847f 100644 --- a/hw/ide/mmio.c +++ b/hw/ide/mmio.c @@ -142,7 +142,7 @@ static void mmio_ide_initfn(Object *obj) SysBusDevice *d = SYS_BUS_DEVICE(obj); MMIOState *s = MMIO_IDE(obj); - ide_bus_new(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); + ide_bus_init(&s->bus, sizeof(s->bus), DEVICE(obj), 0, 2); sysbus_init_irq(d, &s->irq); } diff --git a/hw/ide/piix.c b/hw/ide/piix.c index b9860e35a5c..ce89fd0aa36 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "hw/pci/pci.h" #include "migration/vmstate.h" +#include "qapi/error.h" #include "qemu/module.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" @@ -123,7 +124,8 @@ static void piix_ide_reset(DeviceState *dev) pci_conf[0x20] = 0x01; /* BMIBA: 20-23h */ } -static void pci_piix_init_ports(PCIIDEState *d) { +static int 
pci_piix_init_ports(PCIIDEState *d) +{ static const struct { int iobase; int iobase2; @@ -132,24 +134,30 @@ static void pci_piix_init_ports(PCIIDEState *d) { {0x1f0, 0x3f6, 14}, {0x170, 0x376, 15}, }; - int i; + int i, ret; for (i = 0; i < 2; i++) { - ide_bus_new(&d->bus[i], sizeof(d->bus[i]), DEVICE(d), i, 2); - ide_init_ioport(&d->bus[i], NULL, port_info[i].iobase, - port_info[i].iobase2); + ide_bus_init(&d->bus[i], sizeof(d->bus[i]), DEVICE(d), i, 2); + ret = ide_init_ioport(&d->bus[i], NULL, port_info[i].iobase, + port_info[i].iobase2); + if (ret) { + return ret; + } ide_init2(&d->bus[i], isa_get_irq(NULL, port_info[i].isairq)); bmdma_init(&d->bus[i], &d->bmdma[i], d); d->bmdma[i].bus = &d->bus[i]; ide_register_restart_cb(&d->bus[i]); } + + return 0; } static void pci_piix_ide_realize(PCIDevice *dev, Error **errp) { PCIIDEState *d = PCI_IDE(dev); uint8_t *pci_conf = dev->config; + int rc; pci_conf[PCI_CLASS_PROG] = 0x80; // legacy ATA mode @@ -158,7 +166,11 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error **errp) vmstate_register(VMSTATE_IF(dev), 0, &vmstate_ide_pci, d); - pci_piix_init_ports(d); + rc = pci_piix_init_ports(d); + if (rc) { + error_setg_errno(errp, -rc, "Failed to realize %s", + object_get_typename(OBJECT(dev))); + } } int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux) diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index e70ebc83a07..618045b85ac 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -68,10 +68,10 @@ static const TypeInfo ide_bus_info = { .class_init = ide_bus_class_init, }; -void ide_bus_new(IDEBus *idebus, size_t idebus_size, DeviceState *dev, +void ide_bus_init(IDEBus *idebus, size_t idebus_size, DeviceState *dev, int bus_id, int max_units) { - qbus_create_inplace(idebus, idebus_size, TYPE_IDE_BUS, dev, NULL); + qbus_init(idebus, idebus_size, TYPE_IDE_BUS, dev, NULL); idebus->bus_id = bus_id; idebus->max_units = max_units; } diff --git a/hw/ide/sii3112.c b/hw/ide/sii3112.c index 34c347b9c20..46204f10d75 100644 --- a/hw/ide/sii3112.c +++ b/hw/ide/sii3112.c @@ -283,7 +283,7 @@ static void sii3112_pci_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_in(ds, sii3112_set_irq, 2); for (i = 0; i < 2; i++) { - ide_bus_new(&s->bus[i], sizeof(s->bus[i]), ds, i, 1); + ide_bus_init(&s->bus[i], sizeof(s->bus[i]), ds, i, 1); ide_init2(&s->bus[i], qdev_get_gpio_in(ds, i)); bmdma_init(&s->bus[i], &s->bmdma[i], s); diff --git a/hw/ide/trace-events b/hw/ide/trace-events index 6e357685f9b..15d7921f156 100644 --- a/hw/ide/trace-events +++ b/hw/ide/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
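A small, self-contained sketch (not QEMU code) of the IDENTIFY word 60/61 clamp added to ide_identify_size() above: the LBA28 capacity is capped at 0x0FFFFFFF sectors (about 128 GiB), while words 100-103 continue to report the full LBA48 sector count. The disk sizes below are arbitrary examples:

#include <stdint.h>
#include <assert.h>

static uint32_t lba28_capacity(int64_t nb_sectors)
{
    /* mirrors the new clamp: anything >= 1 << 28 is reported as (1 << 28) - 1 */
    return nb_sectors >= (1 << 28) ? (1u << 28) - 1 : (uint32_t)nb_sectors;
}

int main(void)
{
    assert(lba28_capacity(4294967296LL) == 0x0fffffffu); /* 2 TiB disk, clamped   */
    assert(lba28_capacity(209715200LL) == 209715200u);   /* 100 GiB disk, as-is   */
    return 0;
}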
# core.c # portio diff --git a/hw/ide/via.c b/hw/ide/via.c index be09912b334..82def819c41 100644 --- a/hw/ide/via.c +++ b/hw/ide/via.c @@ -29,7 +29,7 @@ #include "migration/vmstate.h" #include "qemu/module.h" #include "sysemu/dma.h" - +#include "hw/isa/vt82c686.h" #include "hw/ide/pci.h" #include "trace.h" @@ -112,7 +112,7 @@ static void via_ide_set_irq(void *opaque, int n, int level) d->config[0x70 + n * 8] &= ~0x80; } - qemu_set_irq(isa_get_irq(NULL, 14 + n), level); + via_isa_set_irq(pci_get_function_0(d), 14 + n, level); } static void via_ide_reset(DeviceState *dev) @@ -190,7 +190,7 @@ static void via_ide_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_in(ds, via_ide_set_irq, 2); for (i = 0; i < 2; i++) { - ide_bus_new(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); + ide_bus_init(&d->bus[i], sizeof(d->bus[i]), ds, i, 2); ide_init2(&d->bus[i], qdev_get_gpio_in(ds, i)); bmdma_init(&d->bus[i], &d->bmdma[i], d); @@ -217,6 +217,9 @@ static void via_ide_class_init(ObjectClass *klass, void *data) dc->reset = via_ide_reset; dc->vmsd = &vmstate_ide_pci; + /* Reason: only works as function of VIA southbridge */ + dc->user_creatable = false; + k->realize = via_ide_realize; k->exit = via_ide_exitfn; k->vendor_id = PCI_VENDOR_ID_VIA; diff --git a/hw/input/hid.c b/hw/input/hid.c index e1d2e460837..8aab0521f40 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -51,8 +51,8 @@ static const uint8_t hid_usage_keys[0x100] = { 0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65, + 0x88, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x8a, 0x00, 0x8b, 0x00, 0x89, 0xe7, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/hw/input/lasips2.c b/hw/input/lasips2.c index 0786e573388..68d741d3421 100644 --- a/hw/input/lasips2.c +++ b/hw/input/lasips2.c @@ -24,12 +24,9 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "hw/qdev-properties.h" -#include "hw/hw.h" #include "hw/input/ps2.h" #include "hw/input/lasips2.h" -#include "hw/sysbus.h" #include "exec/hwaddr.h" -#include "sysemu/sysemu.h" #include "trace.h" #include "exec/address-spaces.h" #include "migration/vmstate.h" @@ -99,7 +96,7 @@ typedef enum { LASIPS2_STATUS_CLKSHD = 0x80, } lasips2_status_reg_t; -static const char *artist_read_reg_name(uint64_t addr) +static const char *lasips2_read_reg_name(uint64_t addr) { switch (addr & 0xc) { case REG_PS2_ID: @@ -119,7 +116,7 @@ static const char *artist_read_reg_name(uint64_t addr) } } -static const char *artist_write_reg_name(uint64_t addr) +static const char *lasips2_write_reg_name(uint64_t addr) { switch (addr & 0x0c) { case REG_PS2_RESET: @@ -148,7 +145,7 @@ static void lasips2_reg_write(void *opaque, hwaddr addr, uint64_t val, LASIPS2Port *port = opaque; trace_lasips2_reg_write(size, port->id, addr, - artist_write_reg_name(addr), val); + lasips2_write_reg_name(addr), val); switch (addr & 0xc) { case REG_PS2_CONTROL: @@ -242,7 +239,7 @@ static uint64_t lasips2_reg_read(void *opaque, hwaddr addr, unsigned size) break; } trace_lasips2_reg_read(size, port->id, addr, - artist_read_reg_name(addr), ret); + lasips2_read_reg_name(addr), ret); return ret; } diff --git a/hw/input/lm832x.c b/hw/input/lm832x.c index 4cb1e9de01f..19a646d9bb4 100644 --- a/hw/input/lm832x.c +++ b/hw/input/lm832x.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include 
"hw/input/lm832x.h" #include "hw/i2c/i2c.h" #include "hw/irq.h" #include "migration/vmstate.h" @@ -27,7 +28,6 @@ #include "ui/console.h" #include "qom/object.h" -#define TYPE_LM8323 "lm8323" OBJECT_DECLARE_SIMPLE_TYPE(LM823KbdState, LM8323) struct LM823KbdState { diff --git a/hw/input/meson.build b/hw/input/meson.build index 0042c3f0dc5..8deb011d4a6 100644 --- a/hw/input/meson.build +++ b/hw/input/meson.build @@ -13,7 +13,6 @@ softmmu_ss.add(when: 'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input-hid.c') softmmu_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host.c')) softmmu_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-softusb.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_keypad.c')) softmmu_ss.add(when: 'CONFIG_TSC210X', if_true: files('tsc210x.c')) softmmu_ss.add(when: 'CONFIG_LASIPS2', if_true: files('lasips2.c')) diff --git a/hw/input/milkymist-softusb.c b/hw/input/milkymist-softusb.c deleted file mode 100644 index d885c708d7c..00000000000 --- a/hw/input/milkymist-softusb.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * QEMU model of the Milkymist SoftUSB block. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * not available yet - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "ui/console.h" -#include "hw/input/hid.h" -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_CTRL = 0, - R_MAX -}; - -enum { - CTRL_RESET = (1<<0), -}; - -#define COMLOC_DEBUG_PRODUCE 0x1000 -#define COMLOC_DEBUG_BASE 0x1001 -#define COMLOC_MEVT_PRODUCE 0x1101 -#define COMLOC_MEVT_BASE 0x1102 -#define COMLOC_KEVT_PRODUCE 0x1142 -#define COMLOC_KEVT_BASE 0x1143 - -#define TYPE_MILKYMIST_SOFTUSB "milkymist-softusb" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistSoftUsbState, MILKYMIST_SOFTUSB) - -struct MilkymistSoftUsbState { - SysBusDevice parent_obj; - - HIDState hid_kbd; - HIDState hid_mouse; - - MemoryRegion regs_region; - MemoryRegion pmem; - MemoryRegion dmem; - qemu_irq irq; - - void *pmem_ptr; - void *dmem_ptr; - - /* device properties */ - uint32_t pmem_size; - uint32_t dmem_size; - - /* device registers */ - uint32_t regs[R_MAX]; - - /* mouse state */ - uint8_t mouse_hid_buffer[4]; - - /* keyboard state */ - uint8_t kbd_hid_buffer[8]; -}; - -static uint64_t softusb_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistSoftUsbState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTRL: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_softusb: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_softusb_memory_read(addr << 2, r); - - return r; -} - -static void -softusb_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistSoftUsbState *s = opaque; - - trace_milkymist_softusb_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTRL: - s->regs[addr] = value; - break; - - default: - error_report("milkymist_softusb: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps softusb_mmio_ops = { - .read = softusb_read, - .write = softusb_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, -}; - -static inline void softusb_read_dmem(MilkymistSoftUsbState *s, - uint32_t offset, uint8_t *buf, uint32_t len) -{ - if (offset + len >= s->dmem_size) { - error_report("milkymist_softusb: read dmem out of bounds " - "at offset 0x%x, len %d", offset, len); - memset(buf, 0, len); - return; - } - - memcpy(buf, s->dmem_ptr + offset, len); -} - -static inline void softusb_write_dmem(MilkymistSoftUsbState *s, - uint32_t offset, uint8_t *buf, uint32_t len) -{ - if (offset + len >= s->dmem_size) { - error_report("milkymist_softusb: write dmem out of bounds " - "at offset 0x%x, len %d", offset, len); - return; - } - - memcpy(s->dmem_ptr + offset, buf, len); -} - -static void softusb_mouse_changed(MilkymistSoftUsbState *s) -{ - uint8_t m; - - softusb_read_dmem(s, COMLOC_MEVT_PRODUCE, &m, 1); - trace_milkymist_softusb_mevt(m); - softusb_write_dmem(s, COMLOC_MEVT_BASE + 4 * m, s->mouse_hid_buffer, 4); - m = (m + 1) & 0xf; - softusb_write_dmem(s, COMLOC_MEVT_PRODUCE, &m, 1); - - trace_milkymist_softusb_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static void softusb_kbd_changed(MilkymistSoftUsbState *s) -{ - uint8_t m; - - softusb_read_dmem(s, COMLOC_KEVT_PRODUCE, &m, 1); - trace_milkymist_softusb_kevt(m); - softusb_write_dmem(s, COMLOC_KEVT_BASE + 8 * m, 
s->kbd_hid_buffer, 8); - m = (m + 1) & 0x7; - softusb_write_dmem(s, COMLOC_KEVT_PRODUCE, &m, 1); - - trace_milkymist_softusb_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static void softusb_kbd_hid_datain(HIDState *hs) -{ - MilkymistSoftUsbState *s = container_of(hs, MilkymistSoftUsbState, hid_kbd); - int len; - - /* if device is in reset, do nothing */ - if (s->regs[R_CTRL] & CTRL_RESET) { - return; - } - - while (hid_has_events(hs)) { - len = hid_keyboard_poll(hs, s->kbd_hid_buffer, - sizeof(s->kbd_hid_buffer)); - - if (len == 8) { - softusb_kbd_changed(s); - } - } -} - -static void softusb_mouse_hid_datain(HIDState *hs) -{ - MilkymistSoftUsbState *s = - container_of(hs, MilkymistSoftUsbState, hid_mouse); - int len; - - /* if device is in reset, do nothing */ - if (s->regs[R_CTRL] & CTRL_RESET) { - return; - } - - while (hid_has_events(hs)) { - len = hid_pointer_poll(hs, s->mouse_hid_buffer, - sizeof(s->mouse_hid_buffer)); - - if (len == 4) { - softusb_mouse_changed(s); - } - } -} - -static void milkymist_softusb_reset(DeviceState *d) -{ - MilkymistSoftUsbState *s = MILKYMIST_SOFTUSB(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - memset(s->kbd_hid_buffer, 0, sizeof(s->kbd_hid_buffer)); - memset(s->mouse_hid_buffer, 0, sizeof(s->mouse_hid_buffer)); - - hid_reset(&s->hid_kbd); - hid_reset(&s->hid_mouse); - - /* defaults */ - s->regs[R_CTRL] = CTRL_RESET; -} - -static void milkymist_softusb_realize(DeviceState *dev, Error **errp) -{ - MilkymistSoftUsbState *s = MILKYMIST_SOFTUSB(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - sysbus_init_irq(sbd, &s->irq); - - memory_region_init_io(&s->regs_region, OBJECT(s), &softusb_mmio_ops, s, - "milkymist-softusb", R_MAX * 4); - sysbus_init_mmio(sbd, &s->regs_region); - - /* register pmem and dmem */ - memory_region_init_ram_nomigrate(&s->pmem, OBJECT(s), "milkymist-softusb.pmem", - s->pmem_size, &error_fatal); - vmstate_register_ram_global(&s->pmem); - s->pmem_ptr = memory_region_get_ram_ptr(&s->pmem); - sysbus_init_mmio(sbd, &s->pmem); - memory_region_init_ram_nomigrate(&s->dmem, OBJECT(s), "milkymist-softusb.dmem", - s->dmem_size, &error_fatal); - vmstate_register_ram_global(&s->dmem); - s->dmem_ptr = memory_region_get_ram_ptr(&s->dmem); - sysbus_init_mmio(sbd, &s->dmem); - - hid_init(&s->hid_kbd, HID_KEYBOARD, softusb_kbd_hid_datain); - hid_init(&s->hid_mouse, HID_MOUSE, softusb_mouse_hid_datain); -} - -static const VMStateDescription vmstate_milkymist_softusb = { - .name = "milkymist-softusb", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistSoftUsbState, R_MAX), - VMSTATE_HID_KEYBOARD_DEVICE(hid_kbd, MilkymistSoftUsbState), - VMSTATE_HID_POINTER_DEVICE(hid_mouse, MilkymistSoftUsbState), - VMSTATE_BUFFER(kbd_hid_buffer, MilkymistSoftUsbState), - VMSTATE_BUFFER(mouse_hid_buffer, MilkymistSoftUsbState), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_softusb_properties[] = { - DEFINE_PROP_UINT32("pmem_size", MilkymistSoftUsbState, pmem_size, 0x00001000), - DEFINE_PROP_UINT32("dmem_size", MilkymistSoftUsbState, dmem_size, 0x00002000), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_softusb_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_softusb_realize; - dc->reset = milkymist_softusb_reset; - dc->vmsd = &vmstate_milkymist_softusb; - device_class_set_props(dc, milkymist_softusb_properties); -} - -static const TypeInfo milkymist_softusb_info = { - .name = 
TYPE_MILKYMIST_SOFTUSB, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistSoftUsbState), - .class_init = milkymist_softusb_class_init, -}; - -static void milkymist_softusb_register_types(void) -{ - type_register_static(&milkymist_softusb_info); -} - -type_init(milkymist_softusb_register_types) diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index dde85ba6c68..baba62f357a 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -23,13 +23,16 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/timer.h" #include "hw/isa/isa.h" #include "migration/vmstate.h" #include "hw/acpi/aml-build.h" #include "hw/input/ps2.h" #include "hw/irq.h" #include "hw/input/i8042.h" +#include "hw/qdev-properties.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" @@ -59,21 +62,6 @@ #define KBD_CCMD_RESET 0xFE /* Pulse bit 0 of the output port P2 = CPU reset. */ #define KBD_CCMD_NO_OP 0xFF /* Pulse no bits of the output port P2. */ -/* Keyboard Commands */ -#define KBD_CMD_SET_LEDS 0xED /* Set keyboard leds */ -#define KBD_CMD_ECHO 0xEE -#define KBD_CMD_GET_ID 0xF2 /* get keyboard ID */ -#define KBD_CMD_SET_RATE 0xF3 /* Set typematic rate */ -#define KBD_CMD_ENABLE 0xF4 /* Enable scanning */ -#define KBD_CMD_RESET_DISABLE 0xF5 /* reset and disable scanning */ -#define KBD_CMD_RESET_ENABLE 0xF6 /* reset and enable scanning */ -#define KBD_CMD_RESET 0xFF /* Reset */ - -/* Keyboard Replies */ -#define KBD_REPLY_POR 0xAA /* Power on reset */ -#define KBD_REPLY_ACK 0xFA /* Command ACK */ -#define KBD_REPLY_RESEND 0xFE /* Command NACK, send the cmd again */ - /* Status Register Bits */ #define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */ #define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */ @@ -106,41 +94,37 @@ */ #define KBD_OUT_ONES 0xcc -/* Mouse Commands */ -#define AUX_SET_SCALE11 0xE6 /* Set 1:1 scaling */ -#define AUX_SET_SCALE21 0xE7 /* Set 2:1 scaling */ -#define AUX_SET_RES 0xE8 /* Set resolution */ -#define AUX_GET_SCALE 0xE9 /* Get scaling factor */ -#define AUX_SET_STREAM 0xEA /* Set stream mode */ -#define AUX_POLL 0xEB /* Poll */ -#define AUX_RESET_WRAP 0xEC /* Reset wrap mode */ -#define AUX_SET_WRAP 0xEE /* Set wrap mode */ -#define AUX_SET_REMOTE 0xF0 /* Set remote mode */ -#define AUX_GET_TYPE 0xF2 /* Get type */ -#define AUX_SET_SAMPLE 0xF3 /* Set sample rate */ -#define AUX_ENABLE_DEV 0xF4 /* Enable aux device */ -#define AUX_DISABLE_DEV 0xF5 /* Disable aux device */ -#define AUX_SET_DEFAULT 0xF6 -#define AUX_RESET 0xFF /* Reset aux device */ -#define AUX_ACK 0xFA /* Command byte ACK. */ - -#define MOUSE_STATUS_REMOTE 0x40 -#define MOUSE_STATUS_ENABLED 0x20 -#define MOUSE_STATUS_SCALE21 0x10 - -#define KBD_PENDING_KBD 1 -#define KBD_PENDING_AUX 2 +#define KBD_PENDING_KBD_COMPAT 0x01 +#define KBD_PENDING_AUX_COMPAT 0x02 +#define KBD_PENDING_CTRL_KBD 0x04 +#define KBD_PENDING_CTRL_AUX 0x08 +#define KBD_PENDING_KBD KBD_MODE_DISABLE_KBD /* 0x10 */ +#define KBD_PENDING_AUX KBD_MODE_DISABLE_MOUSE /* 0x20 */ + +#define KBD_MIGR_TIMER_PENDING 0x1 + +#define KBD_OBSRC_KBD 0x01 +#define KBD_OBSRC_MOUSE 0x02 +#define KBD_OBSRC_CTRL 0x04 typedef struct KBDState { uint8_t write_cmd; /* if non zero, write data to port 60 is expected */ uint8_t status; uint8_t mode; uint8_t outport; + uint32_t migration_flags; + uint32_t obsrc; bool outport_present; + bool extended_state; + bool extended_state_loaded; /* Bitmask of devices with data available. 
*/ uint8_t pending; + uint8_t obdata; + uint8_t cbdata; + uint8_t pending_tmp; void *kbd; void *mouse; + QEMUTimer *throttle_timer; qemu_irq irq_kbd; qemu_irq irq_mouse; @@ -148,56 +132,123 @@ typedef struct KBDState { hwaddr mask; } KBDState; -/* update irq and KBD_STAT_[MOUSE_]OBF */ /* XXX: not generating the irqs if KBD_MODE_DISABLE_KBD is set may be incorrect, but it avoids having to simulate exact delays */ -static void kbd_update_irq(KBDState *s) +static void kbd_update_irq_lines(KBDState *s) { int irq_kbd_level, irq_mouse_level; irq_kbd_level = 0; irq_mouse_level = 0; + + if (s->status & KBD_STAT_OBF) { + if (s->status & KBD_STAT_MOUSE_OBF) { + if (s->mode & KBD_MODE_MOUSE_INT) { + irq_mouse_level = 1; + } + } else { + if ((s->mode & KBD_MODE_KBD_INT) && + !(s->mode & KBD_MODE_DISABLE_KBD)) { + irq_kbd_level = 1; + } + } + } + qemu_set_irq(s->irq_kbd, irq_kbd_level); + qemu_set_irq(s->irq_mouse, irq_mouse_level); +} + +static void kbd_deassert_irq(KBDState *s) +{ + s->status &= ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF); + s->outport &= ~(KBD_OUT_OBF | KBD_OUT_MOUSE_OBF); + kbd_update_irq_lines(s); +} + +static uint8_t kbd_pending(KBDState *s) +{ + if (s->extended_state) { + return s->pending & (~s->mode | ~(KBD_PENDING_KBD | KBD_PENDING_AUX)); + } else { + return s->pending; + } +} + +/* update irq and KBD_STAT_[MOUSE_]OBF */ +static void kbd_update_irq(KBDState *s) +{ + uint8_t pending = kbd_pending(s); + s->status &= ~(KBD_STAT_OBF | KBD_STAT_MOUSE_OBF); s->outport &= ~(KBD_OUT_OBF | KBD_OUT_MOUSE_OBF); - if (s->pending) { + if (pending) { s->status |= KBD_STAT_OBF; s->outport |= KBD_OUT_OBF; - /* kbd data takes priority over aux data. */ - if (s->pending == KBD_PENDING_AUX) { + if (pending & KBD_PENDING_CTRL_KBD) { + s->obsrc = KBD_OBSRC_CTRL; + } else if (pending & KBD_PENDING_CTRL_AUX) { s->status |= KBD_STAT_MOUSE_OBF; s->outport |= KBD_OUT_MOUSE_OBF; - if (s->mode & KBD_MODE_MOUSE_INT) - irq_mouse_level = 1; + s->obsrc = KBD_OBSRC_CTRL; + } else if (pending & KBD_PENDING_KBD) { + s->obsrc = KBD_OBSRC_KBD; } else { - if ((s->mode & KBD_MODE_KBD_INT) && - !(s->mode & KBD_MODE_DISABLE_KBD)) - irq_kbd_level = 1; + s->status |= KBD_STAT_MOUSE_OBF; + s->outport |= KBD_OUT_MOUSE_OBF; + s->obsrc = KBD_OBSRC_MOUSE; } } - qemu_set_irq(s->irq_kbd, irq_kbd_level); - qemu_set_irq(s->irq_mouse, irq_mouse_level); + kbd_update_irq_lines(s); +} + +static void kbd_safe_update_irq(KBDState *s) +{ + /* + * with KBD_STAT_OBF set, a call to kbd_read_data() will eventually call + * kbd_update_irq() + */ + if (s->status & KBD_STAT_OBF) { + return; + } + /* the throttle timer is pending and will call kbd_update_irq() */ + if (s->throttle_timer && timer_pending(s->throttle_timer)) { + return; + } + if (kbd_pending(s)) { + kbd_update_irq(s); + } } static void kbd_update_kbd_irq(void *opaque, int level) { - KBDState *s = (KBDState *)opaque; + KBDState *s = opaque; - if (level) + if (level) { s->pending |= KBD_PENDING_KBD; - else + } else { s->pending &= ~KBD_PENDING_KBD; - kbd_update_irq(s); + } + kbd_safe_update_irq(s); } static void kbd_update_aux_irq(void *opaque, int level) { - KBDState *s = (KBDState *)opaque; + KBDState *s = opaque; - if (level) + if (level) { s->pending |= KBD_PENDING_AUX; - else + } else { s->pending &= ~KBD_PENDING_AUX; - kbd_update_irq(s); + } + kbd_safe_update_irq(s); +} + +static void kbd_throttle_timeout(void *opaque) +{ + KBDState *s = opaque; + + if (kbd_pending(s)) { + kbd_update_irq(s); + } } static uint64_t kbd_read_status(void *opaque, hwaddr addr, @@ -212,10 +263,25 @@ 
static uint64_t kbd_read_status(void *opaque, hwaddr addr, static void kbd_queue(KBDState *s, int b, int aux) { - if (aux) - ps2_queue(s->mouse, b); - else - ps2_queue(s->kbd, b); + if (s->extended_state) { + s->cbdata = b; + s->pending &= ~KBD_PENDING_CTRL_KBD & ~KBD_PENDING_CTRL_AUX; + s->pending |= aux ? KBD_PENDING_CTRL_AUX : KBD_PENDING_CTRL_KBD; + kbd_safe_update_irq(s); + } else { + ps2_queue(aux ? s->mouse : s->kbd, b); + } +} + +static uint8_t kbd_dequeue(KBDState *s) +{ + uint8_t b = s->cbdata; + + s->pending &= ~KBD_PENDING_CTRL_KBD & ~KBD_PENDING_CTRL_AUX; + if (kbd_pending(s)) { + kbd_update_irq(s); + } + return b; } static void outport_write(KBDState *s, uint32_t val) @@ -265,6 +331,7 @@ static void kbd_write_command(void *opaque, hwaddr addr, break; case KBD_CCMD_MOUSE_ENABLE: s->mode &= ~KBD_MODE_DISABLE_MOUSE; + kbd_safe_update_irq(s); break; case KBD_CCMD_TEST_MOUSE: kbd_queue(s, 0x00, 0); @@ -278,11 +345,10 @@ static void kbd_write_command(void *opaque, hwaddr addr, break; case KBD_CCMD_KBD_DISABLE: s->mode |= KBD_MODE_DISABLE_KBD; - kbd_update_irq(s); break; case KBD_CCMD_KBD_ENABLE: s->mode &= ~KBD_MODE_DISABLE_KBD; - kbd_update_irq(s); + kbd_safe_update_irq(s); break; case KBD_CCMD_READ_INPORT: kbd_queue(s, 0x80, 0); @@ -315,15 +381,24 @@ static uint64_t kbd_read_data(void *opaque, hwaddr addr, unsigned size) { KBDState *s = opaque; - uint32_t val; - if (s->pending == KBD_PENDING_AUX) - val = ps2_read_data(s->mouse); - else - val = ps2_read_data(s->kbd); + if (s->status & KBD_STAT_OBF) { + kbd_deassert_irq(s); + if (s->obsrc & KBD_OBSRC_KBD) { + if (s->throttle_timer) { + timer_mod(s->throttle_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + 1000); + } + s->obdata = ps2_read_data(s->kbd); + } else if (s->obsrc & KBD_OBSRC_MOUSE) { + s->obdata = ps2_read_data(s->mouse); + } else if (s->obsrc & KBD_OBSRC_CTRL) { + s->obdata = kbd_dequeue(s); + } + } - trace_pckbd_kbd_read_data(val); - return val; + trace_pckbd_kbd_read_data(s->obdata); + return s->obdata; } static void kbd_write_data(void *opaque, hwaddr addr, @@ -336,12 +411,23 @@ static void kbd_write_data(void *opaque, hwaddr addr, switch(s->write_cmd) { case 0: ps2_write_keyboard(s->kbd, val); + /* sending data to the keyboard reenables PS/2 communication */ + s->mode &= ~KBD_MODE_DISABLE_KBD; + kbd_safe_update_irq(s); break; case KBD_CCMD_WRITE_MODE: s->mode = val; ps2_keyboard_set_translation(s->kbd, (s->mode & KBD_MODE_KCC) != 0); - /* ??? */ - kbd_update_irq(s); + /* + * a write to the mode byte interrupt enable flags directly updates + * the irq lines + */ + kbd_update_irq_lines(s); + /* + * a write to the mode byte disable interface flags may raise + * an irq if there is pending data in the PS/2 queues. 
+ */ + kbd_safe_update_irq(s); break; case KBD_CCMD_WRITE_OBUF: kbd_queue(s, val, 0); @@ -354,6 +440,9 @@ static void kbd_write_data(void *opaque, hwaddr addr, break; case KBD_CCMD_WRITE_MOUSE: ps2_write_mouse(s->mouse, val); + /* sending data to the mouse reenables PS/2 communication */ + s->mode &= ~KBD_MODE_DISABLE_MOUSE; + kbd_safe_update_irq(s); break; default: break; @@ -368,7 +457,11 @@ static void kbd_reset(void *opaque) s->mode = KBD_MODE_KBD_INT | KBD_MODE_MOUSE_INT; s->status = KBD_STAT_CMD | KBD_STAT_UNLOCKED; s->outport = KBD_OUT_RESET | KBD_OUT_A20 | KBD_OUT_ONES; - s->outport_present = false; + s->pending = 0; + kbd_deassert_irq(s); + if (s->throttle_timer) { + timer_del(s->throttle_timer); + } } static uint8_t kbd_outport_default(KBDState *s) @@ -403,13 +496,99 @@ static const VMStateDescription vmstate_kbd_outport = { } }; +static int kbd_extended_state_pre_save(void *opaque) +{ + KBDState *s = opaque; + + s->migration_flags = 0; + if (s->throttle_timer && timer_pending(s->throttle_timer)) { + s->migration_flags |= KBD_MIGR_TIMER_PENDING; + } + + return 0; +} + +static int kbd_extended_state_post_load(void *opaque, int version_id) +{ + KBDState *s = opaque; + + if (s->migration_flags & KBD_MIGR_TIMER_PENDING) { + kbd_throttle_timeout(s); + } + s->extended_state_loaded = true; + + return 0; +} + +static bool kbd_extended_state_needed(void *opaque) +{ + KBDState *s = opaque; + + return s->extended_state; +} + +static const VMStateDescription vmstate_kbd_extended_state = { + .name = "pckbd/extended_state", + .post_load = kbd_extended_state_post_load, + .pre_save = kbd_extended_state_pre_save, + .needed = kbd_extended_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(migration_flags, KBDState), + VMSTATE_UINT32(obsrc, KBDState), + VMSTATE_UINT8(obdata, KBDState), + VMSTATE_UINT8(cbdata, KBDState), + VMSTATE_END_OF_LIST() + } +}; + +static int kbd_pre_save(void *opaque) +{ + KBDState *s = opaque; + + if (s->extended_state) { + s->pending_tmp = s->pending; + } else { + s->pending_tmp = 0; + if (s->pending & KBD_PENDING_KBD) { + s->pending_tmp |= KBD_PENDING_KBD_COMPAT; + } + if (s->pending & KBD_PENDING_AUX) { + s->pending_tmp |= KBD_PENDING_AUX_COMPAT; + } + } + return 0; +} + +static int kbd_pre_load(void *opaque) +{ + KBDState *s = opaque; + + s->outport_present = false; + s->extended_state_loaded = false; + return 0; +} + static int kbd_post_load(void *opaque, int version_id) { KBDState *s = opaque; if (!s->outport_present) { s->outport = kbd_outport_default(s); } - s->outport_present = false; + s->pending = s->pending_tmp; + if (!s->extended_state_loaded) { + s->obsrc = s->status & KBD_STAT_OBF ? + (s->status & KBD_STAT_MOUSE_OBF ? 
KBD_OBSRC_MOUSE : KBD_OBSRC_KBD) : + 0; + if (s->pending & KBD_PENDING_KBD_COMPAT) { + s->pending |= KBD_PENDING_KBD; + } + if (s->pending & KBD_PENDING_AUX_COMPAT) { + s->pending |= KBD_PENDING_AUX; + } + } + /* clear all unused flags */ + s->pending &= KBD_PENDING_CTRL_KBD | KBD_PENDING_CTRL_AUX | + KBD_PENDING_KBD | KBD_PENDING_AUX; return 0; } @@ -417,16 +596,19 @@ static const VMStateDescription vmstate_kbd = { .name = "pckbd", .version_id = 3, .minimum_version_id = 3, + .pre_load = kbd_pre_load, .post_load = kbd_post_load, + .pre_save = kbd_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT8(write_cmd, KBDState), VMSTATE_UINT8(status, KBDState), VMSTATE_UINT8(mode, KBDState), - VMSTATE_UINT8(pending, KBDState), + VMSTATE_UINT8(pending_tmp, KBDState), VMSTATE_END_OF_LIST() }, .subsections = (const VMStateDescription*[]) { &vmstate_kbd_outport, + &vmstate_kbd_extended_state, NULL } }; @@ -472,6 +654,8 @@ void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq, s->irq_mouse = mouse_irq; s->mask = mask; + s->extended_state = true; + vmstate_register(NULL, 0, &vmstate_kbd, s); memory_region_init_io(region, NULL, &i8042_mmio_ops, s, "i8042", size); @@ -485,6 +669,7 @@ struct ISAKBDState { ISADevice parent_obj; KBDState kbd; + bool kbd_throttle; MemoryRegion io[2]; }; @@ -557,6 +742,13 @@ static void i8042_realizefn(DeviceState *dev, Error **errp) s->kbd = ps2_kbd_init(kbd_update_kbd_irq, s); s->mouse = ps2_mouse_init(kbd_update_aux_irq, s); + if (isa_s->kbd_throttle && !isa_s->kbd.extended_state) { + warn_report(TYPE_I8042 ": can't enable kbd-throttle without" + " extended-state, disabling kbd-throttle"); + } else if (isa_s->kbd_throttle) { + s->throttle_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, + kbd_throttle_timeout, s); + } qemu_register_reset(kbd_reset, s); } @@ -588,11 +780,18 @@ static void i8042_build_aml(ISADevice *isadev, Aml *scope) aml_append(scope, mou); } +static Property i8042_properties[] = { + DEFINE_PROP_BOOL("extended-state", ISAKBDState, kbd.extended_state, true), + DEFINE_PROP_BOOL("kbd-throttle", ISAKBDState, kbd_throttle, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void i8042_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ISADeviceClass *isa = ISA_DEVICE_CLASS(klass); + device_class_set_props(dc, i8042_properties); dc->realize = i8042_realizefn; dc->vmsd = &vmstate_kbd_isa; isa->build_aml = i8042_build_aml; diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 72cdb80ae1c..9376a8f4ce5 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -74,7 +74,13 @@ #define MOUSE_STATUS_ENABLED 0x20 #define MOUSE_STATUS_SCALE21 0x10 -#define PS2_QUEUE_SIZE 16 /* Buffer size required by PS/2 protocol */ +/* + * PS/2 buffer size. Keep 256 bytes for compatibility with + * older QEMU versions. + */ +#define PS2_BUFFER_SIZE 256 +#define PS2_QUEUE_SIZE 16 /* Queue size required by PS/2 protocol */ +#define PS2_QUEUE_HEADROOM 8 /* Queue size for keyboard command replies */ /* Bits for 'modifiers' field in PS2KbdState */ #define MOD_CTRL_L (1 << 0) @@ -85,10 +91,8 @@ #define MOD_ALT_R (1 << 5) typedef struct { - /* Keep the data array 256 bytes long, which compatibility - with older qemu versions. 
*/ - uint8_t data[256]; - int rptr, wptr, count; + uint8_t data[PS2_BUFFER_SIZE]; + int rptr, wptr, cwptr, count; } PS2Queue; struct PS2State { @@ -183,6 +187,7 @@ static void ps2_reset_queue(PS2State *s) q->rptr = 0; q->wptr = 0; + q->cwptr = -1; q->count = 0; } @@ -195,13 +200,14 @@ void ps2_queue_noirq(PS2State *s, int b) { PS2Queue *q = &s->queue; - if (q->count == PS2_QUEUE_SIZE) { + if (q->count >= PS2_QUEUE_SIZE) { return; } q->data[q->wptr] = b; - if (++q->wptr == PS2_QUEUE_SIZE) + if (++q->wptr == PS2_BUFFER_SIZE) { q->wptr = 0; + } q->count++; } @@ -212,8 +218,12 @@ void ps2_raise_irq(PS2State *s) void ps2_queue(PS2State *s, int b) { + if (PS2_QUEUE_SIZE - s->queue.count < 1) { + return; + } + ps2_queue_noirq(s, b); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); } void ps2_queue_2(PS2State *s, int b1, int b2) @@ -224,7 +234,7 @@ void ps2_queue_2(PS2State *s, int b1, int b2) ps2_queue_noirq(s, b1); ps2_queue_noirq(s, b2); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); } void ps2_queue_3(PS2State *s, int b1, int b2, int b3) @@ -236,7 +246,7 @@ void ps2_queue_3(PS2State *s, int b1, int b2, int b3) ps2_queue_noirq(s, b1); ps2_queue_noirq(s, b2); ps2_queue_noirq(s, b3); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); } void ps2_queue_4(PS2State *s, int b1, int b2, int b3, int b4) @@ -249,7 +259,64 @@ void ps2_queue_4(PS2State *s, int b1, int b2, int b3, int b4) ps2_queue_noirq(s, b2); ps2_queue_noirq(s, b3); ps2_queue_noirq(s, b4); - s->update_irq(s->update_arg, 1); + ps2_raise_irq(s); +} + +static void ps2_cqueue_data(PS2Queue *q, int b) +{ + q->data[q->cwptr] = b; + if (++q->cwptr >= PS2_BUFFER_SIZE) { + q->cwptr = 0; + } + q->count++; +} + +static void ps2_cqueue_1(PS2State *s, int b1) +{ + PS2Queue *q = &s->queue; + + q->rptr = (q->rptr - 1) & (PS2_BUFFER_SIZE - 1); + q->cwptr = q->rptr; + ps2_cqueue_data(q, b1); + ps2_raise_irq(s); +} + +static void ps2_cqueue_2(PS2State *s, int b1, int b2) +{ + PS2Queue *q = &s->queue; + + q->rptr = (q->rptr - 2) & (PS2_BUFFER_SIZE - 1); + q->cwptr = q->rptr; + ps2_cqueue_data(q, b1); + ps2_cqueue_data(q, b2); + ps2_raise_irq(s); +} + +static void ps2_cqueue_3(PS2State *s, int b1, int b2, int b3) +{ + PS2Queue *q = &s->queue; + + q->rptr = (q->rptr - 3) & (PS2_BUFFER_SIZE - 1); + q->cwptr = q->rptr; + ps2_cqueue_data(q, b1); + ps2_cqueue_data(q, b2); + ps2_cqueue_data(q, b3); + ps2_raise_irq(s); +} + +static void ps2_cqueue_reset(PS2State *s) +{ + PS2Queue *q = &s->queue; + int ccount; + + if (q->cwptr == -1) { + return; + } + + ccount = (q->cwptr - q->rptr) & (PS2_BUFFER_SIZE - 1); + q->count -= ccount; + q->rptr = q->cwptr; + q->cwptr = -1; } /* keycode is the untranslated scancode in the current scancode set. 
*/ @@ -293,7 +360,8 @@ static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src, qcode = qemu_input_key_value_to_qcode(key->key); mod = ps2_modifier_bit(qcode); - trace_ps2_keyboard_event(s, qcode, key->down, mod, s->modifiers); + trace_ps2_keyboard_event(s, qcode, key->down, mod, + s->modifiers, s->scancode_set, s->translate); if (key->down) { s->modifiers |= mod; } else { @@ -504,18 +572,26 @@ uint32_t ps2_read_data(PS2State *s) (needed for EMM386) */ /* XXX: need a timer to do things correctly */ index = q->rptr - 1; - if (index < 0) - index = PS2_QUEUE_SIZE - 1; + if (index < 0) { + index = PS2_BUFFER_SIZE - 1; + } val = q->data[index]; } else { val = q->data[q->rptr]; - if (++q->rptr == PS2_QUEUE_SIZE) + if (++q->rptr == PS2_BUFFER_SIZE) { q->rptr = 0; + } q->count--; + if (q->rptr == q->cwptr) { + /* command reply queue is empty */ + q->cwptr = -1; + } /* reading deasserts IRQ */ s->update_irq(s->update_arg, 0); /* reassert IRQs if data left */ - s->update_irq(s->update_arg, q->count != 0); + if (q->count) { + s->update_irq(s->update_arg, 1); + } } return val; } @@ -541,92 +617,83 @@ void ps2_write_keyboard(void *opaque, int val) PS2KbdState *s = (PS2KbdState *)opaque; trace_ps2_write_keyboard(opaque, val); + ps2_cqueue_reset(&s->common); switch(s->common.write_cmd) { default: case -1: switch(val) { case 0x00: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case 0x05: - ps2_queue(&s->common, KBD_REPLY_RESEND); + ps2_cqueue_1(&s->common, KBD_REPLY_RESEND); break; case KBD_CMD_GET_ID: /* We emulate a MF2 AT keyboard here */ - if (s->translate) - ps2_queue_3(&s->common, - KBD_REPLY_ACK, - KBD_REPLY_ID, - 0x41); - else - ps2_queue_3(&s->common, - KBD_REPLY_ACK, - KBD_REPLY_ID, - 0x83); + ps2_cqueue_3(&s->common, KBD_REPLY_ACK, KBD_REPLY_ID, + s->translate ? 0x41 : 0x83); break; case KBD_CMD_ECHO: - ps2_queue(&s->common, KBD_CMD_ECHO); + ps2_cqueue_1(&s->common, KBD_CMD_ECHO); break; case KBD_CMD_ENABLE: s->scan_enabled = 1; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_SCANCODE: case KBD_CMD_SET_LEDS: case KBD_CMD_SET_RATE: case KBD_CMD_SET_MAKE_BREAK: s->common.write_cmd = val; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_RESET_DISABLE: ps2_reset_keyboard(s); s->scan_enabled = 0; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_RESET_ENABLE: ps2_reset_keyboard(s); s->scan_enabled = 1; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; case KBD_CMD_RESET: ps2_reset_keyboard(s); - ps2_queue_2(&s->common, + ps2_cqueue_2(&s->common, KBD_REPLY_ACK, KBD_REPLY_POR); break; case KBD_CMD_SET_TYPEMATIC: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); break; default: - ps2_queue(&s->common, KBD_REPLY_RESEND); + ps2_cqueue_1(&s->common, KBD_REPLY_RESEND); break; } break; case KBD_CMD_SET_MAKE_BREAK: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); s->common.write_cmd = -1; break; case KBD_CMD_SCANCODE: if (val == 0) { - if (s->common.queue.count <= PS2_QUEUE_SIZE - 2) { - ps2_queue(&s->common, KBD_REPLY_ACK); - ps2_put_keycode(s, s->scancode_set); - } + ps2_cqueue_2(&s->common, KBD_REPLY_ACK, s->translate ? 
+ translate_table[s->scancode_set] : s->scancode_set); } else if (val >= 1 && val <= 3) { s->scancode_set = val; - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); } else { - ps2_queue(&s->common, KBD_REPLY_RESEND); + ps2_cqueue_1(&s->common, KBD_REPLY_RESEND); } s->common.write_cmd = -1; break; case KBD_CMD_SET_LEDS: ps2_set_ledstate(s, val); - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); s->common.write_cmd = -1; break; case KBD_CMD_SET_RATE: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_cqueue_1(&s->common, KBD_REPLY_ACK); s->common.write_cmd = -1; break; } @@ -645,7 +712,8 @@ void ps2_keyboard_set_translation(void *opaque, int mode) static int ps2_mouse_send_packet(PS2MouseState *s) { - const int needed = 3 + (s->mouse_type - 2); + /* IMPS/2 and IMEX send 4 bytes, PS2 sends 3 bytes */ + const int needed = s->mouse_type ? 4 : 3; unsigned int b; int dx1, dy1, dz1; @@ -918,30 +986,27 @@ static void ps2_common_reset(PS2State *s) static void ps2_common_post_load(PS2State *s) { PS2Queue *q = &s->queue; - uint8_t i, size; - uint8_t tmp_data[PS2_QUEUE_SIZE]; - - /* set the useful data buffer queue size, < PS2_QUEUE_SIZE */ - size = q->count; - if (q->count < 0) { - size = 0; - } else if (q->count > PS2_QUEUE_SIZE) { - size = PS2_QUEUE_SIZE; - } + int ccount = 0; - /* move the queue elements to the start of data array */ - for (i = 0; i < size; i++) { - if (q->rptr < 0 || q->rptr >= sizeof(q->data)) { - q->rptr = 0; + /* limit the number of queued command replies to PS2_QUEUE_HEADROOM */ + if (q->cwptr != -1) { + ccount = (q->cwptr - q->rptr) & (PS2_BUFFER_SIZE - 1); + if (ccount > PS2_QUEUE_HEADROOM) { + ccount = PS2_QUEUE_HEADROOM; } - tmp_data[i] = q->data[q->rptr++]; } - memcpy(q->data, tmp_data, size); - /* reset rptr/wptr/count */ - q->rptr = 0; - q->wptr = (size == PS2_QUEUE_SIZE) ? 0 : size; - q->count = size; + /* limit the scancode queue size to PS2_QUEUE_SIZE */ + if (q->count < ccount) { + q->count = ccount; + } else if (q->count > ccount + PS2_QUEUE_SIZE) { + q->count = ccount + PS2_QUEUE_SIZE; + } + + /* sanitize rptr and recalculate wptr and cwptr */ + q->rptr = q->rptr & (PS2_BUFFER_SIZE - 1); + q->wptr = (q->rptr + q->count) & (PS2_BUFFER_SIZE - 1); + q->cwptr = ccount ? 
(q->rptr + ccount) & (PS2_BUFFER_SIZE - 1) : -1; } static void ps2_kbd_reset(void *opaque) @@ -1032,6 +1097,22 @@ static const VMStateDescription vmstate_ps2_keyboard_need_high_bit = { } }; +static bool ps2_keyboard_cqueue_needed(void *opaque) +{ + PS2KbdState *s = opaque; + + return s->common.queue.cwptr != -1; /* the queue is mostly empty */ +} + +static const VMStateDescription vmstate_ps2_keyboard_cqueue = { + .name = "ps2kbd/command_reply_queue", + .needed = ps2_keyboard_cqueue_needed, + .fields = (VMStateField[]) { + VMSTATE_INT32(common.queue.cwptr, PS2KbdState), + VMSTATE_END_OF_LIST() + } +}; + static int ps2_kbd_post_load(void* opaque, int version_id) { PS2KbdState *s = (PS2KbdState*)opaque; @@ -1045,22 +1126,11 @@ static int ps2_kbd_post_load(void* opaque, int version_id) return 0; } -static int ps2_kbd_pre_save(void *opaque) -{ - PS2KbdState *s = (PS2KbdState *)opaque; - PS2State *ps2 = &s->common; - - ps2_common_post_load(ps2); - - return 0; -} - static const VMStateDescription vmstate_ps2_keyboard = { .name = "ps2kbd", .version_id = 3, .minimum_version_id = 2, .post_load = ps2_kbd_post_load, - .pre_save = ps2_kbd_pre_save, .fields = (VMStateField[]) { VMSTATE_STRUCT(common, PS2KbdState, 0, vmstate_ps2_common, PS2State), VMSTATE_INT32(scan_enabled, PS2KbdState), @@ -1071,6 +1141,7 @@ static const VMStateDescription vmstate_ps2_keyboard = { .subsections = (const VMStateDescription*[]) { &vmstate_ps2_keyboard_ledstate, &vmstate_ps2_keyboard_need_high_bit, + &vmstate_ps2_keyboard_cqueue, NULL } }; @@ -1085,22 +1156,11 @@ static int ps2_mouse_post_load(void *opaque, int version_id) return 0; } -static int ps2_mouse_pre_save(void *opaque) -{ - PS2MouseState *s = (PS2MouseState *)opaque; - PS2State *ps2 = &s->common; - - ps2_common_post_load(ps2); - - return 0; -} - static const VMStateDescription vmstate_ps2_mouse = { .name = "ps2mouse", .version_id = 2, .minimum_version_id = 2, .post_load = ps2_mouse_post_load, - .pre_save = ps2_mouse_pre_save, .fields = (VMStateField[]) { VMSTATE_STRUCT(common, PS2MouseState, 0, vmstate_ps2_common, PS2State), VMSTATE_UINT8(mouse_status, PS2MouseState), diff --git a/hw/input/trace-events b/hw/input/trace-events index 1dd8ad6018a..e0bfe7f3ee4 100644 --- a/hw/input/trace-events +++ b/hw/input/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# adb-kbd.c adb_device_kbd_no_key(void) "Ignoring NO_KEY" @@ -30,7 +30,7 @@ pckbd_kbd_write_data(uint64_t val) "0x%02"PRIx64 # ps2.c ps2_put_keycode(void *opaque, int keycode) "%p keycode 0x%02x" -ps2_keyboard_event(void *opaque, int qcode, int down, unsigned int modifier, unsigned int modifiers) "%p qcode %d down %d modifier 0x%x modifiers 0x%x" +ps2_keyboard_event(void *opaque, int qcode, int down, unsigned int modifier, unsigned int modifiers, int set, int xlate) "%p qcode %d down %d modifier 0x%x modifiers 0x%x set %d xlate %d" ps2_read_data(void *opaque) "%p" ps2_set_ledstate(void *s, int ledstate) "%p ledstate %d" ps2_reset_keyboard(void *s) "%p" @@ -44,13 +44,6 @@ ps2_mouse_reset(void *opaque) "%p" ps2_kbd_init(void *s) "%p" ps2_mouse_init(void *s) "%p" -# milkymist-softusb.c -milkymist_softusb_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_softusb_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_softusb_mevt(uint8_t m) "m %d" -milkymist_softusb_kevt(uint8_t m) "m %d" -milkymist_softusb_pulse_irq(void) "Pulse IRQ" - # hid.c hid_kbd_queue_full(void) "queue full" hid_kbd_queue_empty(void) "queue empty" diff --git a/hw/input/vhost-user-input.c b/hw/input/vhost-user-input.c index 63984a8ba78..273e96a7b12 100644 --- a/hw/input/vhost-user-input.c +++ b/hw/input/vhost-user-input.c @@ -49,13 +49,15 @@ static void vhost_input_get_config(VirtIODevice *vdev, uint8_t *config_data) { VirtIOInput *vinput = VIRTIO_INPUT(vdev); VHostUserInput *vhi = VHOST_USER_INPUT(vdev); + Error *local_err = NULL; int ret; memset(config_data, 0, vinput->cfg_size); - ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size); + ret = vhost_dev_get_config(&vhi->vhost->dev, config_data, vinput->cfg_size, + &local_err); if (ret) { - error_report("vhost-user-input: get device config space failed"); + error_report_err(local_err); return; } } diff --git a/hw/input/virtio-input-host.c b/hw/input/virtio-input-host.c index 85daf73f1a8..137efba57b0 100644 --- a/hw/input/virtio-input-host.c +++ b/hw/input/virtio-input-host.c @@ -193,13 +193,16 @@ static void virtio_input_host_handle_status(VirtIOInput *vinput, { VirtIOInputHost *vih = VIRTIO_INPUT_HOST(vinput); struct input_event evdev; + struct timeval tval; int rc; - if (gettimeofday(&evdev.time, NULL)) { + if (gettimeofday(&tval, NULL)) { perror("virtio_input_host_handle_status: gettimeofday"); return; } + evdev.input_event_sec = tval.tv_sec; + evdev.input_event_usec = tval.tv_usec; evdev.type = le16_to_cpu(event->type); evdev.code = le16_to_cpu(event->code); evdev.value = le32_to_cpu(event->value); diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index f4694088a48..78aed93c454 100644 --- a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -62,7 +62,7 @@ config RX_ICU config LOONGSON_LIOINTC bool -config SIFIVE_CLINT +config RISCV_ACLINT bool config SIFIVE_PLIC diff --git a/hw/intc/apic.c b/hw/intc/apic.c index f4f50f974e6..3df11c34d68 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -17,7 +17,6 @@ * License along with this library; if not, see */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/thread.h" #include "hw/i386/apic_internal.h" #include "hw/i386/apic.h" diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 97dd96dffaa..2a20982066d 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -22,7 +22,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "cpu.h" #include "qapi/visitor.h" #include "hw/i386/apic.h" #include 
"hw/i386/apic_internal.h" diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 9494185cf46..7d2a13273a4 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -22,8 +22,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "cpu.h" -#include "hw/sysbus.h" #include "migration/blocker.h" #include "sysemu/kvm.h" #include "kvm_arm.h" diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index 66eaa971982..9f5f815db9b 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -18,7 +18,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/module.h" -#include "hw/sysbus.h" #include "hw/intc/arm_gicv3.h" #include "gicv3_internal.h" @@ -166,6 +165,16 @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs) cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq); } + if ((cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) && cs->gic->lpi_enable && + (cs->hpplpi.prio != 0xff)) { + if (irqbetter(cs, cs->hpplpi.irq, cs->hpplpi.prio)) { + cs->hppi.irq = cs->hpplpi.irq; + cs->hppi.prio = cs->hpplpi.prio; + cs->hppi.grp = cs->hpplpi.grp; + seenbetter = true; + } + } + /* If the best interrupt we just found would preempt whatever * was the previous best interrupt before this update, then * we know it's definitely the best one now. @@ -177,7 +186,9 @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs) * interrupt has reduced in priority and any other interrupt could * now be the new best one). */ - if (!seenbetter && cs->hppi.prio != 0xff && cs->hppi.irq < GIC_INTERNAL) { + if (!seenbetter && cs->hppi.prio != 0xff && + (cs->hppi.irq < GIC_INTERNAL || + cs->hppi.irq >= GICV3_LPI_INTID_START)) { gicv3_full_update_noirqset(cs->gic); } } @@ -340,9 +351,13 @@ static void gicv3_set_irq(void *opaque, int irq, int level) static void arm_gicv3_post_load(GICv3State *s) { + int i; /* Recalculate our cached idea of the current highest priority * pending interrupt, but don't set IRQ or FIQ lines. */ + for (i = 0; i < s->num_cpu; i++) { + gicv3_redist_update_lpi_only(&s->cpu[i]); + } gicv3_full_update_noirqset(s); /* Repopulate the cache of GICv3CPUState pointers for target CPUs */ gicv3_cache_all_target_cpustates(s); @@ -374,17 +389,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) return; } - if (s->nb_redist_regions != 1) { - error_setg(errp, "VGICv3 redist region number(%d) not equal to 1", - s->nb_redist_regions); - return; - } - - gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops); gicv3_init_cpuif(s); } diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 58ef65f589e..9884d2e39b9 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -250,21 +250,11 @@ static const VMStateDescription vmstate_gicv3 = { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp) + const MemoryRegionOps *ops) { SysBusDevice *sbd = SYS_BUS_DEVICE(s); - int rdist_capacity = 0; int i; - - for (i = 0; i < s->nb_redist_regions; i++) { - rdist_capacity += s->redist_region_count[i]; - } - if (rdist_capacity < s->num_cpu) { - error_setg(errp, "Capacity of the redist regions(%d) " - "is less than number of vcpus(%d)", - rdist_capacity, s->num_cpu); - return; - } + int cpuidx; /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU. 
* GPIO array layout is thus: @@ -293,14 +283,20 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, "gicv3_dist", 0x10000); sysbus_init_mmio(sbd, &s->iomem_dist); - s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions); + s->redist_regions = g_new0(GICv3RedistRegion, s->nb_redist_regions); + cpuidx = 0; for (i = 0; i < s->nb_redist_regions; i++) { char *name = g_strdup_printf("gicv3_redist_region[%d]", i); + GICv3RedistRegion *region = &s->redist_regions[i]; - memory_region_init_io(&s->iomem_redist[i], OBJECT(s), - ops ? &ops[1] : NULL, s, name, + region->gic = s; + region->cpuidx = cpuidx; + cpuidx += s->redist_region_count[i]; + + memory_region_init_io(®ion->iomem, OBJECT(s), + ops ? &ops[1] : NULL, region, name, s->redist_region_count[i] * GICV3_REDIST_SIZE); - sysbus_init_mmio(sbd, &s->iomem_redist[i]); + sysbus_init_mmio(sbd, ®ion->iomem); g_free(name); } } @@ -308,7 +304,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) { GICv3State *s = ARM_GICV3_COMMON(dev); - int i; + int i, rdist_capacity, cpuidx; /* revision property is actually reserved and currently used only in order * to keep the interface compatible with GICv2 code, avoiding extra @@ -345,12 +341,27 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) return; } + if (s->lpi_enable && !s->dma) { + error_setg(errp, "Redist-ITS: Guest 'sysmem' reference link not set"); + return; + } + + rdist_capacity = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + rdist_capacity += s->redist_region_count[i]; + } + if (rdist_capacity < s->num_cpu) { + error_setg(errp, "Capacity of the redist regions(%d) " + "is less than number of vcpus(%d)", + rdist_capacity, s->num_cpu); + return; + } + s->cpu = g_new0(GICv3CPUState, s->num_cpu); for (i = 0; i < s->num_cpu; i++) { CPUState *cpu = qemu_get_cpu(i); uint64_t cpu_affid; - int last; s->cpu[i].cpu = cpu; s->cpu[i].gic = s; @@ -370,7 +381,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) * PLPIS == 0 (physical LPIs not supported) */ cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); - last = (i == s->num_cpu - 1); /* The CPU mp-affinity property is in MPIDR register format; squash * the affinity bytes into 32 bits as the GICR_TYPER has them. @@ -379,8 +389,21 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) (cpu_affid & 0xFFFFFF); s->cpu[i].gicr_typer = (cpu_affid << 32) | (1 << 24) | - (i << 8) | - (last << 4); + (i << 8); + + if (s->lpi_enable) { + s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS; + } + } + + /* + * Now go through and set GICR_TYPER.Last for the final + * redistributor in each region. + */ + cpuidx = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + cpuidx += s->redist_region_count[i]; + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; } } @@ -426,6 +449,7 @@ static void arm_gicv3_common_reset(DeviceState *dev) memset(cs->gicr_ipriorityr, 0, sizeof(cs->gicr_ipriorityr)); cs->hppi.prio = 0xff; + cs->hpplpi.prio = 0xff; /* State in the CPU interface must *not* be reset here, because it * is part of the CPU's reset domain, not the GIC device's. 
@@ -494,9 +518,12 @@ static Property arm_gicv3_common_properties[] = { DEFINE_PROP_UINT32("num-cpu", GICv3State, num_cpu, 1), DEFINE_PROP_UINT32("num-irq", GICv3State, num_irq, 32), DEFINE_PROP_UINT32("revision", GICv3State, revision, 3), + DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0), DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0), DEFINE_PROP_ARRAY("redist-region-count", GICv3State, nb_redist_regions, redist_region_count, qdev_prop_uint32, uint32_t), + DEFINE_PROP_LINK("sysmem", GICv3State, dma, TYPE_MEMORY_REGION, + MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index 43ef1d7a840..85fc369e550 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" +#include "qemu/log.h" #include "qemu/main-loop.h" #include "trace.h" #include "gicv3_internal.h" @@ -350,7 +351,8 @@ static uint32_t maintenance_interrupt_state(GICv3CPUState *cs) /* Scan list registers and fill in the U, NP and EOI bits */ eoi_maintenance_interrupt_state(cs, &value); - if (cs->ich_hcr_el2 & (ICH_HCR_EL2_LRENPIE | ICH_HCR_EL2_EOICOUNT_MASK)) { + if ((cs->ich_hcr_el2 & ICH_HCR_EL2_LRENPIE) && + (cs->ich_hcr_el2 & ICH_HCR_EL2_EOICOUNT_MASK)) { value |= ICH_MISR_EL2_LRENP; } @@ -416,8 +418,9 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs) } } - if (cs->ich_hcr_el2 & ICH_HCR_EL2_EN) { - maintlevel = maintenance_interrupt_state(cs); + if ((cs->ich_hcr_el2 & ICH_HCR_EL2_EN) && + maintenance_interrupt_state(cs) != 0) { + maintlevel = 1; } trace_gicv3_cpuif_virt_set_irqs(gicv3_redist_affid(cs), fiqlevel, @@ -651,7 +654,7 @@ static uint64_t icv_iar_read(CPUARMState *env, const ARMCPRegInfo *ri) if (thisgrp == grp && icv_hppi_can_preempt(cs, lr)) { intid = ich_lr_vintid(lr); - if (intid < INTID_SECURE) { + if (!gicv3_intid_is_special(intid)) { icv_activate_irq(cs, idx, grp); } else { /* Interrupt goes from Pending to Invalid */ @@ -898,10 +901,12 @@ static void icc_activate_irq(GICv3CPUState *cs, int irq) cs->gicr_iactiver0 = deposit32(cs->gicr_iactiver0, irq, 1, 1); cs->gicr_ipendr0 = deposit32(cs->gicr_ipendr0, irq, 1, 0); gicv3_redist_update(cs); - } else { + } else if (irq < GICV3_LPI_INTID_START) { gicv3_gicd_active_set(cs->gic, irq); gicv3_gicd_pending_clear(cs->gic, irq); gicv3_update(cs->gic, irq, 1); + } else { + gicv3_redist_lpi_pending(cs, irq, 0); } } @@ -993,7 +998,7 @@ static uint64_t icc_iar0_read(CPUARMState *env, const ARMCPRegInfo *ri) intid = icc_hppir0_value(cs, env); } - if (!(intid >= INTID_SECURE && intid <= INTID_SPURIOUS)) { + if (!gicv3_intid_is_special(intid)) { icc_activate_irq(cs, intid); } @@ -1016,7 +1021,7 @@ static uint64_t icc_iar1_read(CPUARMState *env, const ARMCPRegInfo *ri) intid = icc_hppir1_value(cs, env); } - if (!(intid >= INTID_SECURE && intid <= INTID_SPURIOUS)) { + if (!gicv3_intid_is_special(intid)) { icc_activate_irq(cs, intid); } @@ -1226,7 +1231,7 @@ static void icv_dir_write(CPUARMState *env, const ARMCPRegInfo *ri, trace_gicv3_icv_dir_write(gicv3_redist_affid(cs), value); - if (irq >= cs->gic->num_irq) { + if (irq >= GICV3_MAXIRQ) { /* Also catches special interrupt numbers and LPIs */ return; } @@ -1261,8 +1266,7 @@ static void icv_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, trace_gicv3_icv_eoir_write(ri->crm == 8 ? 
0 : 1, gicv3_redist_affid(cs), value); - if (irq >= cs->gic->num_irq) { - /* Also catches special interrupt numbers and LPIs */ + if (gicv3_intid_is_special(irq)) { return; } @@ -1307,28 +1311,18 @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, GICv3CPUState *cs = icc_cs_from_env(env); int irq = value & 0xffffff; int grp; + bool is_eoir0 = ri->crm == 8; - if (icv_access(env, ri->crm == 8 ? HCR_FMO : HCR_IMO)) { + if (icv_access(env, is_eoir0 ? HCR_FMO : HCR_IMO)) { icv_eoir_write(env, ri, value); return; } - trace_gicv3_icc_eoir_write(ri->crm == 8 ? 0 : 1, + trace_gicv3_icc_eoir_write(is_eoir0 ? 0 : 1, gicv3_redist_affid(cs), value); - if (ri->crm == 8) { - /* EOIR0 */ - grp = GICV3_G0; - } else { - /* EOIR1 */ - if (arm_is_secure(env)) { - grp = GICV3_G1; - } else { - grp = GICV3_G1NS; - } - } - - if (irq >= cs->gic->num_irq) { + if ((irq >= cs->gic->num_irq) && + !(cs->gic->lpi_enable && (irq >= GICV3_LPI_INTID_START))) { /* This handles two cases: * 1. If software writes the ID of a spurious interrupt [ie 1020-1023] * to the GICC_EOIR, the GIC ignores that write. @@ -1340,7 +1334,36 @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, return; } - if (icc_highest_active_group(cs) != grp) { + grp = icc_highest_active_group(cs); + switch (grp) { + case GICV3_G0: + if (!is_eoir0) { + return; + } + if (!(cs->gic->gicd_ctlr & GICD_CTLR_DS) + && arm_feature(env, ARM_FEATURE_EL3) && !arm_is_secure(env)) { + return; + } + break; + case GICV3_G1: + if (is_eoir0) { + return; + } + if (!arm_is_secure(env)) { + return; + } + break; + case GICV3_G1NS: + if (is_eoir0) { + return; + } + if (!arm_is_el3_or_mon(env) && arm_is_secure(env)) { + return; + } + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: IRQ %d isn't active\n", __func__, irq); return; } diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c index b65f56f9035..4164500ea96 100644 --- a/hw/intc/arm_gicv3_dist.c +++ b/hw/intc/arm_gicv3_dist.c @@ -262,8 +262,21 @@ static void gicd_write_irouter(GICv3State *s, MemTxAttrs attrs, int irq, gicv3_update(s, irq, 1); } -static MemTxResult gicd_readb(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +/** + * gicd_readb + * gicd_readw + * gicd_readl + * gicd_readq + * gicd_writeb + * gicd_writew + * gicd_writel + * gicd_writeq + * + * Return %true if the operation succeeded, %false otherwise. + */ + +static bool gicd_readb(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Most GICv3 distributor registers do not support byte accesses. */ switch (offset) { @@ -273,17 +286,17 @@ static MemTxResult gicd_readb(GICv3State *s, hwaddr offset, /* This GIC implementation always has affinity routing enabled, * so these registers are all RAZ/WI. */ - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... GICD_IPRIORITYR + 0x3ff: *data = gicd_read_ipriorityr(s, attrs, offset - GICD_IPRIORITYR); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_writeb(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writeb(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Most GICv3 distributor registers do not support byte accesses. */ switch (offset) { @@ -293,25 +306,25 @@ static MemTxResult gicd_writeb(GICv3State *s, hwaddr offset, /* This GIC implementation always has affinity routing enabled, * so these registers are all RAZ/WI. */ - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... 
GICD_IPRIORITYR + 0x3ff: { int irq = offset - GICD_IPRIORITYR; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } gicd_write_ipriorityr(s, attrs, irq, value); gicv3_update(s, irq, 1); - return MEMTX_OK; + return true; } default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_readw(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +static bool gicd_readw(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Only GICD_SETSPI_NSR, GICD_CLRSPI_NSR, GICD_SETSPI_SR and GICD_SETSPI_NSR * support 16 bit accesses, and those registers are all part of the @@ -319,11 +332,11 @@ static MemTxResult gicd_readw(GICv3State *s, hwaddr offset, * implement (ie for us GICD_TYPER.MBIS == 0), so for us they are * reserved. */ - return MEMTX_ERROR; + return false; } -static MemTxResult gicd_writew(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writew(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Only GICD_SETSPI_NSR, GICD_CLRSPI_NSR, GICD_SETSPI_SR and GICD_SETSPI_NSR * support 16 bit accesses, and those registers are all part of the @@ -331,11 +344,11 @@ static MemTxResult gicd_writew(GICv3State *s, hwaddr offset, * implement (ie for us GICD_TYPER.MBIS == 0), so for us they are * reserved. */ - return MEMTX_ERROR; + return false; } -static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +static bool gicd_readl(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Almost all GICv3 distributor registers are 32-bit. * Note that WO registers must return an UNKNOWN value on reads, @@ -363,7 +376,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, } else { *data = s->gicd_ctlr; } - return MEMTX_OK; + return true; case GICD_TYPER: { /* For this implementation: @@ -371,7 +384,9 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, * A3V == 1 (non-zero values of Affinity level 3 supported) * IDbits == 0xf (we support 16-bit interrupt identifiers) * DVIS == 0 (Direct virtual LPI injection not supported) - * LPIS == 0 (LPIs not supported) + * LPIS == 1 (LPIs are supported if affinity routing is enabled) + * num_LPIs == 0b00000 (bits [15:11],Number of LPIs as indicated + * by GICD_TYPER.IDbits) * MBIS == 0 (message-based SPIs not supported) * SecurityExtn == 1 if security extns supported * CPUNumber == 0 since for us ARE is always 1 @@ -386,62 +401,63 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, bool sec_extn = !(s->gicd_ctlr & GICD_CTLR_DS); *data = (1 << 25) | (1 << 24) | (sec_extn << 10) | + (s->lpi_enable << GICD_TYPER_LPIS_SHIFT) | (0xf << 19) | itlinesnumber; - return MEMTX_OK; + return true; } case GICD_IIDR: /* We claim to be an ARM r0p0 with a zero ProductID. * This is the same as an r0p0 GIC-500. */ *data = gicv3_iidr(); - return MEMTX_OK; + return true; case GICD_STATUSR: /* RAZ/WI for us (this is an optional register and our implementation * does not track RO/WO/reserved violations to report them to the guest) */ *data = 0; - return MEMTX_OK; + return true; case GICD_IGROUPR ... 
GICD_IGROUPR + 0x7f: { int irq; if (!attrs.secure && !(s->gicd_ctlr & GICD_CTLR_DS)) { *data = 0; - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGROUPR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } *data = *gic_bmp_ptr32(s->group, irq); - return MEMTX_OK; + return true; } case GICD_ISENABLER ... GICD_ISENABLER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ISENABLER); - return MEMTX_OK; + return true; case GICD_ICENABLER ... GICD_ICENABLER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ICENABLER); - return MEMTX_OK; + return true; case GICD_ISPENDR ... GICD_ISPENDR + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge1, offset - GICD_ISPENDR); - return MEMTX_OK; + return true; case GICD_ICPENDR ... GICD_ICPENDR + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge2, offset - GICD_ICPENDR); - return MEMTX_OK; + return true; case GICD_ISACTIVER ... GICD_ISACTIVER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->active, mask_nsacr_ge2, offset - GICD_ISACTIVER); - return MEMTX_OK; + return true; case GICD_ICACTIVER ... GICD_ICACTIVER + 0x7f: *data = gicd_read_bitmap_reg(s, attrs, s->active, mask_nsacr_ge2, offset - GICD_ICACTIVER); - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... GICD_IPRIORITYR + 0x3ff: { int i, irq = offset - GICD_IPRIORITYR; @@ -452,12 +468,12 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, value |= gicd_read_ipriorityr(s, attrs, i); } *data = value; - return MEMTX_OK; + return true; } case GICD_ITARGETSR ... GICD_ITARGETSR + 0x3ff: /* RAZ/WI since affinity routing is always enabled */ *data = 0; - return MEMTX_OK; + return true; case GICD_ICFGR ... GICD_ICFGR + 0xff: { /* Here only the even bits are used; odd bits are RES0 */ @@ -466,7 +482,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } /* Since our edge_trigger bitmap is one bit per irq, we only need @@ -478,7 +494,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, value = extract32(value, (irq & 0x1f) ? 16 : 0, 16); value = half_shuffle32(value) << 1; *data = value; - return MEMTX_OK; + return true; } case GICD_IGRPMODR ... GICD_IGRPMODR + 0xff: { @@ -489,16 +505,16 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, * security enabled and this is an NS access */ *data = 0; - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGRPMODR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } *data = *gic_bmp_ptr32(s->grpmod, irq); - return MEMTX_OK; + return true; } case GICD_NSACR ... GICD_NSACR + 0xff: { @@ -507,7 +523,7 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, if (irq < GIC_INTERNAL || irq >= s->num_irq) { *data = 0; - return MEMTX_OK; + return true; } if ((s->gicd_ctlr & GICD_CTLR_DS) || !attrs.secure) { @@ -515,17 +531,17 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, * security enabled and this is an NS access */ *data = 0; - return MEMTX_OK; + return true; } *data = s->gicd_nsacr[irq / 16]; - return MEMTX_OK; + return true; } case GICD_CPENDSGIR ... GICD_CPENDSGIR + 0xf: case GICD_SPENDSGIR ... 
GICD_SPENDSGIR + 0xf: /* RAZ/WI since affinity routing is always enabled */ *data = 0; - return MEMTX_OK; + return true; case GICD_IROUTER ... GICD_IROUTER + 0x1fdf: { uint64_t r; @@ -537,26 +553,26 @@ static MemTxResult gicd_readl(GICv3State *s, hwaddr offset, } else { *data = (uint32_t)r; } - return MEMTX_OK; + return true; } case GICD_IDREGS ... GICD_IDREGS + 0x2f: /* ID registers */ *data = gicv3_idreg(offset - GICD_IDREGS); - return MEMTX_OK; + return true; case GICD_SGIR: /* WO registers, return unknown value */ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read from WO register at offset " TARGET_FMT_plx "\n", __func__, offset); *data = 0; - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writel(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Almost all GICv3 distributor registers are 32-bit. Note that * RO registers must ignore writes, not abort. @@ -600,68 +616,68 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, s->gicd_ctlr &= ~(GICD_CTLR_EN_GRP1S | GICD_CTLR_ARE_NS); } gicv3_full_update(s); - return MEMTX_OK; + return true; } case GICD_STATUSR: /* RAZ/WI for our implementation */ - return MEMTX_OK; + return true; case GICD_IGROUPR ... GICD_IGROUPR + 0x7f: { int irq; if (!attrs.secure && !(s->gicd_ctlr & GICD_CTLR_DS)) { - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGROUPR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } *gic_bmp_ptr32(s->group, irq) = value; gicv3_update(s, irq, 32); - return MEMTX_OK; + return true; } case GICD_ISENABLER ... GICD_ISENABLER + 0x7f: gicd_write_set_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ISENABLER, value); - return MEMTX_OK; + return true; case GICD_ICENABLER ... GICD_ICENABLER + 0x7f: gicd_write_clear_bitmap_reg(s, attrs, s->enabled, NULL, offset - GICD_ICENABLER, value); - return MEMTX_OK; + return true; case GICD_ISPENDR ... GICD_ISPENDR + 0x7f: gicd_write_set_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge1, offset - GICD_ISPENDR, value); - return MEMTX_OK; + return true; case GICD_ICPENDR ... GICD_ICPENDR + 0x7f: gicd_write_clear_bitmap_reg(s, attrs, s->pending, mask_nsacr_ge2, offset - GICD_ICPENDR, value); - return MEMTX_OK; + return true; case GICD_ISACTIVER ... GICD_ISACTIVER + 0x7f: gicd_write_set_bitmap_reg(s, attrs, s->active, NULL, offset - GICD_ISACTIVER, value); - return MEMTX_OK; + return true; case GICD_ICACTIVER ... GICD_ICACTIVER + 0x7f: gicd_write_clear_bitmap_reg(s, attrs, s->active, NULL, offset - GICD_ICACTIVER, value); - return MEMTX_OK; + return true; case GICD_IPRIORITYR ... GICD_IPRIORITYR + 0x3ff: { int i, irq = offset - GICD_IPRIORITYR; if (irq < GIC_INTERNAL || irq + 3 >= s->num_irq) { - return MEMTX_OK; + return true; } for (i = irq; i < irq + 4; i++, value >>= 8) { gicd_write_ipriorityr(s, attrs, i, value); } gicv3_update(s, irq, 4); - return MEMTX_OK; + return true; } case GICD_ITARGETSR ... GICD_ITARGETSR + 0x3ff: /* RAZ/WI since affinity routing is always enabled */ - return MEMTX_OK; + return true; case GICD_ICFGR ... 
GICD_ICFGR + 0xff: { /* Here only the odd bits are used; even bits are RES0 */ @@ -669,7 +685,7 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, uint32_t mask, oldval; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } /* Since our edge_trigger bitmap is one bit per irq, our input @@ -687,7 +703,7 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, oldval = *gic_bmp_ptr32(s->edge_trigger, (irq & ~0x1f)); value = (oldval & ~mask) | (value & mask); *gic_bmp_ptr32(s->edge_trigger, irq & ~0x1f) = value; - return MEMTX_OK; + return true; } case GICD_IGRPMODR ... GICD_IGRPMODR + 0xff: { @@ -697,16 +713,16 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, /* RAZ/WI if security disabled, or if * security enabled and this is an NS access */ - return MEMTX_OK; + return true; } /* RAZ/WI for SGIs, PPIs, unimplemented irqs */ irq = (offset - GICD_IGRPMODR) * 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } *gic_bmp_ptr32(s->grpmod, irq) = value; gicv3_update(s, irq, 32); - return MEMTX_OK; + return true; } case GICD_NSACR ... GICD_NSACR + 0xff: { @@ -714,41 +730,41 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, int irq = (offset - GICD_NSACR) * 4; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } if ((s->gicd_ctlr & GICD_CTLR_DS) || !attrs.secure) { /* RAZ/WI if security disabled, or if * security enabled and this is an NS access */ - return MEMTX_OK; + return true; } s->gicd_nsacr[irq / 16] = value; /* No update required as this only affects access permission checks */ - return MEMTX_OK; + return true; } case GICD_SGIR: /* RES0 if affinity routing is enabled */ - return MEMTX_OK; + return true; case GICD_CPENDSGIR ... GICD_CPENDSGIR + 0xf: case GICD_SPENDSGIR ... GICD_SPENDSGIR + 0xf: /* RAZ/WI since affinity routing is always enabled */ - return MEMTX_OK; + return true; case GICD_IROUTER ... GICD_IROUTER + 0x1fdf: { uint64_t r; int irq = (offset - GICD_IROUTER) / 8; if (irq < GIC_INTERNAL || irq >= s->num_irq) { - return MEMTX_OK; + return true; } /* Write half of the 64-bit register */ r = gicd_read_irouter(s, attrs, irq); r = deposit64(r, (offset & 7) ? 32 : 0, 32, value); gicd_write_irouter(s, attrs, irq, r); - return MEMTX_OK; + return true; } case GICD_IDREGS ... GICD_IDREGS + 0x2f: case GICD_TYPER: @@ -757,14 +773,14 @@ static MemTxResult gicd_writel(GICv3State *s, hwaddr offset, qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write to RO register at offset " TARGET_FMT_plx "\n", __func__, offset); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_writell(GICv3State *s, hwaddr offset, - uint64_t value, MemTxAttrs attrs) +static bool gicd_writeq(GICv3State *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) { /* Our only 64-bit registers are GICD_IROUTER */ int irq; @@ -773,14 +789,14 @@ static MemTxResult gicd_writell(GICv3State *s, hwaddr offset, case GICD_IROUTER ... 
GICD_IROUTER + 0x1fdf: irq = (offset - GICD_IROUTER) / 8; gicd_write_irouter(s, attrs, irq, value); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } -static MemTxResult gicd_readll(GICv3State *s, hwaddr offset, - uint64_t *data, MemTxAttrs attrs) +static bool gicd_readq(GICv3State *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) { /* Our only 64-bit registers are GICD_IROUTER */ int irq; @@ -789,9 +805,9 @@ static MemTxResult gicd_readll(GICv3State *s, hwaddr offset, case GICD_IROUTER ... GICD_IROUTER + 0x1fdf: irq = (offset - GICD_IROUTER) / 8; *data = gicd_read_irouter(s, attrs, irq); - return MEMTX_OK; + return true; default: - return MEMTX_ERROR; + return false; } } @@ -799,7 +815,7 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, unsigned size, MemTxAttrs attrs) { GICv3State *s = (GICv3State *)opaque; - MemTxResult r; + bool r; switch (size) { case 1: @@ -812,14 +828,14 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, r = gicd_readl(s, offset, data, attrs); break; case 8: - r = gicd_readll(s, offset, data, attrs); + r = gicd_readq(s, offset, data, attrs); break; default: - r = MEMTX_ERROR; + r = false; break; } - if (r == MEMTX_ERROR) { + if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx "size %u\n", __func__, offset, size); @@ -829,19 +845,18 @@ MemTxResult gicv3_dist_read(void *opaque, hwaddr offset, uint64_t *data, * trigger the guest-error logging but don't return it to * the caller, or we'll cause a spurious guest data abort. */ - r = MEMTX_OK; *data = 0; } else { trace_gicv3_dist_read(offset, *data, size, attrs.secure); } - return r; + return MEMTX_OK; } MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs) { GICv3State *s = (GICv3State *)opaque; - MemTxResult r; + bool r; switch (size) { case 1: @@ -854,14 +869,14 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, r = gicd_writel(s, offset, data, attrs); break; case 8: - r = gicd_writell(s, offset, data, attrs); + r = gicd_writeq(s, offset, data, attrs); break; default: - r = MEMTX_ERROR; + r = false; break; } - if (r == MEMTX_ERROR) { + if (!r) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx "size %u\n", __func__, offset, size); @@ -871,11 +886,10 @@ MemTxResult gicv3_dist_write(void *opaque, hwaddr offset, uint64_t data, * trigger the guest-error logging but don't return it to * the caller, or we'll cause a spurious guest data abort. */ - r = MEMTX_OK; } else { trace_gicv3_dist_write(offset, data, size, attrs.secure); } - return r; + return MEMTX_OK; } void gicv3_dist_set_irq(GICv3State *s, int irq, int level) diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c new file mode 100644 index 00000000000..c929a9cb5c3 --- /dev/null +++ b/hw/intc/arm_gicv3_its.c @@ -0,0 +1,1323 @@ +/* + * ITS emulation for a GICv3-based system + * + * Copyright Linaro.org 2021 + * + * Authors: + * Shashi Mallela + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/qdev-properties.h" +#include "hw/intc/arm_gicv3_its_common.h" +#include "gicv3_internal.h" +#include "qom/object.h" +#include "qapi/error.h" + +typedef struct GICv3ITSClass GICv3ITSClass; +/* This is reusing the GICv3ITSState typedef from ARM_GICV3_ITS_COMMON */ +DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSClass, + ARM_GICV3_ITS, TYPE_ARM_GICV3_ITS) + +struct GICv3ITSClass { + GICv3ITSCommonClass parent_class; + void (*parent_reset)(DeviceState *dev); +}; + +/* + * This is an internal enum used to distinguish between LPI triggered + * via command queue and LPI triggered via gits_translater write. + */ +typedef enum ItsCmdType { + NONE = 0, /* internal indication for GITS_TRANSLATER write */ + CLEAR = 1, + DISCARD = 2, + INTERRUPT = 3, +} ItsCmdType; + +typedef struct { + uint32_t iteh; + uint64_t itel; +} IteEntry; + +static uint64_t baser_base_addr(uint64_t value, uint32_t page_sz) +{ + uint64_t result = 0; + + switch (page_sz) { + case GITS_PAGE_SIZE_4K: + case GITS_PAGE_SIZE_16K: + result = FIELD_EX64(value, GITS_BASER, PHYADDR) << 12; + break; + + case GITS_PAGE_SIZE_64K: + result = FIELD_EX64(value, GITS_BASER, PHYADDRL_64K) << 16; + result |= FIELD_EX64(value, GITS_BASER, PHYADDRH_64K) << 48; + break; + + default: + break; + } + return result; +} + +static bool get_cte(GICv3ITSState *s, uint16_t icid, uint64_t *cte, + MemTxResult *res) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t l2t_addr; + uint64_t value; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + + if (s->ct.indirect) { + l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->ct.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + + if (*res == MEMTX_OK) { + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->ct.page_sz / s->ct.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + *cte = address_space_ldq_le(as, l2t_addr + + ((icid % max_l2_entries) * GITS_CTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + } + } else { + /* Flat level table */ + *cte = address_space_ldq_le(as, s->ct.base_addr + + (icid * GITS_CTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + + return (*cte & TABLE_ENTRY_VALID_MASK) != 0; +} + +static bool update_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte, + IteEntry ite) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t itt_addr; + MemTxResult res = MEMTX_OK; + + itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT; + itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */ + + address_space_stq_le(as, itt_addr + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))), ite.itel, MEMTXATTRS_UNSPECIFIED, + &res); + + if (res == MEMTX_OK) { + address_space_stl_le(as, itt_addr + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))) + sizeof(uint32_t), ite.iteh, + MEMTXATTRS_UNSPECIFIED, &res); + } + if (res != MEMTX_OK) { + return false; + } else { + return true; + } +} + +static bool get_ite(GICv3ITSState *s, uint32_t eventid, uint64_t dte, + uint16_t *icid, uint32_t *pIntid, MemTxResult *res) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t itt_addr; + bool status = false; + IteEntry ite = {}; + + itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT; + itt_addr <<= ITTADDR_SHIFT; /* 256 byte aligned */ + + ite.itel = address_space_ldq_le(as, itt_addr + + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))), MEMTXATTRS_UNSPECIFIED, + res); + + if (*res == MEMTX_OK) { + 
ite.iteh = address_space_ldl_le(as, itt_addr + + (eventid * (sizeof(uint64_t) + + sizeof(uint32_t))) + sizeof(uint32_t), + MEMTXATTRS_UNSPECIFIED, res); + + if (*res == MEMTX_OK) { + if (ite.itel & TABLE_ENTRY_VALID_MASK) { + if ((ite.itel >> ITE_ENTRY_INTTYPE_SHIFT) & + GITS_TYPE_PHYSICAL) { + *pIntid = (ite.itel & ITE_ENTRY_INTID_MASK) >> + ITE_ENTRY_INTID_SHIFT; + *icid = ite.iteh & ITE_ENTRY_ICID_MASK; + status = true; + } + } + } + } + return status; +} + +static uint64_t get_dte(GICv3ITSState *s, uint32_t devid, MemTxResult *res) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t l2t_addr; + uint64_t value; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + + if (s->dt.indirect) { + l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->dt.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + + if (*res == MEMTX_OK) { + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->dt.page_sz / s->dt.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + value = address_space_ldq_le(as, l2t_addr + + ((devid % max_l2_entries) * GITS_DTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + } + } else { + /* Flat level table */ + value = address_space_ldq_le(as, s->dt.base_addr + + (devid * GITS_DTE_SIZE), + MEMTXATTRS_UNSPECIFIED, res); + } + + return value; +} + +/* + * This function handles the processing of following commands based on + * the ItsCmdType parameter passed:- + * 1. triggering of lpi interrupt translation via ITS INT command + * 2. triggering of lpi interrupt translation via gits_translater register + * 3. handling of ITS CLEAR command + * 4. handling of ITS DISCARD command + */ +static bool process_its_cmd(GICv3ITSState *s, uint64_t value, uint32_t offset, + ItsCmdType cmd) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint32_t devid, eventid; + MemTxResult res = MEMTX_OK; + bool dte_valid; + uint64_t dte = 0; + uint32_t max_eventid; + uint16_t icid = 0; + uint32_t pIntid = 0; + bool ite_valid = false; + uint64_t cte = 0; + bool cte_valid = false; + bool result = false; + uint64_t rdbase; + + if (cmd == NONE) { + devid = offset; + } else { + devid = ((value & DEVID_MASK) >> DEVID_SHIFT); + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + } + + if (res != MEMTX_OK) { + return result; + } + + eventid = (value & EVENTID_MASK); + + dte = get_dte(s, devid, &res); + + if (res != MEMTX_OK) { + return result; + } + dte_valid = dte & TABLE_ENTRY_VALID_MASK; + + if (dte_valid) { + max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1)); + + ite_valid = get_ite(s, eventid, dte, &icid, &pIntid, &res); + + if (res != MEMTX_OK) { + return result; + } + + if (ite_valid) { + cte_valid = get_cte(s, icid, &cte, &res); + } + + if (res != MEMTX_OK) { + return result; + } + } + + if ((devid > s->dt.maxids.max_devids) || !dte_valid || !ite_valid || + !cte_valid || (eventid > max_eventid)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid command attributes " + "devid %d or eventid %d or invalid dte %d or" + "invalid cte %d or invalid ite %d\n", + __func__, devid, eventid, dte_valid, cte_valid, + ite_valid); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + /* + * Current implementation only supports rdbase == procnum + * Hence rdbase physical address is ignored + */ + rdbase = (cte & GITS_CTE_RDBASE_PROCNUM_MASK) >> 1U; + 
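/*
 * For illustration, with the collection table entry encoding used by
 * update_cte() below (valid flag in bit 0, redistributor/processor
 * number starting at bit 1), packing and unpacking a CTE looks like:
 *
 *   cte     = 1ULL | ((uint64_t)procnum << 1);             /* pack       */
 *   valid   = cte & TABLE_ENTRY_VALID_MASK;                /* bit 0      */
 *   procnum = (cte & GITS_CTE_RDBASE_PROCNUM_MASK) >> 1;   /* bits 1..16 */
 *
 * e.g. a CTE value of 0x7 routes the collection to redistributor 3.
 */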
+ if (rdbase > s->gicv3->num_cpu) { + return result; + } + + if ((cmd == CLEAR) || (cmd == DISCARD)) { + gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 0); + } else { + gicv3_redist_process_lpi(&s->gicv3->cpu[rdbase], pIntid, 1); + } + + if (cmd == DISCARD) { + IteEntry ite = {}; + /* remove mapping from interrupt translation table */ + result = update_ite(s, eventid, dte, ite); + } + } + + return result; +} + +static bool process_mapti(GICv3ITSState *s, uint64_t value, uint32_t offset, + bool ignore_pInt) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint32_t devid, eventid; + uint32_t pIntid = 0; + uint32_t max_eventid, max_Intid; + bool dte_valid; + MemTxResult res = MEMTX_OK; + uint16_t icid = 0; + uint64_t dte = 0; + IteEntry ite; + uint32_t int_spurious = INTID_SPURIOUS; + bool result = false; + + devid = ((value & DEVID_MASK) >> DEVID_SHIFT); + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + eventid = (value & EVENTID_MASK); + + if (!ignore_pInt) { + pIntid = ((value & pINTID_MASK) >> pINTID_SHIFT); + } + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + icid = value & ICID_MASK; + + dte = get_dte(s, devid, &res); + + if (res != MEMTX_OK) { + return result; + } + dte_valid = dte & TABLE_ENTRY_VALID_MASK; + + max_eventid = (1UL << (((dte >> 1U) & SIZE_MASK) + 1)); + + if (!ignore_pInt) { + max_Intid = (1ULL << (GICD_TYPER_IDBITS + 1)) - 1; + } + + if ((devid > s->dt.maxids.max_devids) || (icid > s->ct.maxids.max_collids) + || !dte_valid || (eventid > max_eventid) || + (!ignore_pInt && (((pIntid < GICV3_LPI_INTID_START) || + (pIntid > max_Intid)) && (pIntid != INTID_SPURIOUS)))) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid command attributes " + "devid %d or icid %d or eventid %d or pIntid %d or" + "unmapped dte %d\n", __func__, devid, icid, eventid, + pIntid, dte_valid); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + /* add ite entry to interrupt translation table */ + ite.itel = (dte_valid & TABLE_ENTRY_VALID_MASK) | + (GITS_TYPE_PHYSICAL << ITE_ENTRY_INTTYPE_SHIFT); + + if (ignore_pInt) { + ite.itel |= (eventid << ITE_ENTRY_INTID_SHIFT); + } else { + ite.itel |= (pIntid << ITE_ENTRY_INTID_SHIFT); + } + ite.itel |= (int_spurious << ITE_ENTRY_INTSP_SHIFT); + ite.iteh = icid; + + result = update_ite(s, eventid, dte, ite); + } + + return result; +} + +static bool update_cte(GICv3ITSState *s, uint16_t icid, bool valid, + uint64_t rdbase) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t value; + uint64_t l2t_addr; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + uint64_t cte = 0; + MemTxResult res = MEMTX_OK; + + if (!s->ct.valid) { + return true; + } + + if (valid) { + /* add mapping entry to collection table */ + cte = (valid & TABLE_ENTRY_VALID_MASK) | (rdbase << 1ULL); + } + + /* + * The specification defines the format of level 1 entries of a + * 2-level table, but the format of level 2 entries and the format + * of flat-mapped tables is IMPDEF. 
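 *
 * As consumed by the indirect walk below, a level 1 entry is simply the
 * physical address of the level 2 page with a valid flag in bit 63 -- a
 * minimal sketch of what this implementation expects, with 4KB pages and
 * an 8-byte entry size assumed for the worked numbers:
 *
 *   valid    = l1_entry & L2_TABLE_VALID_MASK;             /* bit 63           */
 *   l2_base  = l1_entry & ((1ULL << 51) - 1);              /* L2 page address  */
 *   l1_index = icid / (page_sz / L1TABLE_ENTRY_SIZE);      /* 1000 / 512 = 1   */
 *   l2_index = icid % (page_sz / entry_sz);                /* 1000 % 512 = 488 */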
+ */ + if (s->ct.indirect) { + l2t_id = icid / (s->ct.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->ct.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return false; + } + + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->ct.page_sz / s->ct.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + address_space_stq_le(as, l2t_addr + + ((icid % max_l2_entries) * GITS_CTE_SIZE), + cte, MEMTXATTRS_UNSPECIFIED, &res); + } + } else { + /* Flat level table */ + address_space_stq_le(as, s->ct.base_addr + (icid * GITS_CTE_SIZE), + cte, MEMTXATTRS_UNSPECIFIED, &res); + } + if (res != MEMTX_OK) { + return false; + } else { + return true; + } +} + +static bool process_mapc(GICv3ITSState *s, uint32_t offset) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint16_t icid; + uint64_t rdbase; + bool valid; + MemTxResult res = MEMTX_OK; + bool result = false; + uint64_t value; + + offset += NUM_BYTES_IN_DW; + offset += NUM_BYTES_IN_DW; + + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + icid = value & ICID_MASK; + + rdbase = (value & R_MAPC_RDBASE_MASK) >> R_MAPC_RDBASE_SHIFT; + rdbase &= RDBASE_PROCNUM_MASK; + + valid = (value & CMD_FIELD_VALID_MASK); + + if ((icid > s->ct.maxids.max_collids) || (rdbase > s->gicv3->num_cpu)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ITS MAPC: invalid collection table attributes " + "icid %d rdbase %" PRIu64 "\n", icid, rdbase); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + result = update_cte(s, icid, valid, rdbase); + } + + return result; +} + +static bool update_dte(GICv3ITSState *s, uint32_t devid, bool valid, + uint8_t size, uint64_t itt_addr) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint64_t value; + uint64_t l2t_addr; + bool valid_l2t; + uint32_t l2t_id; + uint32_t max_l2_entries; + uint64_t dte = 0; + MemTxResult res = MEMTX_OK; + + if (s->dt.valid) { + if (valid) { + /* add mapping entry to device table */ + dte = (valid & TABLE_ENTRY_VALID_MASK) | + ((size & SIZE_MASK) << 1U) | + (itt_addr << GITS_DTE_ITTADDR_SHIFT); + } + } else { + return true; + } + + /* + * The specification defines the format of level 1 entries of a + * 2-level table, but the format of level 2 entries and the format + * of flat-mapped tables is IMPDEF. 
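 *
 * For reference, the device table entry packed just above folds the valid
 * flag, the ITT size field and the ITT address into one 64-bit word (a
 * minimal sketch of this implementation's encoding):
 *
 *   dte      = 1ULL | ((uint64_t)size << 1) | (itt_addr << GITS_DTE_ITTADDR_SHIFT);
 *   valid    = dte & TABLE_ENTRY_VALID_MASK;                 /* bit 0     */
 *   size     = (dte >> 1) & SIZE_MASK;                       /* bits 1..5 */
 *   itt_addr = (dte & GITS_DTE_ITTADDR_MASK) >> GITS_DTE_ITTADDR_SHIFT;
 *
 * and the command handlers derive the EventID limit from it as
 * max_eventid = 1UL << (size + 1).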
+ */ + if (s->dt.indirect) { + l2t_id = devid / (s->dt.page_sz / L1TABLE_ENTRY_SIZE); + + value = address_space_ldq_le(as, + s->dt.base_addr + + (l2t_id * L1TABLE_ENTRY_SIZE), + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return false; + } + + valid_l2t = (value & L2_TABLE_VALID_MASK) != 0; + + if (valid_l2t) { + max_l2_entries = s->dt.page_sz / s->dt.entry_sz; + + l2t_addr = value & ((1ULL << 51) - 1); + + address_space_stq_le(as, l2t_addr + + ((devid % max_l2_entries) * GITS_DTE_SIZE), + dte, MEMTXATTRS_UNSPECIFIED, &res); + } + } else { + /* Flat level table */ + address_space_stq_le(as, s->dt.base_addr + (devid * GITS_DTE_SIZE), + dte, MEMTXATTRS_UNSPECIFIED, &res); + } + if (res != MEMTX_OK) { + return false; + } else { + return true; + } +} + +static bool process_mapd(GICv3ITSState *s, uint64_t value, uint32_t offset) +{ + AddressSpace *as = &s->gicv3->dma_as; + uint32_t devid; + uint8_t size; + uint64_t itt_addr; + bool valid; + MemTxResult res = MEMTX_OK; + bool result = false; + + devid = ((value & DEVID_MASK) >> DEVID_SHIFT); + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + size = (value & SIZE_MASK); + + offset += NUM_BYTES_IN_DW; + value = address_space_ldq_le(as, s->cq.base_addr + offset, + MEMTXATTRS_UNSPECIFIED, &res); + + if (res != MEMTX_OK) { + return result; + } + + itt_addr = (value & ITTADDR_MASK) >> ITTADDR_SHIFT; + + valid = (value & CMD_FIELD_VALID_MASK); + + if ((devid > s->dt.maxids.max_devids) || + (size > FIELD_EX64(s->typer, GITS_TYPER, IDBITS))) { + qemu_log_mask(LOG_GUEST_ERROR, + "ITS MAPD: invalid device table attributes " + "devid %d or size %d\n", devid, size); + /* + * in this implementation, in case of error + * we ignore this command and move onto the next + * command in the queue + */ + } else { + result = update_dte(s, devid, valid, size, itt_addr); + } + + return result; +} + +/* + * Current implementation blocks until all + * commands are processed + */ +static void process_cmdq(GICv3ITSState *s) +{ + uint32_t wr_offset = 0; + uint32_t rd_offset = 0; + uint32_t cq_offset = 0; + uint64_t data; + AddressSpace *as = &s->gicv3->dma_as; + MemTxResult res = MEMTX_OK; + bool result = true; + uint8_t cmd; + int i; + + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + return; + } + + wr_offset = FIELD_EX64(s->cwriter, GITS_CWRITER, OFFSET); + + if (wr_offset > s->cq.max_entries) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid write offset " + "%d\n", __func__, wr_offset); + return; + } + + rd_offset = FIELD_EX64(s->creadr, GITS_CREADR, OFFSET); + + if (rd_offset > s->cq.max_entries) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid read offset " + "%d\n", __func__, rd_offset); + return; + } + + while (wr_offset != rd_offset) { + cq_offset = (rd_offset * GITS_CMDQ_ENTRY_SIZE); + data = address_space_ldq_le(as, s->cq.base_addr + cq_offset, + MEMTXATTRS_UNSPECIFIED, &res); + if (res != MEMTX_OK) { + result = false; + } + cmd = (data & CMD_MASK); + + switch (cmd) { + case GITS_CMD_INT: + res = process_its_cmd(s, data, cq_offset, INTERRUPT); + break; + case GITS_CMD_CLEAR: + res = process_its_cmd(s, data, cq_offset, CLEAR); + break; + case GITS_CMD_SYNC: + /* + * Current implementation makes a blocking synchronous call + * for every command issued earlier, hence the internal state + * is already consistent by the time SYNC command is executed. + * Hence no further processing is required for SYNC command. 
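 *
 * For reference, each command slot in the queue is GITS_CMDQ_ENTRY_SIZE
 * (32) bytes, i.e. four doublewords, and the handlers above fetch the
 * remaining doublewords of a command in NUM_BYTES_IN_DW steps from the
 * same slot. A worked sizing example, assuming GITS_CBASER.Size = 0
 * (a single 4KB page):
 *
 *   num_pages   = FIELD_EX64(cbaser, GITS_CBASER, SIZE) + 1;  /* 1           */
 *   max_entries = (num_pages * GITS_PAGE_SIZE_4K) /
 *                 GITS_CMDQ_ENTRY_SIZE;                       /* 128 slots   */
 *   cq_offset   = rd_offset * GITS_CMDQ_ENTRY_SIZE;           /* byte offset */
 *
 * with rd_offset wrapping back to 0 once it reaches max_entries.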
+ */ + break; + case GITS_CMD_MAPD: + result = process_mapd(s, data, cq_offset); + break; + case GITS_CMD_MAPC: + result = process_mapc(s, cq_offset); + break; + case GITS_CMD_MAPTI: + result = process_mapti(s, data, cq_offset, false); + break; + case GITS_CMD_MAPI: + result = process_mapti(s, data, cq_offset, true); + break; + case GITS_CMD_DISCARD: + result = process_its_cmd(s, data, cq_offset, DISCARD); + break; + case GITS_CMD_INV: + case GITS_CMD_INVALL: + /* + * Current implementation doesn't cache any ITS tables, + * but the calculated lpi priority information. We only + * need to trigger lpi priority re-calculation to be in + * sync with LPI config table or pending table changes. + */ + for (i = 0; i < s->gicv3->num_cpu; i++) { + gicv3_redist_update_lpi(&s->gicv3->cpu[i]); + } + break; + default: + break; + } + if (result) { + rd_offset++; + rd_offset %= s->cq.max_entries; + s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, OFFSET, rd_offset); + } else { + /* + * in this implementation, in case of dma read/write error + * we stall the command processing + */ + s->creadr = FIELD_DP64(s->creadr, GITS_CREADR, STALLED, 1); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: %x cmd processing failed\n", __func__, cmd); + break; + } + } +} + +/* + * This function extracts the ITS Device and Collection table specific + * parameters (like base_addr, size etc) from GITS_BASER register. + * It is called during ITS enable and also during post_load migration + */ +static void extract_table_params(GICv3ITSState *s) +{ + uint16_t num_pages = 0; + uint8_t page_sz_type; + uint8_t type; + uint32_t page_sz = 0; + uint64_t value; + + for (int i = 0; i < 8; i++) { + value = s->baser[i]; + + if (!value) { + continue; + } + + page_sz_type = FIELD_EX64(value, GITS_BASER, PAGESIZE); + + switch (page_sz_type) { + case 0: + page_sz = GITS_PAGE_SIZE_4K; + break; + + case 1: + page_sz = GITS_PAGE_SIZE_16K; + break; + + case 2: + case 3: + page_sz = GITS_PAGE_SIZE_64K; + break; + + default: + g_assert_not_reached(); + } + + num_pages = FIELD_EX64(value, GITS_BASER, SIZE) + 1; + + type = FIELD_EX64(value, GITS_BASER, TYPE); + + switch (type) { + + case GITS_BASER_TYPE_DEVICE: + memset(&s->dt, 0 , sizeof(s->dt)); + s->dt.valid = FIELD_EX64(value, GITS_BASER, VALID); + + if (!s->dt.valid) { + return; + } + + s->dt.page_sz = page_sz; + s->dt.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT); + s->dt.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE); + + if (!s->dt.indirect) { + s->dt.max_entries = (num_pages * page_sz) / s->dt.entry_sz; + } else { + s->dt.max_entries = (((num_pages * page_sz) / + L1TABLE_ENTRY_SIZE) * + (page_sz / s->dt.entry_sz)); + } + + s->dt.maxids.max_devids = (1UL << (FIELD_EX64(s->typer, GITS_TYPER, + DEVBITS) + 1)); + + s->dt.base_addr = baser_base_addr(value, page_sz); + + break; + + case GITS_BASER_TYPE_COLLECTION: + memset(&s->ct, 0 , sizeof(s->ct)); + s->ct.valid = FIELD_EX64(value, GITS_BASER, VALID); + + /* + * GITS_TYPER.HCC is 0 for this implementation + * hence writes are discarded if ct.valid is 0 + */ + if (!s->ct.valid) { + return; + } + + s->ct.page_sz = page_sz; + s->ct.indirect = FIELD_EX64(value, GITS_BASER, INDIRECT); + s->ct.entry_sz = FIELD_EX64(value, GITS_BASER, ENTRYSIZE); + + if (!s->ct.indirect) { + s->ct.max_entries = (num_pages * page_sz) / s->ct.entry_sz; + } else { + s->ct.max_entries = (((num_pages * page_sz) / + L1TABLE_ENTRY_SIZE) * + (page_sz / s->ct.entry_sz)); + } + + if (FIELD_EX64(s->typer, GITS_TYPER, CIL)) { + s->ct.maxids.max_collids = (1UL << 
(FIELD_EX64(s->typer, + GITS_TYPER, CIDBITS) + 1)); + } else { + /* 16-bit CollectionId supported when CIL == 0 */ + s->ct.maxids.max_collids = (1UL << 16); + } + + s->ct.base_addr = baser_base_addr(value, page_sz); + + break; + + default: + break; + } + } +} + +static void extract_cmdq_params(GICv3ITSState *s) +{ + uint16_t num_pages = 0; + uint64_t value = s->cbaser; + + num_pages = FIELD_EX64(value, GITS_CBASER, SIZE) + 1; + + memset(&s->cq, 0 , sizeof(s->cq)); + s->cq.valid = FIELD_EX64(value, GITS_CBASER, VALID); + + if (s->cq.valid) { + s->cq.max_entries = (num_pages * GITS_PAGE_SIZE_4K) / + GITS_CMDQ_ENTRY_SIZE; + s->cq.base_addr = FIELD_EX64(value, GITS_CBASER, PHYADDR); + s->cq.base_addr <<= R_GITS_CBASER_PHYADDR_SHIFT; + } +} + +static MemTxResult gicv3_its_translation_write(void *opaque, hwaddr offset, + uint64_t data, unsigned size, + MemTxAttrs attrs) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + bool result = true; + uint32_t devid = 0; + + switch (offset) { + case GITS_TRANSLATER: + if (s->ctlr & ITS_CTLR_ENABLED) { + devid = attrs.requester_id; + result = process_its_cmd(s, data, devid, NONE); + } + break; + default: + break; + } + + if (result) { + return MEMTX_OK; + } else { + return MEMTX_ERROR; + } +} + +static bool its_writel(GICv3ITSState *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_CTLR: + if (value & R_GITS_CTLR_ENABLED_MASK) { + s->ctlr |= ITS_CTLR_ENABLED; + extract_table_params(s); + extract_cmdq_params(s); + s->creadr = 0; + process_cmdq(s); + } else { + s->ctlr &= ~ITS_CTLR_ENABLED; + } + break; + case GITS_CBASER: + /* + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + s->cbaser = deposit64(s->cbaser, 0, 32, value); + s->creadr = 0; + s->cwriter = s->creadr; + } + break; + case GITS_CBASER + 4: + /* + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + s->cbaser = deposit64(s->cbaser, 32, 32, value); + s->creadr = 0; + s->cwriter = s->creadr; + } + break; + case GITS_CWRITER: + s->cwriter = deposit64(s->cwriter, 0, 32, + (value & ~R_GITS_CWRITER_RETRY_MASK)); + if (s->cwriter != s->creadr) { + process_cmdq(s); + } + break; + case GITS_CWRITER + 4: + s->cwriter = deposit64(s->cwriter, 32, 32, value); + break; + case GITS_CREADR: + if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) { + s->creadr = deposit64(s->creadr, 0, 32, + (value & ~R_GITS_CREADR_STALLED_MASK)); + } else { + /* RO register, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + } + break; + case GITS_CREADR + 4: + if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) { + s->creadr = deposit64(s->creadr, 32, 32, value); + } else { + /* RO register, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + } + break; + case GITS_BASER ... 
GITS_BASER + 0x3f: + /* + * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + index = (offset - GITS_BASER) / 8; + + if (offset & 7) { + value <<= 32; + value &= ~GITS_BASER_RO_MASK; + s->baser[index] &= GITS_BASER_RO_MASK | MAKE_64BIT_MASK(0, 32); + s->baser[index] |= value; + } else { + value &= ~GITS_BASER_RO_MASK; + s->baser[index] &= GITS_BASER_RO_MASK | MAKE_64BIT_MASK(32, 32); + s->baser[index] |= value; + } + } + break; + case GITS_IIDR: + case GITS_IDREGS ... GITS_IDREGS + 0x2f: + /* RO registers, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + break; + default: + result = false; + break; + } + return result; +} + +static bool its_readl(GICv3ITSState *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_CTLR: + *data = s->ctlr; + break; + case GITS_IIDR: + *data = gicv3_iidr(); + break; + case GITS_IDREGS ... GITS_IDREGS + 0x2f: + /* ID registers */ + *data = gicv3_idreg(offset - GITS_IDREGS); + break; + case GITS_TYPER: + *data = extract64(s->typer, 0, 32); + break; + case GITS_TYPER + 4: + *data = extract64(s->typer, 32, 32); + break; + case GITS_CBASER: + *data = extract64(s->cbaser, 0, 32); + break; + case GITS_CBASER + 4: + *data = extract64(s->cbaser, 32, 32); + break; + case GITS_CREADR: + *data = extract64(s->creadr, 0, 32); + break; + case GITS_CREADR + 4: + *data = extract64(s->creadr, 32, 32); + break; + case GITS_CWRITER: + *data = extract64(s->cwriter, 0, 32); + break; + case GITS_CWRITER + 4: + *data = extract64(s->cwriter, 32, 32); + break; + case GITS_BASER ... GITS_BASER + 0x3f: + index = (offset - GITS_BASER) / 8; + if (offset & 7) { + *data = extract64(s->baser[index], 32, 32); + } else { + *data = extract64(s->baser[index], 0, 32); + } + break; + default: + result = false; + break; + } + return result; +} + +static bool its_writell(GICv3ITSState *s, hwaddr offset, + uint64_t value, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_BASER ... 
GITS_BASER + 0x3f: + /* + * IMPDEF choice:- GITS_BASERn register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + index = (offset - GITS_BASER) / 8; + s->baser[index] &= GITS_BASER_RO_MASK; + s->baser[index] |= (value & ~GITS_BASER_RO_MASK); + } + break; + case GITS_CBASER: + /* + * IMPDEF choice:- GITS_CBASER register becomes RO if ITS is + * already enabled + */ + if (!(s->ctlr & ITS_CTLR_ENABLED)) { + s->cbaser = value; + s->creadr = 0; + s->cwriter = s->creadr; + } + break; + case GITS_CWRITER: + s->cwriter = value & ~R_GITS_CWRITER_RETRY_MASK; + if (s->cwriter != s->creadr) { + process_cmdq(s); + } + break; + case GITS_CREADR: + if (s->gicv3->gicd_ctlr & GICD_CTLR_DS) { + s->creadr = value & ~R_GITS_CREADR_STALLED_MASK; + } else { + /* RO register, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + } + break; + case GITS_TYPER: + /* RO registers, ignore the write */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write to RO register at offset " + TARGET_FMT_plx "\n", __func__, offset); + break; + default: + result = false; + break; + } + return result; +} + +static bool its_readll(GICv3ITSState *s, hwaddr offset, + uint64_t *data, MemTxAttrs attrs) +{ + bool result = true; + int index; + + switch (offset) { + case GITS_TYPER: + *data = s->typer; + break; + case GITS_BASER ... GITS_BASER + 0x3f: + index = (offset - GITS_BASER) / 8; + *data = s->baser[index]; + break; + case GITS_CBASER: + *data = s->cbaser; + break; + case GITS_CREADR: + *data = s->creadr; + break; + case GITS_CWRITER: + *data = s->cwriter; + break; + default: + result = false; + break; + } + return result; +} + +static MemTxResult gicv3_its_read(void *opaque, hwaddr offset, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + bool result; + + switch (size) { + case 4: + result = its_readl(s, offset, data, attrs); + break; + case 8: + result = its_readll(s, offset, data, attrs); + break; + default: + result = false; + break; + } + + if (!result) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest read at offset " TARGET_FMT_plx + "size %u\n", __func__, offset, size); + /* + * The spec requires that reserved registers are RAZ/WI; + * so use false returns from leaf functions as a way to + * trigger the guest-error logging but don't return it to + * the caller, or we'll cause a spurious guest data abort. + */ + *data = 0; + } + return MEMTX_OK; +} + +static MemTxResult gicv3_its_write(void *opaque, hwaddr offset, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + bool result; + + switch (size) { + case 4: + result = its_writel(s, offset, data, attrs); + break; + case 8: + result = its_writell(s, offset, data, attrs); + break; + default: + result = false; + break; + } + + if (!result) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write at offset " TARGET_FMT_plx + "size %u\n", __func__, offset, size); + /* + * The spec requires that reserved registers are RAZ/WI; + * so use false returns from leaf functions as a way to + * trigger the guest-error logging but don't return it to + * the caller, or we'll cause a spurious guest data abort. 
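 *
 * Note that the 64-bit ITS registers are also guest-accessible as two
 * 32-bit halves; for example, programming GITS_CBASER word by word goes
 * through the deposit64()/extract64() pairs in its_writel()/its_readl()
 * roughly as follows (lo/hi are illustrative names for the two guest
 * stores/loads):
 *
 *   s->cbaser = deposit64(s->cbaser, 0, 32, lo);   /* write GITS_CBASER     */
 *   s->cbaser = deposit64(s->cbaser, 32, 32, hi);  /* write GITS_CBASER + 4 */
 *   lo = extract64(s->cbaser, 0, 32);              /* read  GITS_CBASER     */
 *   hi = extract64(s->cbaser, 32, 32);             /* read  GITS_CBASER + 4 */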
+ */ + } + return MEMTX_OK; +} + +static const MemoryRegionOps gicv3_its_control_ops = { + .read_with_attrs = gicv3_its_read, + .write_with_attrs = gicv3_its_write, + .valid.min_access_size = 4, + .valid.max_access_size = 8, + .impl.min_access_size = 4, + .impl.max_access_size = 8, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps gicv3_its_translation_ops = { + .write_with_attrs = gicv3_its_translation_write, + .valid.min_access_size = 2, + .valid.max_access_size = 4, + .impl.min_access_size = 2, + .impl.max_access_size = 4, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void gicv3_arm_its_realize(DeviceState *dev, Error **errp) +{ + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + int i; + + for (i = 0; i < s->gicv3->num_cpu; i++) { + if (!(s->gicv3->cpu[i].gicr_typer & GICR_TYPER_PLPIS)) { + error_setg(errp, "Physical LPI not supported by CPU %d", i); + return; + } + } + + gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops); + + address_space_init(&s->gicv3->dma_as, s->gicv3->dma, + "gicv3-its-sysmem"); + + /* set the ITS default features supported */ + s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL, + GITS_TYPE_PHYSICAL); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE, + ITS_ITT_ENTRY_SIZE - 1); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, IDBITS, ITS_IDBITS); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, DEVBITS, ITS_DEVBITS); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIL, 1); + s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIDBITS, ITS_CIDBITS); +} + +static void gicv3_its_reset(DeviceState *dev) +{ + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + GICv3ITSClass *c = ARM_GICV3_ITS_GET_CLASS(s); + + c->parent_reset(dev); + + /* Quiescent bit reset to 1 */ + s->ctlr = FIELD_DP32(s->ctlr, GITS_CTLR, QUIESCENT, 1); + + /* + * setting GITS_BASER0.Type = 0b001 (Device) + * GITS_BASER1.Type = 0b100 (Collection Table) + * GITS_BASER.Type,where n = 3 to 7 are 0b00 (Unimplemented) + * GITS_BASER<0,1>.Page_Size = 64KB + * and default translation table entry size to 16 bytes + */ + s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, TYPE, + GITS_BASER_TYPE_DEVICE); + s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, PAGESIZE, + GITS_BASER_PAGESIZE_64K); + s->baser[0] = FIELD_DP64(s->baser[0], GITS_BASER, ENTRYSIZE, + GITS_DTE_SIZE - 1); + + s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, TYPE, + GITS_BASER_TYPE_COLLECTION); + s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, PAGESIZE, + GITS_BASER_PAGESIZE_64K); + s->baser[1] = FIELD_DP64(s->baser[1], GITS_BASER, ENTRYSIZE, + GITS_CTE_SIZE - 1); +} + +static void gicv3_its_post_load(GICv3ITSState *s) +{ + if (s->ctlr & ITS_CTLR_ENABLED) { + extract_table_params(s); + extract_cmdq_params(s); + } +} + +static Property gicv3_its_props[] = { + DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3", + GICv3State *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void gicv3_its_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + GICv3ITSClass *ic = ARM_GICV3_ITS_CLASS(klass); + GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass); + + dc->realize = gicv3_arm_its_realize; + device_class_set_props(dc, gicv3_its_props); + device_class_set_parent_reset(dc, gicv3_its_reset, &ic->parent_reset); + icc->post_load = gicv3_its_post_load; +} + +static const TypeInfo gicv3_its_info = { + .name = TYPE_ARM_GICV3_ITS, + .parent = TYPE_ARM_GICV3_ITS_COMMON, + .instance_size = sizeof(GICv3ITSState), + .class_init = gicv3_its_class_init, + 
.class_size = sizeof(GICv3ITSClass), +}; + +static void gicv3_its_register_types(void) +{ + type_register_static(&gicv3_its_info); +} + +type_init(gicv3_its_register_types) diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c index 66c4c6a1888..90b85f1e25c 100644 --- a/hw/intc/arm_gicv3_its_common.c +++ b/hw/intc/arm_gicv3_its_common.c @@ -99,14 +99,15 @@ static const MemoryRegionOps gicv3_its_trans_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops) +void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops, + const MemoryRegionOps *tops) { SysBusDevice *sbd = SYS_BUS_DEVICE(s); memory_region_init_io(&s->iomem_its_cntrl, OBJECT(s), ops, s, "control", ITS_CONTROL_SIZE); memory_region_init_io(&s->iomem_its_translation, OBJECT(s), - &gicv3_its_trans_ops, s, + tops ? tops : &gicv3_its_trans_ops, s, "translation", ITS_TRANS_SIZE); /* Our two regions are always adjacent, therefore we now combine them diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c index b554d2ede0a..0b4cbed28b3 100644 --- a/hw/intc/arm_gicv3_its_kvm.c +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -106,7 +106,7 @@ static void kvm_arm_its_realize(DeviceState *dev, Error **errp) kvm_arm_register_device(&s->iomem_its_cntrl, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd, 0); - gicv3_its_init_mmio(s, NULL); + gicv3_its_init_mmio(s, NULL, NULL); if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, GITS_CTLR)) { diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 65a4c880a35..5ec5ff9ef6e 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -22,7 +22,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" -#include "hw/sysbus.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/kvm.h" @@ -788,11 +787,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) return; } - gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); for (i = 0; i < s->num_cpu; i++) { ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); @@ -830,7 +825,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); if (!multiple_redist_region_allowed) { - kvm_arm_register_device(&s->iomem_redist[0], -1, + kvm_arm_register_device(&s->redist_regions[0].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); } else { @@ -843,7 +838,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) uint64_t addr_ormask = i | ((uint64_t)s->redist_region_count[i] << 52); - kvm_arm_register_device(&s->iomem_redist[i], -1, + kvm_arm_register_device(&s->redist_regions[i].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, s->dev_fd, addr_ormask); diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c index 8645220d618..c8ff3eca085 100644 --- a/hw/intc/arm_gicv3_redist.c +++ b/hw/intc/arm_gicv3_redist.c @@ -248,10 +248,20 @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset, case GICR_CTLR: /* For our implementation, GICR_TYPER.DPGS is 0 and so all * the DPG bits are RAZ/WI. We don't do anything asynchronously, - * so UWP and RWP are RAZ/WI. And GICR_TYPER.LPIS is 0 (we don't - * implement LPIs) so Enable_LPIs is RES0. So there are no writable - * bits for us. 
+ * so UWP and RWP are RAZ/WI. GICR_TYPER.LPIS is 1 (we + * implement LPIs) so Enable_LPIs is programmable. */ + if (cs->gicr_typer & GICR_TYPER_PLPIS) { + if (value & GICR_CTLR_ENABLE_LPIS) { + cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS; + /* Check for any pending interr in pending table */ + gicv3_redist_update_lpi(cs); + } else { + cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS; + /* cs->hppi might have been an LPI; recalculate */ + gicv3_redist_update(cs); + } + } return MEMTX_OK; case GICR_STATUSR: /* RAZ/WI for our implementation */ @@ -416,22 +426,24 @@ static MemTxResult gicr_writell(GICv3CPUState *cs, hwaddr offset, MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. */ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -453,7 +465,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, if (r == MEMTX_ERROR) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest read at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_redist_badread(gicv3_redist_affid(cs), offset, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; @@ -473,22 +485,24 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. 
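 *
 * The opaque pointer is therefore a GICv3RedistRegion rather than the
 * GICv3State itself, and the target CPU is recovered from the region's
 * first CPU plus the offset into the region. A worked example, assuming
 * the usual two-frame 0x20000 per-CPU stride (GICV3_REDIST_SIZE):
 *
 *   cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE;
 *   offset %= GICV3_REDIST_SIZE;
 *
 * so an access at offset 0x60000 into a region whose cpuidx is 8 is
 * routed to CPU 11, at offset 0 within that CPU's frames.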
*/ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -510,7 +524,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, if (r == MEMTX_ERROR) { qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid guest write at offset " TARGET_FMT_plx - "size %u\n", __func__, offset, size); + " size %u\n", __func__, offset, size); trace_gicv3_redist_badwrite(gicv3_redist_affid(cs), offset, data, size, attrs.secure); /* The spec requires that reserved registers are RAZ/WI; @@ -526,6 +540,149 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, return r; } +static void gicv3_redist_check_lpi_priority(GICv3CPUState *cs, int irq) +{ + AddressSpace *as = &cs->gic->dma_as; + uint64_t lpict_baddr; + uint8_t lpite; + uint8_t prio; + + lpict_baddr = cs->gicr_propbaser & R_GICR_PROPBASER_PHYADDR_MASK; + + address_space_read(as, lpict_baddr + ((irq - GICV3_LPI_INTID_START) * + sizeof(lpite)), MEMTXATTRS_UNSPECIFIED, &lpite, + sizeof(lpite)); + + if (!(lpite & LPI_CTE_ENABLED)) { + return; + } + + if (cs->gic->gicd_ctlr & GICD_CTLR_DS) { + prio = lpite & LPI_PRIORITY_MASK; + } else { + prio = ((lpite & LPI_PRIORITY_MASK) >> 1) | 0x80; + } + + if ((prio < cs->hpplpi.prio) || + ((prio == cs->hpplpi.prio) && (irq <= cs->hpplpi.irq))) { + cs->hpplpi.irq = irq; + cs->hpplpi.prio = prio; + /* LPIs are always non-secure Grp1 interrupts */ + cs->hpplpi.grp = GICV3_G1NS; + } +} + +void gicv3_redist_update_lpi_only(GICv3CPUState *cs) +{ + /* + * This function scans the LPI pending table and for each pending + * LPI, reads the corresponding entry from LPI configuration table + * to extract the priority info and determine if the current LPI + * priority is lower than the last computed high priority lpi interrupt. + * If yes, replace current LPI as the new high priority lpi interrupt. + */ + AddressSpace *as = &cs->gic->dma_as; + uint64_t lpipt_baddr; + uint32_t pendt_size = 0; + uint8_t pend; + int i, bit; + uint64_t idbits; + + idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS), + GICD_TYPER_IDBITS); + + if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser || + !cs->gicr_pendbaser) { + return; + } + + cs->hpplpi.prio = 0xff; + + lpipt_baddr = cs->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; + + /* Determine the highest priority pending interrupt among LPIs */ + pendt_size = (1ULL << (idbits + 1)); + + for (i = GICV3_LPI_INTID_START / 8; i < pendt_size / 8; i++) { + address_space_read(as, lpipt_baddr + i, MEMTXATTRS_UNSPECIFIED, &pend, + sizeof(pend)); + + while (pend) { + bit = ctz32(pend); + gicv3_redist_check_lpi_priority(cs, i * 8 + bit); + pend &= ~(1 << bit); + } + } +} + +void gicv3_redist_update_lpi(GICv3CPUState *cs) +{ + gicv3_redist_update_lpi_only(cs); + gicv3_redist_update(cs); +} + +void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level) +{ + /* + * This function updates the pending bit in lpi pending table for + * the irq being activated or deactivated. 
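 *
 * The pending table is a bitmap indexed by INTID: the state for a given
 * LPI lives in byte (irq / 8), bit (irq % 8), of the table whose base
 * comes from GICR_PENDBASER. A worked example, assuming LPIs start at
 * INTID 8192 (GICV3_LPI_INTID_START):
 *
 *   byte_off = irq / 8;                 /* LPI 8195 -> byte 1024 */
 *   bit      = irq % 8;                 /* LPI 8195 -> bit 3     */
 *   ispend   = extract32(pend, bit, 1);
 *   pend     = deposit32(pend, bit, 1, level ? 1 : 0);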
+ */ + AddressSpace *as = &cs->gic->dma_as; + uint64_t lpipt_baddr; + bool ispend = false; + uint8_t pend; + + /* + * get the bit value corresponding to this irq in the + * lpi pending table + */ + lpipt_baddr = cs->gicr_pendbaser & R_GICR_PENDBASER_PHYADDR_MASK; + + address_space_read(as, lpipt_baddr + ((irq / 8) * sizeof(pend)), + MEMTXATTRS_UNSPECIFIED, &pend, sizeof(pend)); + + ispend = extract32(pend, irq % 8, 1); + + /* no change in the value of pending bit, return */ + if (ispend == level) { + return; + } + pend = deposit32(pend, irq % 8, 1, level ? 1 : 0); + + address_space_write(as, lpipt_baddr + ((irq / 8) * sizeof(pend)), + MEMTXATTRS_UNSPECIFIED, &pend, sizeof(pend)); + + /* + * check if this LPI is better than the current hpplpi, if yes + * just set hpplpi.prio and .irq without doing a full rescan + */ + if (level) { + gicv3_redist_check_lpi_priority(cs, irq); + gicv3_redist_update(cs); + } else { + if (irq == cs->hpplpi.irq) { + gicv3_redist_update_lpi(cs); + } + } +} + +void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level) +{ + uint64_t idbits; + + idbits = MIN(FIELD_EX64(cs->gicr_propbaser, GICR_PROPBASER, IDBITS), + GICD_TYPER_IDBITS); + + if (!(cs->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) || !cs->gicr_propbaser || + !cs->gicr_pendbaser || (irq > (1ULL << (idbits + 1)) - 1) || + irq < GICV3_LPI_INTID_START) { + return; + } + + /* set/clear the pending bit for this irq */ + gicv3_redist_lpi_pending(cs, irq, level); +} + void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level) { /* Update redistributor state for a change in an external PPI input line */ diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index 0d8426dafc9..13df002ce4d 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/sysbus.h" #include "migration/vmstate.h" #include "qemu/timer.h" @@ -128,15 +127,14 @@ static bool nvic_isrpending(NVICState *s) { int irq; - /* We can shortcut if the highest priority pending interrupt - * happens to be external or if there is nothing pending. + /* + * We can shortcut if the highest priority pending interrupt + * happens to be external; if not we need to check the whole + * vectors[] array. 
*/ if (s->vectpending > NVIC_FIRST_IRQ) { return true; } - if (s->vectpending == 0) { - return false; - } for (irq = NVIC_FIRST_IRQ; irq < s->num_irq; irq++) { if (s->vectors[irq].pending) { @@ -806,6 +804,16 @@ void armv7m_nvic_acknowledge_irq(void *opaque) nvic_irq_update(s); } +static bool vectpending_targets_secure(NVICState *s) +{ + /* Return true if s->vectpending targets Secure state */ + if (s->vectpending_is_s_banked) { + return true; + } + return !exc_is_banked(s->vectpending) && + exc_targets_secure(s, s->vectpending); +} + void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq, bool *ptargets_secure) { @@ -815,12 +823,7 @@ void armv7m_nvic_get_pending_irq_info(void *opaque, assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq); - if (s->vectpending_is_s_banked) { - targets_secure = true; - } else { - targets_secure = !exc_is_banked(pending) && - exc_targets_secure(s, pending); - } + targets_secure = vectpending_targets_secure(s); trace_nvic_get_pending_irq_info(pending, targets_secure); @@ -1041,7 +1044,19 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) /* VECTACTIVE */ val = cpu->env.v7m.exception; /* VECTPENDING */ - val |= (s->vectpending & 0xff) << 12; + if (s->vectpending) { + /* + * From v8.1M VECTPENDING must read as 1 if accessed as + * NonSecure and the highest priority pending and enabled + * exception targets Secure. + */ + int vp = s->vectpending; + if (!attrs.secure && arm_feature(&cpu->env, ARM_FEATURE_V8_1M) && + vectpending_targets_secure(s)) { + vp = 1; + } + val |= (vp & 0x1ff) << 12; + } /* ISRPENDING - set if any external IRQ is pending */ if (nvic_isrpending(s)) { val |= (1 << 22); @@ -2455,172 +2470,6 @@ static const MemoryRegionOps nvic_sysreg_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static MemTxResult nvic_sysreg_ns_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - MemoryRegion *mr = opaque; - - if (attrs.secure) { - /* S accesses to the alias act like NS accesses to the real region */ - attrs.secure = 0; - return memory_region_dispatch_write(mr, addr, value, - size_memop(size) | MO_TE, attrs); - } else { - /* NS attrs are RAZ/WI for privileged, and BusFault for user */ - if (attrs.user) { - return MEMTX_ERROR; - } - return MEMTX_OK; - } -} - -static MemTxResult nvic_sysreg_ns_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - MemoryRegion *mr = opaque; - - if (attrs.secure) { - /* S accesses to the alias act like NS accesses to the real region */ - attrs.secure = 0; - return memory_region_dispatch_read(mr, addr, data, - size_memop(size) | MO_TE, attrs); - } else { - /* NS attrs are RAZ/WI for privileged, and BusFault for user */ - if (attrs.user) { - return MEMTX_ERROR; - } - *data = 0; - return MEMTX_OK; - } -} - -static const MemoryRegionOps nvic_sysreg_ns_ops = { - .read_with_attrs = nvic_sysreg_ns_read, - .write_with_attrs = nvic_sysreg_ns_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static MemTxResult nvic_systick_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - NVICState *s = opaque; - MemoryRegion *mr; - - /* Direct the access to the correct systick */ - mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); - return memory_region_dispatch_write(mr, addr, value, - size_memop(size) | MO_TE, attrs); -} - -static MemTxResult nvic_systick_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - NVICState *s = opaque; - 
MemoryRegion *mr; - - /* Direct the access to the correct systick */ - mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->systick[attrs.secure]), 0); - return memory_region_dispatch_read(mr, addr, data, size_memop(size) | MO_TE, - attrs); -} - -static const MemoryRegionOps nvic_systick_ops = { - .read_with_attrs = nvic_systick_read, - .write_with_attrs = nvic_systick_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - - -static MemTxResult ras_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - if (attrs.user) { - return MEMTX_ERROR; - } - - switch (addr) { - case 0xe10: /* ERRIIDR */ - /* architect field = Arm; product/variant/revision 0 */ - *data = 0x43b; - break; - case 0xfc8: /* ERRDEVID */ - /* Minimal RAS: we implement 0 error record indexes */ - *data = 0; - break; - default: - qemu_log_mask(LOG_UNIMP, "Read RAS register offset 0x%x\n", - (uint32_t)addr); - *data = 0; - break; - } - return MEMTX_OK; -} - -static MemTxResult ras_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - if (attrs.user) { - return MEMTX_ERROR; - } - - switch (addr) { - default: - qemu_log_mask(LOG_UNIMP, "Write to RAS register offset 0x%x\n", - (uint32_t)addr); - break; - } - return MEMTX_OK; -} - -static const MemoryRegionOps ras_ops = { - .read_with_attrs = ras_read, - .write_with_attrs = ras_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -/* - * Unassigned portions of the PPB space are RAZ/WI for privileged - * accesses, and fault for non-privileged accesses. - */ -static MemTxResult ppb_default_read(void *opaque, hwaddr addr, - uint64_t *data, unsigned size, - MemTxAttrs attrs) -{ - qemu_log_mask(LOG_UNIMP, "Read of unassigned area of PPB: offset 0x%x\n", - (uint32_t)addr); - if (attrs.user) { - return MEMTX_ERROR; - } - *data = 0; - return MEMTX_OK; -} - -static MemTxResult ppb_default_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size, - MemTxAttrs attrs) -{ - qemu_log_mask(LOG_UNIMP, "Write of unassigned area of PPB: offset 0x%x\n", - (uint32_t)addr); - if (attrs.user) { - return MEMTX_ERROR; - } - return MEMTX_OK; -} - -static const MemoryRegionOps ppb_default_ops = { - .read_with_attrs = ppb_default_read, - .write_with_attrs = ppb_default_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid.min_access_size = 1, - .valid.max_access_size = 8, -}; - static int nvic_post_load(void *opaque, int version_id) { NVICState *s = opaque; @@ -2836,128 +2685,22 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) s->num_prio_bits = arm_feature(&s->cpu->env, ARM_FEATURE_V7) ? 8 : 2; - if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), errp)) { - return; - } - sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_NS]), 0, - qdev_get_gpio_in_named(dev, "systick-trigger", - M_REG_NS)); - - if (arm_feature(&s->cpu->env, ARM_FEATURE_M_SECURITY)) { - /* We couldn't init the secure systick device in instance_init - * as we didn't know then if the CPU had the security extensions; - * so we have to do it here. - */ - object_initialize_child(OBJECT(dev), "systick-reg-s", - &s->systick[M_REG_S], TYPE_SYSTICK); - - if (!sysbus_realize(SYS_BUS_DEVICE(&s->systick[M_REG_S]), errp)) { - return; - } - sysbus_connect_irq(SYS_BUS_DEVICE(&s->systick[M_REG_S]), 0, - qdev_get_gpio_in_named(dev, "systick-trigger", - M_REG_S)); - } - /* - * This device provides a single sysbus memory region which - * represents the whole of the "System PPB" space. 
This is the - * range from 0xe0000000 to 0xe00fffff and includes the NVIC, - * the System Control Space (system registers), the systick timer, - * and for CPUs with the Security extension an NS banked version - * of all of these. - * - * The default behaviour for unimplemented registers/ranges - * (for instance the Data Watchpoint and Trace unit at 0xe0001000) - * is to RAZ/WI for privileged access and BusFault for non-privileged - * access. - * - * The NVIC and System Control Space (SCS) starts at 0xe000e000 - * and looks like this: - * 0x004 - ICTR - * 0x010 - 0xff - systick - * 0x100..0x7ec - NVIC - * 0x7f0..0xcff - Reserved - * 0xd00..0xd3c - SCS registers - * 0xd40..0xeff - Reserved or Not implemented - * 0xf00 - STIR - * - * Some registers within this space are banked between security states. - * In v8M there is a second range 0xe002e000..0xe002efff which is the - * NonSecure alias SCS; secure accesses to this behave like NS accesses - * to the main SCS range, and non-secure accesses (including when - * the security extension is not implemented) are RAZ/WI. - * Note that both the main SCS range and the alias range are defined - * to be exempt from memory attribution (R_BLJT) and so the memory - * transaction attribute always matches the current CPU security - * state (attrs.secure == env->v7m.secure). In the nvic_sysreg_ns_ops - * wrappers we change attrs.secure to indicate the NS access; so - * generally code determining which banked register to use should - * use attrs.secure; code determining actual behaviour of the system - * should use env->v7m.secure. - * - * The container covers the whole PPB space. Within it the priority - * of overlapping regions is: - * - default region (for RAZ/WI and BusFault) : -1 - * - system register regions : 0 - * - systick : 1 - * This is because the systick device is a small block of registers - * in the middle of the other system control registers. + * This device provides a single memory region which covers the + * sysreg/NVIC registers from 0xE000E000 .. 0xE000EFFF, with the + * exception of the systick timer registers 0xE000E010 .. 0xE000E0FF. 
*/ - memory_region_init(&s->container, OBJECT(s), "nvic", 0x100000); - memory_region_init_io(&s->defaultmem, OBJECT(s), &ppb_default_ops, s, - "nvic-default", 0x100000); - memory_region_add_subregion_overlap(&s->container, 0, &s->defaultmem, -1); memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s, "nvic_sysregs", 0x1000); - memory_region_add_subregion(&s->container, 0xe000, &s->sysregmem); - - memory_region_init_io(&s->systickmem, OBJECT(s), - &nvic_systick_ops, s, - "nvic_systick", 0xe0); - - memory_region_add_subregion_overlap(&s->container, 0xe010, - &s->systickmem, 1); - - if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { - memory_region_init_io(&s->sysreg_ns_mem, OBJECT(s), - &nvic_sysreg_ns_ops, &s->sysregmem, - "nvic_sysregs_ns", 0x1000); - memory_region_add_subregion(&s->container, 0x2e000, &s->sysreg_ns_mem); - memory_region_init_io(&s->systick_ns_mem, OBJECT(s), - &nvic_sysreg_ns_ops, &s->systickmem, - "nvic_systick_ns", 0xe0); - memory_region_add_subregion_overlap(&s->container, 0x2e010, - &s->systick_ns_mem, 1); - } - - if (cpu_isar_feature(aa32_ras, s->cpu)) { - memory_region_init_io(&s->ras_mem, OBJECT(s), - &ras_ops, s, "nvic_ras", 0x1000); - memory_region_add_subregion(&s->container, 0x5000, &s->ras_mem); - } - - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->sysregmem); } static void armv7m_nvic_instance_init(Object *obj) { - /* We have a different default value for the num-irq property - * than our superclass. This function runs after qdev init - * has set the defaults from the Property array and before - * any user-specified property setting, so just modify the - * value in the GICState struct. - */ DeviceState *dev = DEVICE(obj); NVICState *nvic = NVIC(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - object_initialize_child(obj, "systick-reg-ns", &nvic->systick[M_REG_NS], - TYPE_SYSTICK); - /* We can't initialize the secure systick here, as we don't know - * yet if we need it. 
- */ - sysbus_init_irq(sbd, &nvic->excpout); qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index 05303a55c88..b9c37453b04 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -24,6 +24,7 @@ #ifndef QEMU_ARM_GICV3_INTERNAL_H #define QEMU_ARM_GICV3_INTERNAL_H +#include "hw/registerfields.h" #include "hw/intc/arm_gicv3_common.h" /* Distributor registers, as offsets from the distributor base address */ @@ -67,6 +68,11 @@ #define GICD_CTLR_E1NWF (1U << 7) #define GICD_CTLR_RWP (1U << 31) +#define GICD_TYPER_LPIS_SHIFT 17 + +/* 16 bits EventId */ +#define GICD_TYPER_IDBITS 0xf + /* * Redistributor frame offsets from RD_base */ @@ -122,17 +128,19 @@ #define GICR_WAKER_ProcessorSleep (1U << 1) #define GICR_WAKER_ChildrenAsleep (1U << 2) -#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK (7ULL << 56) -#define GICR_PROPBASER_ADDR_MASK (0xfffffffffULL << 12) -#define GICR_PROPBASER_SHAREABILITY_MASK (3U << 10) -#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7) -#define GICR_PROPBASER_IDBITS_MASK (0x1f) +FIELD(GICR_PROPBASER, IDBITS, 0, 5) +FIELD(GICR_PROPBASER, INNERCACHE, 7, 3) +FIELD(GICR_PROPBASER, SHAREABILITY, 10, 2) +FIELD(GICR_PROPBASER, PHYADDR, 12, 40) +FIELD(GICR_PROPBASER, OUTERCACHE, 56, 3) -#define GICR_PENDBASER_PTZ (1ULL << 62) -#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK (7ULL << 56) -#define GICR_PENDBASER_ADDR_MASK (0xffffffffULL << 16) -#define GICR_PENDBASER_SHAREABILITY_MASK (3U << 10) -#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7) +FIELD(GICR_PENDBASER, INNERCACHE, 7, 3) +FIELD(GICR_PENDBASER, SHAREABILITY, 10, 2) +FIELD(GICR_PENDBASER, PHYADDR, 16, 36) +FIELD(GICR_PENDBASER, OUTERCACHE, 56, 3) +FIELD(GICR_PENDBASER, PTZ, 62, 1) + +#define GICR_PROPBASER_IDBITS_THRESHOLD 0xd #define ICC_CTLR_EL1_CBPR (1U << 0) #define ICC_CTLR_EL1_EOIMODE (1U << 1) @@ -239,6 +247,163 @@ #define ICH_VTR_EL2_PREBITS_SHIFT 26 #define ICH_VTR_EL2_PRIBITS_SHIFT 29 +/* ITS Registers */ + +FIELD(GITS_BASER, SIZE, 0, 8) +FIELD(GITS_BASER, PAGESIZE, 8, 2) +FIELD(GITS_BASER, SHAREABILITY, 10, 2) +FIELD(GITS_BASER, PHYADDR, 12, 36) +FIELD(GITS_BASER, PHYADDRL_64K, 16, 32) +FIELD(GITS_BASER, PHYADDRH_64K, 12, 4) +FIELD(GITS_BASER, ENTRYSIZE, 48, 5) +FIELD(GITS_BASER, OUTERCACHE, 53, 3) +FIELD(GITS_BASER, TYPE, 56, 3) +FIELD(GITS_BASER, INNERCACHE, 59, 3) +FIELD(GITS_BASER, INDIRECT, 62, 1) +FIELD(GITS_BASER, VALID, 63, 1) + +FIELD(GITS_CBASER, SIZE, 0, 8) +FIELD(GITS_CBASER, SHAREABILITY, 10, 2) +FIELD(GITS_CBASER, PHYADDR, 12, 40) +FIELD(GITS_CBASER, OUTERCACHE, 53, 3) +FIELD(GITS_CBASER, INNERCACHE, 59, 3) +FIELD(GITS_CBASER, VALID, 63, 1) + +FIELD(GITS_CREADR, STALLED, 0, 1) +FIELD(GITS_CREADR, OFFSET, 5, 15) + +FIELD(GITS_CWRITER, RETRY, 0, 1) +FIELD(GITS_CWRITER, OFFSET, 5, 15) + +FIELD(GITS_CTLR, ENABLED, 0, 1) +FIELD(GITS_CTLR, QUIESCENT, 31, 1) + +FIELD(GITS_TYPER, PHYSICAL, 0, 1) +FIELD(GITS_TYPER, ITT_ENTRY_SIZE, 4, 4) +FIELD(GITS_TYPER, IDBITS, 8, 5) +FIELD(GITS_TYPER, DEVBITS, 13, 5) +FIELD(GITS_TYPER, SEIS, 18, 1) +FIELD(GITS_TYPER, PTA, 19, 1) +FIELD(GITS_TYPER, CIDBITS, 32, 4) +FIELD(GITS_TYPER, CIL, 36, 1) + +#define GITS_IDREGS 0xFFD0 + +#define ITS_CTLR_ENABLED (1U) /* ITS Enabled */ + +#define GITS_BASER_RO_MASK (R_GITS_BASER_ENTRYSIZE_MASK | \ + R_GITS_BASER_TYPE_MASK) + +#define GITS_BASER_PAGESIZE_4K 0 +#define GITS_BASER_PAGESIZE_16K 1 +#define GITS_BASER_PAGESIZE_64K 2 + +#define GITS_BASER_TYPE_DEVICE 
1ULL +#define GITS_BASER_TYPE_COLLECTION 4ULL + +#define GITS_PAGE_SIZE_4K 0x1000 +#define GITS_PAGE_SIZE_16K 0x4000 +#define GITS_PAGE_SIZE_64K 0x10000 + +#define L1TABLE_ENTRY_SIZE 8 + +#define LPI_CTE_ENABLED TABLE_ENTRY_VALID_MASK +#define LPI_PRIORITY_MASK 0xfc + +#define GITS_CMDQ_ENTRY_SIZE 32 +#define NUM_BYTES_IN_DW 8 + +#define CMD_MASK 0xff + +/* ITS Commands */ +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_DISCARD 0x0F +#define GITS_CMD_INT 0x03 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPI 0x0B +#define GITS_CMD_MAPTI 0x0A +#define GITS_CMD_INV 0x0C +#define GITS_CMD_INVALL 0x0D +#define GITS_CMD_SYNC 0x05 + +/* MAPC command fields */ +#define ICID_LENGTH 16 +#define ICID_MASK ((1U << ICID_LENGTH) - 1) +FIELD(MAPC, RDBASE, 16, 32) + +#define RDBASE_PROCNUM_LENGTH 16 +#define RDBASE_PROCNUM_MASK ((1ULL << RDBASE_PROCNUM_LENGTH) - 1) + +/* MAPD command fields */ +#define ITTADDR_LENGTH 44 +#define ITTADDR_SHIFT 8 +#define ITTADDR_MASK MAKE_64BIT_MASK(ITTADDR_SHIFT, ITTADDR_LENGTH) +#define SIZE_MASK 0x1f + +/* MAPI command fields */ +#define EVENTID_MASK ((1ULL << 32) - 1) + +/* MAPTI command fields */ +#define pINTID_SHIFT 32 +#define pINTID_MASK MAKE_64BIT_MASK(32, 32) + +#define DEVID_SHIFT 32 +#define DEVID_MASK MAKE_64BIT_MASK(32, 32) + +#define VALID_SHIFT 63 +#define CMD_FIELD_VALID_MASK (1ULL << VALID_SHIFT) +#define L2_TABLE_VALID_MASK CMD_FIELD_VALID_MASK +#define TABLE_ENTRY_VALID_MASK (1ULL << 0) + +/** + * Default features advertised by this version of ITS + */ +/* Physical LPIs supported */ +#define GITS_TYPE_PHYSICAL (1U << 0) + +/* + * 12 bytes Interrupt translation Table Entry size + * as per Table 5.3 in GICv3 spec + * ITE Lower 8 Bytes + * Bits: | 49 ... 26 | 25 ... 2 | 1 | 0 | + * Values: | 1023 | IntNum | IntType | Valid | + * ITE Higher 4 Bytes + * Bits: | 31 ... 16 | 15 ...0 | + * Values: | vPEID | ICID | + */ +#define ITS_ITT_ENTRY_SIZE 0xC +#define ITE_ENTRY_INTTYPE_SHIFT 1 +#define ITE_ENTRY_INTID_SHIFT 2 +#define ITE_ENTRY_INTID_MASK MAKE_64BIT_MASK(2, 24) +#define ITE_ENTRY_INTSP_SHIFT 26 +#define ITE_ENTRY_ICID_MASK MAKE_64BIT_MASK(0, 16) + +/* 16 bits EventId */ +#define ITS_IDBITS GICD_TYPER_IDBITS + +/* 16 bits DeviceId */ +#define ITS_DEVBITS 0xF + +/* 16 bits CollectionId */ +#define ITS_CIDBITS 0xF + +/* + * 8 bytes Device Table Entry size + * Valid = 1 bit,ITTAddr = 44 bits,Size = 5 bits + */ +#define GITS_DTE_SIZE (0x8ULL) +#define GITS_DTE_ITTADDR_SHIFT 6 +#define GITS_DTE_ITTADDR_MASK MAKE_64BIT_MASK(GITS_DTE_ITTADDR_SHIFT, \ + ITTADDR_LENGTH) + +/* + * 8 bytes Collection Table Entry size + * Valid = 1 bit,RDBase = 36 bits(considering max RDBASE) + */ +#define GITS_CTE_SIZE (0x8ULL) +#define GITS_CTE_RDBASE_PROCNUM_MASK MAKE_64BIT_MASK(1, RDBASE_PROCNUM_LENGTH) + /* Special interrupt IDs */ #define INTID_SECURE 1020 #define INTID_NONSECURE 1021 @@ -246,6 +411,19 @@ /* Functions internal to the emulated GICv3 */ +/** + * gicv3_intid_is_special: + * @intid: interrupt ID + * + * Return true if @intid is a special interrupt ID (1020 to + * 1023 inclusive). This corresponds to the GIC spec pseudocode + * IsSpecial() function. 
+ */ +static inline bool gicv3_intid_is_special(int intid) +{ + return intid >= INTID_SECURE && intid <= INTID_SPURIOUS; +} + /** * gicv3_redist_update: * @cs: GICv3CPUState for this redistributor @@ -296,6 +474,26 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs); void gicv3_dist_set_irq(GICv3State *s, int irq, int level); void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level); +void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level); +void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level); +/** + * gicv3_redist_update_lpi: + * @cs: GICv3CPUState + * + * Scan the LPI pending table and recalculate the highest priority + * pending LPI and also the overall highest priority pending interrupt. + */ +void gicv3_redist_update_lpi(GICv3CPUState *cs); +/** + * gicv3_redist_update_lpi_only: + * @cs: GICv3CPUState + * + * Scan the LPI pending table and recalculate cs->hpplpi only, + * without calling gicv3_redist_update() to recalculate the overall + * highest priority pending interrupt. This should be called after + * an incoming migration has loaded new state. + */ +void gicv3_redist_update_lpi_only(GICv3CPUState *cs); void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); void gicv3_init_cpuif(GICv3State *s); diff --git a/hw/intc/goldfish_pic.c b/hw/intc/goldfish_pic.c index e3b43a69f16..dfd53275f69 100644 --- a/hw/intc/goldfish_pic.c +++ b/hw/intc/goldfish_pic.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish PIC * diff --git a/hw/intc/grlib_irqmp.c b/hw/intc/grlib_irqmp.c index 984334fa7bf..3bfe2544b7c 100644 --- a/hw/intc/grlib_irqmp.c +++ b/hw/intc/grlib_irqmp.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "hw/irq.h" #include "hw/sysbus.h" -#include "cpu.h" #include "hw/qdev-properties.h" #include "hw/sparc/grlib.h" diff --git a/hw/intc/ibex_plic.c b/hw/intc/ibex_plic.c deleted file mode 100644 index c1b72fcab0b..00000000000 --- a/hw/intc/ibex_plic.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * QEMU RISC-V lowRISC Ibex PLIC - * - * Copyright (c) 2020 Western Digital - * - * Documentation avaliable: https://docs.opentitan.org/hw/ip/rv_plic/doc/ - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include "qemu/osdep.h" -#include "qemu/log.h" -#include "hw/qdev-properties.h" -#include "hw/core/cpu.h" -#include "hw/boards.h" -#include "hw/pci/msi.h" -#include "target/riscv/cpu_bits.h" -#include "target/riscv/cpu.h" -#include "hw/intc/ibex_plic.h" - -static bool addr_between(uint32_t addr, uint32_t base, uint32_t num) -{ - uint32_t end = base + (num * 0x04); - - if (addr >= base && addr < end) { - return true; - } - - return false; -} - -static void ibex_plic_irqs_set_pending(IbexPlicState *s, int irq, bool level) -{ - int pending_num = irq / 32; - - if (!level) { - /* - * If the level is low make sure we clear the hidden_pending. 
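/*
 * Editor's sketch (not part of the patch): the byte/bit layout that the
 * redistributor LPI helpers declared above (gicv3_redist_update_lpi() and
 * friends) scan.  Per the GICv3 architecture, LPI INTIDs start at 8192, the
 * configuration table holds one byte per LPI (priority in bits [7:2], enable
 * in bit 0 -- cf. LPI_PRIORITY_MASK / LPI_CTE_ENABLED) and the pending table
 * holds one bit per INTID.  Table contents below are invented.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define LPI_MIN_INTID 8192

static bool lpi_is_pending(const uint8_t *pend_tbl, const uint8_t *cfg_tbl,
                           uint32_t intid, uint8_t *prio)
{
    uint8_t cfg  = cfg_tbl[intid - LPI_MIN_INTID];            /* one byte per LPI   */
    bool pending = pend_tbl[intid / 8] & (1 << (intid % 8));  /* one bit per INTID  */

    *prio = cfg & 0xfc;                                       /* priority field     */
    return pending && (cfg & 1);                              /* enabled + pending  */
}

int main(void)
{
    static uint8_t pend[2048], cfg[256];
    uint8_t prio;

    cfg[0] = 0xa0 | 1;                    /* LPI 8192: priority 0xa0, enabled */
    pend[8192 / 8] |= 1 << (8192 % 8);    /* mark it pending                  */

    printf("LPI 8192 fires: %d, priority 0x%02x\n",
           lpi_is_pending(pend, cfg, 8192, &prio), prio);
    return 0;
}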
- */ - s->hidden_pending[pending_num] &= ~(1 << (irq % 32)); - } - - if (s->claimed[pending_num] & 1 << (irq % 32)) { - /* - * The interrupt has been claimed, but not completed. - * The pending bit can't be set. - * Save the pending level for after the interrupt is completed. - */ - s->hidden_pending[pending_num] |= level << (irq % 32); - } else { - s->pending[pending_num] |= level << (irq % 32); - } -} - -static bool ibex_plic_irqs_pending(IbexPlicState *s, uint32_t context) -{ - int i; - uint32_t max_irq = 0; - uint32_t max_prio = s->threshold; - - for (i = 0; i < s->pending_num; i++) { - uint32_t irq_num = ctz64(s->pending[i]) + (i * 32); - - if (!(s->pending[i] & s->enable[i])) { - /* No pending and enabled IRQ */ - continue; - } - - if (s->priority[irq_num] > max_prio) { - max_irq = irq_num; - max_prio = s->priority[irq_num]; - } - } - - if (max_irq) { - s->claim = max_irq; - return true; - } - - return false; -} - -static void ibex_plic_update(IbexPlicState *s) -{ - CPUState *cpu; - int level, i; - - for (i = 0; i < s->num_cpus; i++) { - cpu = qemu_get_cpu(i); - - if (!cpu) { - continue; - } - - level = ibex_plic_irqs_pending(s, 0); - - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MEIP, BOOL_TO_MASK(level)); - } -} - -static void ibex_plic_reset(DeviceState *dev) -{ - IbexPlicState *s = IBEX_PLIC(dev); - - s->threshold = 0x00000000; - s->claim = 0x00000000; -} - -static uint64_t ibex_plic_read(void *opaque, hwaddr addr, - unsigned int size) -{ - IbexPlicState *s = opaque; - int offset; - uint32_t ret = 0; - - if (addr_between(addr, s->pending_base, s->pending_num)) { - offset = (addr - s->pending_base) / 4; - ret = s->pending[offset]; - } else if (addr_between(addr, s->source_base, s->source_num)) { - qemu_log_mask(LOG_UNIMP, - "%s: Interrupt source mode not supported\n", __func__); - } else if (addr_between(addr, s->priority_base, s->priority_num)) { - offset = (addr - s->priority_base) / 4; - ret = s->priority[offset]; - } else if (addr_between(addr, s->enable_base, s->enable_num)) { - offset = (addr - s->enable_base) / 4; - ret = s->enable[offset]; - } else if (addr_between(addr, s->threshold_base, 1)) { - ret = s->threshold; - } else if (addr_between(addr, s->claim_base, 1)) { - int pending_num = s->claim / 32; - s->pending[pending_num] &= ~(1 << (s->claim % 32)); - - /* Set the interrupt as claimed, but not completed */ - s->claimed[pending_num] |= 1 << (s->claim % 32); - - /* Return the current claimed interrupt */ - ret = s->claim; - - /* Clear the claimed interrupt */ - s->claim = 0x00000000; - - /* Update the interrupt status after the claim */ - ibex_plic_update(s); - } - - return ret; -} - -static void ibex_plic_write(void *opaque, hwaddr addr, - uint64_t value, unsigned int size) -{ - IbexPlicState *s = opaque; - - if (addr_between(addr, s->pending_base, s->pending_num)) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Pending registers are read only\n", __func__); - } else if (addr_between(addr, s->source_base, s->source_num)) { - qemu_log_mask(LOG_UNIMP, - "%s: Interrupt source mode not supported\n", __func__); - } else if (addr_between(addr, s->priority_base, s->priority_num)) { - uint32_t irq = ((addr - s->priority_base) >> 2) + 1; - s->priority[irq] = value & 7; - ibex_plic_update(s); - } else if (addr_between(addr, s->enable_base, s->enable_num)) { - uint32_t enable_reg = (addr - s->enable_base) / 4; - - s->enable[enable_reg] = value; - } else if (addr_between(addr, s->threshold_base, 1)) { - s->threshold = value & 3; - } else if (addr_between(addr, s->claim_base, 1)) { - if 
(s->claim == value) { - /* Interrupt was completed */ - s->claim = 0; - } - if (s->claimed[value / 32] & 1 << (value % 32)) { - int pending_num = value / 32; - - /* This value was already claimed, clear it. */ - s->claimed[pending_num] &= ~(1 << (value % 32)); - - if (s->hidden_pending[pending_num] & (1 << (value % 32))) { - /* - * If the bit in hidden_pending is set then that means we - * received an interrupt between claiming and completing - * the interrupt that hasn't since been de-asserted. - * On hardware this would trigger an interrupt, so let's - * trigger one here as well. - */ - s->pending[pending_num] |= 1 << (value % 32); - } - } - } - - ibex_plic_update(s); -} - -static const MemoryRegionOps ibex_plic_ops = { - .read = ibex_plic_read, - .write = ibex_plic_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4 - } -}; - -static void ibex_plic_irq_request(void *opaque, int irq, int level) -{ - IbexPlicState *s = opaque; - - ibex_plic_irqs_set_pending(s, irq, level > 0); - ibex_plic_update(s); -} - -static Property ibex_plic_properties[] = { - DEFINE_PROP_UINT32("num-cpus", IbexPlicState, num_cpus, 1), - DEFINE_PROP_UINT32("num-sources", IbexPlicState, num_sources, 80), - - DEFINE_PROP_UINT32("pending-base", IbexPlicState, pending_base, 0), - DEFINE_PROP_UINT32("pending-num", IbexPlicState, pending_num, 3), - - DEFINE_PROP_UINT32("source-base", IbexPlicState, source_base, 0x0c), - DEFINE_PROP_UINT32("source-num", IbexPlicState, source_num, 3), - - DEFINE_PROP_UINT32("priority-base", IbexPlicState, priority_base, 0x18), - DEFINE_PROP_UINT32("priority-num", IbexPlicState, priority_num, 80), - - DEFINE_PROP_UINT32("enable-base", IbexPlicState, enable_base, 0x200), - DEFINE_PROP_UINT32("enable-num", IbexPlicState, enable_num, 3), - - DEFINE_PROP_UINT32("threshold-base", IbexPlicState, threshold_base, 0x20c), - - DEFINE_PROP_UINT32("claim-base", IbexPlicState, claim_base, 0x210), - DEFINE_PROP_END_OF_LIST(), -}; - -static void ibex_plic_init(Object *obj) -{ - IbexPlicState *s = IBEX_PLIC(obj); - - memory_region_init_io(&s->mmio, obj, &ibex_plic_ops, s, - TYPE_IBEX_PLIC, 0x400); - sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); -} - -static void ibex_plic_realize(DeviceState *dev, Error **errp) -{ - IbexPlicState *s = IBEX_PLIC(dev); - int i; - - s->pending = g_new0(uint32_t, s->pending_num); - s->hidden_pending = g_new0(uint32_t, s->pending_num); - s->claimed = g_new0(uint32_t, s->pending_num); - s->source = g_new0(uint32_t, s->source_num); - s->priority = g_new0(uint32_t, s->priority_num); - s->enable = g_new0(uint32_t, s->enable_num); - - qdev_init_gpio_in(dev, ibex_plic_irq_request, s->num_sources); - - /* - * We can't allow the supervisor to control SEIP as this would allow the - * supervisor to clear a pending external interrupt which will result in - * a lost interrupt in the case a PLIC is attached. The SEIP bit must be - * hardware controlled when a PLIC is attached. 
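/*
 * Editor's sketch (not part of the patch): the claim/complete bookkeeping the
 * removed Ibex PLIC model implements, reduced to a single interrupt line.  A
 * level that arrives between claim and completion is parked in hidden_pending
 * and replayed when the handler writes the completion, which is what the
 * comment in the write handler above describes.
 */
#include <stdbool.h>
#include <stdio.h>

struct irq_state { bool pending, claimed, hidden_pending; };

static void set_level(struct irq_state *s, bool level)
{
    if (!level) {
        s->hidden_pending = false;
    } else if (s->claimed) {
        s->hidden_pending = true;   /* claimed but not completed: park it */
    } else {
        s->pending = true;
    }
}

static void claim(struct irq_state *s)    { s->pending = false; s->claimed = true; }

static void complete(struct irq_state *s)
{
    s->claimed = false;
    if (s->hidden_pending) {        /* line re-asserted while claimed */
        s->pending = true;
    }
}

int main(void)
{
    struct irq_state s = {0};

    set_level(&s, true);            /* interrupt fires                 */
    claim(&s);                      /* handler claims it               */
    set_level(&s, true);            /* fires again before completion   */
    complete(&s);                   /* completion replays the level    */
    printf("pending after complete: %d\n", s.pending);  /* 1 */
    return 0;
}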
- */ - MachineState *ms = MACHINE(qdev_get_machine()); - unsigned int smp_cpus = ms->smp.cpus; - for (i = 0; i < smp_cpus; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(i)); - if (riscv_cpu_claim_interrupts(cpu, MIP_SEIP) < 0) { - error_report("SEIP already claimed"); - exit(1); - } - } - - msi_nonbroken = true; -} - -static void ibex_plic_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = ibex_plic_reset; - device_class_set_props(dc, ibex_plic_properties); - dc->realize = ibex_plic_realize; -} - -static const TypeInfo ibex_plic_info = { - .name = TYPE_IBEX_PLIC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(IbexPlicState), - .instance_init = ibex_plic_init, - .class_init = ibex_plic_class_init, -}; - -static void ibex_plic_register_types(void) -{ - type_register_static(&ibex_plic_info); -} - -type_init(ibex_plic_register_types) diff --git a/hw/intc/imx_gpcv2.c b/hw/intc/imx_gpcv2.c index 17007a40780..237d5f97eba 100644 --- a/hw/intc/imx_gpcv2.c +++ b/hw/intc/imx_gpcv2.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "hw/intc/imx_gpcv2.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" #define GPC_PU_PGC_SW_PUP_REQ 0x0f8 diff --git a/hw/intc/lm32_pic.c b/hw/intc/lm32_pic.c deleted file mode 100644 index 991a90bc99e..00000000000 --- a/hw/intc/lm32_pic.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * LatticeMico32 CPU interrupt controller logic. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" - -#include "migration/vmstate.h" -#include "monitor/monitor.h" -#include "qemu/module.h" -#include "hw/sysbus.h" -#include "trace.h" -#include "hw/lm32/lm32_pic.h" -#include "hw/intc/intc.h" -#include "hw/irq.h" -#include "qom/object.h" - -#define TYPE_LM32_PIC "lm32-pic" -OBJECT_DECLARE_SIMPLE_TYPE(LM32PicState, LM32_PIC) - -struct LM32PicState { - SysBusDevice parent_obj; - - qemu_irq parent_irq; - uint32_t im; /* interrupt mask */ - uint32_t ip; /* interrupt pending */ - uint32_t irq_state; - - /* statistics */ - uint64_t stats_irq_count[32]; -}; - -static void update_irq(LM32PicState *s) -{ - s->ip |= s->irq_state; - - if (s->ip & s->im) { - trace_lm32_pic_raise_irq(); - qemu_irq_raise(s->parent_irq); - } else { - trace_lm32_pic_lower_irq(); - qemu_irq_lower(s->parent_irq); - } -} - -static void irq_handler(void *opaque, int irq, int level) -{ - LM32PicState *s = opaque; - - assert(irq < 32); - trace_lm32_pic_interrupt(irq, level); - - if (level) { - s->irq_state |= (1 << irq); - s->stats_irq_count[irq]++; - } else { - s->irq_state &= ~(1 << irq); - } - - update_irq(s); -} - -void lm32_pic_set_im(DeviceState *d, uint32_t im) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_set_im(im); - s->im = im; - - update_irq(s); -} - -void lm32_pic_set_ip(DeviceState *d, uint32_t ip) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_set_ip(ip); - - /* ack interrupt */ - s->ip &= ~ip; - - update_irq(s); -} - -uint32_t lm32_pic_get_im(DeviceState *d) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_get_im(s->im); - return s->im; -} - -uint32_t lm32_pic_get_ip(DeviceState *d) -{ - LM32PicState *s = LM32_PIC(d); - - trace_lm32_pic_get_ip(s->ip); - return s->ip; -} - -static void pic_reset(DeviceState *d) -{ - LM32PicState *s = LM32_PIC(d); - int i; - - s->im = 0; - s->ip = 0; - s->irq_state = 0; - for (i = 0; i < 32; i++) { - s->stats_irq_count[i] = 0; - } -} - -static bool lm32_get_statistics(InterruptStatsProvider *obj, - uint64_t **irq_counts, unsigned int *nb_irqs) -{ - LM32PicState *s = LM32_PIC(obj); - *irq_counts = s->stats_irq_count; - *nb_irqs = ARRAY_SIZE(s->stats_irq_count); - return true; -} - -static void lm32_print_info(InterruptStatsProvider *obj, Monitor *mon) -{ - LM32PicState *s = LM32_PIC(obj); - monitor_printf(mon, "lm32-pic: im=%08x ip=%08x irq_state=%08x\n", - s->im, s->ip, s->irq_state); -} - -static void lm32_pic_init(Object *obj) -{ - DeviceState *dev = DEVICE(obj); - LM32PicState *s = LM32_PIC(obj); - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - - qdev_init_gpio_in(dev, irq_handler, 32); - sysbus_init_irq(sbd, &s->parent_irq); -} - -static const VMStateDescription vmstate_lm32_pic = { - .name = "lm32-pic", - .version_id = 2, - .minimum_version_id = 2, - .fields = (VMStateField[]) { - VMSTATE_UINT32(im, LM32PicState), - VMSTATE_UINT32(ip, LM32PicState), - VMSTATE_UINT32(irq_state, LM32PicState), - VMSTATE_UINT64_ARRAY(stats_irq_count, LM32PicState, 32), - VMSTATE_END_OF_LIST() - } -}; - -static void lm32_pic_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); - - dc->reset = pic_reset; - dc->vmsd = &vmstate_lm32_pic; - ic->get_statistics = lm32_get_statistics; - ic->print_info = lm32_print_info; -} - -static const TypeInfo lm32_pic_info = { - .name = TYPE_LM32_PIC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32PicState), - .instance_init = lm32_pic_init, - .class_init = lm32_pic_class_init, - .interfaces 
= (InterfaceInfo[]) { - { TYPE_INTERRUPT_STATS_PROVIDER }, - { } - }, -}; - -static void lm32_pic_register_types(void) -{ - type_register_static(&lm32_pic_info); -} - -type_init(lm32_pic_register_types) diff --git a/hw/intc/m68k_irqc.c b/hw/intc/m68k_irqc.c index 2133d2a698a..0c515e4ecb7 100644 --- a/hw/intc/m68k_irqc.c +++ b/hw/intc/m68k_irqc.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * QEMU Motorola 680x0 IRQ Controller * diff --git a/hw/intc/meson.build b/hw/intc/meson.build index 1c299039f65..c89d2ca180e 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -8,16 +8,15 @@ softmmu_ss.add(when: 'CONFIG_ARM_GIC', if_true: files( 'arm_gicv3_dist.c', 'arm_gicv3_its_common.c', 'arm_gicv3_redist.c', + 'arm_gicv3_its.c', )) softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_pic.c')) softmmu_ss.add(when: 'CONFIG_HEATHROW_PIC', if_true: files('heathrow_pic.c')) softmmu_ss.add(when: 'CONFIG_I8259', if_true: files('i8259_common.c', 'i8259.c')) softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_avic.c', 'imx_gpcv2.c')) softmmu_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic_common.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_pic.c')) softmmu_ss.add(when: 'CONFIG_OPENPIC', if_true: files('openpic.c')) softmmu_ss.add(when: 'CONFIG_PL190', if_true: files('pl190.c')) -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_intc.c')) softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_gic.c')) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_intctl.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_intc.c')) @@ -33,7 +32,6 @@ specific_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_nvic.c')) specific_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_vic.c')) specific_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_gic.c', 'exynos4210_combiner.c')) specific_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_irqmp.c')) -specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_plic.c')) specific_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c')) specific_ss.add(when: 'CONFIG_LOONGSON_LIOINTC', if_true: files('loongson_liointc.c')) specific_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('mips_gic.c')) @@ -48,7 +46,7 @@ specific_ss.add(when: 'CONFIG_RX_ICU', if_true: files('rx_icu.c')) specific_ss.add(when: 'CONFIG_S390_FLIC', if_true: files('s390_flic.c')) specific_ss.add(when: 'CONFIG_S390_FLIC_KVM', if_true: files('s390_flic_kvm.c')) specific_ss.add(when: 'CONFIG_SH_INTC', if_true: files('sh_intc.c')) -specific_ss.add(when: 'CONFIG_SIFIVE_CLINT', if_true: files('sifive_clint.c')) +specific_ss.add(when: 'CONFIG_RISCV_ACLINT', if_true: files('riscv_aclint.c')) specific_ss.add(when: 'CONFIG_SIFIVE_PLIC', if_true: files('sifive_plic.c')) specific_ss.add(when: 'CONFIG_XICS', if_true: files('xics.c')) specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XICS'], diff --git a/hw/intc/ompic.c b/hw/intc/ompic.c index 1731a106838..1f10314807d 100644 --- a/hw/intc/ompic.c +++ b/hw/intc/ompic.c @@ -7,7 +7,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" #include "hw/irq.h" diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index 65970e1b378..49504e740f3 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -25,12 +25,8 @@ /* * * Based on OpenPic implementations: - * - Intel GW80314 I/O companion chip developer's manual * - Motorola MPC8245 & MPC8540 user manuals. 
- * - Motorola MCP750 (aka Raven) programmer manual. - * - Motorola Harrier programmer manuel - * - * Serial interrupts, as implemented in Raven chipset are not supported yet. + * - Motorola Harrier programmer manual * */ @@ -47,12 +43,11 @@ #include "qapi/error.h" #include "qemu/bitops.h" #include "qapi/qmp/qerror.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/timer.h" #include "qemu/error-report.h" -//#define DEBUG_OPENPIC +/* #define DEBUG_OPENPIC */ #ifdef DEBUG_OPENPIC static const int debug_openpic = 1; @@ -123,7 +118,8 @@ static FslMpicInfo fsl_mpic_42 = { #define ILR_INTTGT_CINT 0x01 /* critical */ #define ILR_INTTGT_MCP 0x02 /* machine check */ -/* The currently supported INTTGT values happen to be the same as QEMU's +/* + * The currently supported INTTGT values happen to be the same as QEMU's * openpic output codes, but don't depend on this. The output codes * could change (unlikely, but...) or support could be added for * more INTTGT values. @@ -182,10 +178,11 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, uint32_t val, int idx); static void openpic_reset(DeviceState *d); -/* Convert between openpic clock ticks and nanosecs. In the hardware the clock - frequency is driven by board inputs to the PIC which the PIC would then - divide by 4 or 8. For now hard code to 25MZ. -*/ +/* + * Convert between openpic clock ticks and nanosecs. In the hardware the clock + * frequency is driven by board inputs to the PIC which the PIC would then + * divide by 4 or 8. For now hard code to 25MZ. + */ #define OPENPIC_TIMER_FREQ_MHZ 25 #define OPENPIC_TIMER_NS_PER_TICK (1000 / OPENPIC_TIMER_FREQ_MHZ) static inline uint64_t ns_to_ticks(uint64_t ns) @@ -258,7 +255,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, __func__, src->output, n_IRQ, active, was_active, dst->outputs_active[src->output]); - /* On Freescale MPIC, critical interrupts ignore priority, + /* + * On Freescale MPIC, critical interrupts ignore priority, * IACK, EOI, etc. Before MPIC v4.1 they also ignore * masking. */ @@ -281,7 +279,8 @@ static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ, priority = IVPR_PRIORITY(src->ivpr); - /* Even if the interrupt doesn't have enough priority, + /* + * Even if the interrupt doesn't have enough priority, * it is still raised, in case ctpr is lowered later. */ if (active) { @@ -413,7 +412,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) } if (src->output != OPENPIC_OUTPUT_INT) { - /* Edge-triggered interrupts shouldn't be used + /* + * Edge-triggered interrupts shouldn't be used * with non-INT delivery, but just in case, * try to make it do something sane rather than * cause an interrupt storm. This is close to @@ -506,7 +506,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) { uint32_t mask; - /* NOTE when implementing newer FSL MPIC models: starting with v4.0, + /* + * NOTE when implementing newer FSL MPIC models: starting with v4.0, * the polarity bit is read-only on internal interrupts. */ mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK | @@ -516,7 +517,8 @@ static inline void write_IRQreg_ivpr(OpenPICState *opp, int n_IRQ, uint32_t val) opp->src[n_IRQ].ivpr = (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask); - /* For FSL internal interrupts, The sense bit is reserved and zero, + /* + * For FSL internal interrupts, The sense bit is reserved and zero, * and the interrupt is always level-triggered. 
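/*
 * Editor's sketch (not part of the patch): the tick <-> nanosecond conversion
 * behind OPENPIC_TIMER_FREQ_MHZ / ns_to_ticks() above.  With the clock
 * hard-coded to 25 MHz (the "25MZ" in the comment), one timer tick is
 * 1000 / 25 = 40 ns, so a TCCR count of 250000 is a 10 ms QEMU timer
 * deadline.  The sample values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define TIMER_FREQ_MHZ 25
#define NS_PER_TICK    (1000 / TIMER_FREQ_MHZ)   /* 40 ns per tick */

static uint64_t ticks_to_ns(uint64_t ticks) { return ticks * NS_PER_TICK; }
static uint64_t ns_to_ticks(uint64_t ns)    { return ns / NS_PER_TICK; }

int main(void)
{
    printf("%llu ns, %llu ticks\n",
           (unsigned long long)ticks_to_ns(250000),    /* 10000000 ns  */
           (unsigned long long)ns_to_ticks(1000000));  /* 25000 ticks  */
    return 0;
}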
Timers and IPIs * have no sense or polarity bits, and are edge-triggered. */ @@ -700,16 +702,20 @@ static void qemu_timer_cb(void *opaque) openpic_set_irq(opp, n_IRQ, 0); } -/* If enabled is true, arranges for an interrupt to be raised val clocks into - the future, if enabled is false cancels the timer. */ +/* + * If enabled is true, arranges for an interrupt to be raised val clocks into + * the future, if enabled is false cancels the timer. + */ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) { uint64_t ns = ticks_to_ns(val & ~TCCR_TOG); - /* A count of zero causes a timer to be set to expire immediately. This - effectively stops the simulation since the timer is constantly expiring - which prevents guest code execution, so we don't honor that - configuration. On real hardware, this situation would generate an - interrupt on every clock cycle if the interrupt was unmasked. */ + /* + * A count of zero causes a timer to be set to expire immediately. This + * effectively stops the simulation since the timer is constantly expiring + * which prevents guest code execution, so we don't honor that + * configuration. On real hardware, this situation would generate an + * interrupt on every clock cycle if the interrupt was unmasked. + */ if ((ns == 0) || !enabled) { tmr->qemu_timer_active = false; tmr->tccr = tmr->tccr & TCCR_TOG; @@ -722,8 +728,10 @@ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t val, bool enabled) } } -/* Returns the currrent tccr value, i.e., timer value (in clocks) with - appropriate TOG. */ +/* + * Returns the currrent tccr value, i.e., timer value (in clocks) with + * appropriate TOG. + */ static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr) { uint64_t retval; @@ -1277,6 +1285,15 @@ static void openpic_reset(DeviceState *d) break; } + /* Mask all IPI interrupts for Freescale OpenPIC */ + if ((opp->model == OPENPIC_MODEL_FSL_MPIC_20) || + (opp->model == OPENPIC_MODEL_FSL_MPIC_42)) { + if (i >= opp->irq_ipi0 && i < opp->irq_tim0) { + write_IRQreg_idr(opp, i, 0); + continue; + } + } + write_IRQreg_idr(opp, i, opp->idr_reset); } /* Initialise IRQ destinations */ @@ -1305,7 +1322,7 @@ static void openpic_reset(DeviceState *d) typedef struct MemReg { const char *name; MemoryRegionOps const *ops; - hwaddr start_addr; + hwaddr start_addr; ram_addr_t size; } MemReg; @@ -1556,28 +1573,6 @@ static void openpic_realize(DeviceState *dev, Error **errp) break; - case OPENPIC_MODEL_RAVEN: - opp->nb_irqs = RAVEN_MAX_EXT; - opp->vid = VID_REVISION_1_3; - opp->vir = VIR_GENERIC; - opp->vector_mask = 0xFF; - opp->tfrr_reset = 4160000; - opp->ivpr_reset = IVPR_MASK_MASK | IVPR_MODE_MASK; - opp->idr_reset = 0; - opp->max_irq = RAVEN_MAX_IRQ; - opp->irq_ipi0 = RAVEN_IPI_IRQ; - opp->irq_tim0 = RAVEN_TMR_IRQ; - opp->brr1 = -1; - opp->mpic_mode_mask = GCR_MODE_MIXED; - - if (opp->nb_cpus != 1) { - error_setg(errp, "Only UP supported today"); - return; - } - - map_list(opp, list_le, &list_count); - break; - case OPENPIC_MODEL_KEYLARGO: opp->nb_irqs = KEYLARGO_MAX_EXT; opp->vid = VID_REVISION_1_2; diff --git a/hw/intc/openpic_kvm.c b/hw/intc/openpic_kvm.c index e1a39e33cb1..557dd0c2bf5 100644 --- a/hw/intc/openpic_kvm.c +++ b/hw/intc/openpic_kvm.c @@ -24,9 +24,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include -#include "exec/address-spaces.h" #include "hw/ppc/openpic.h" #include "hw/ppc/openpic_kvm.h" #include "hw/pci/msi.h" @@ -236,6 +234,7 @@ static void kvm_openpic_realize(DeviceState *dev, Error **errp) 
opp->mem_listener.region_add = kvm_openpic_region_add; opp->mem_listener.region_del = kvm_openpic_region_del; + opp->mem_listener.name = "openpic-kvm"; memory_listener_register(&opp->mem_listener, &address_space_memory); /* indicate pic capabilities */ diff --git a/hw/intc/ppc-uic.c b/hw/intc/ppc-uic.c index 7171de7b355..60013f2dde3 100644 --- a/hw/intc/ppc-uic.c +++ b/hw/intc/ppc-uic.c @@ -23,7 +23,7 @@ */ #include "qemu/osdep.h" -#include "include/hw/intc/ppc-uic.h" +#include "hw/intc/ppc-uic.h" #include "hw/irq.h" #include "cpu.h" #include "hw/ppc/ppc.h" diff --git a/hw/intc/puv3_intc.c b/hw/intc/puv3_intc.c deleted file mode 100644 index 65226f5e7c4..00000000000 --- a/hw/intc/puv3_intc.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * INTC device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define TYPE_PUV3_INTC "puv3_intc" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3INTCState, PUV3_INTC) - -struct PUV3INTCState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - qemu_irq parent_irq; - - uint32_t reg_ICMR; - uint32_t reg_ICPR; -}; - -/* Update interrupt status after enabled or pending bits have been changed. */ -static void puv3_intc_update(PUV3INTCState *s) -{ - if (s->reg_ICMR & s->reg_ICPR) { - qemu_irq_raise(s->parent_irq); - } else { - qemu_irq_lower(s->parent_irq); - } -} - -/* Process a change in an external INTC input. 
*/ -static void puv3_intc_handler(void *opaque, int irq, int level) -{ - PUV3INTCState *s = opaque; - - DPRINTF("irq 0x%x, level 0x%x\n", irq, level); - if (level) { - s->reg_ICPR |= (1 << irq); - } else { - s->reg_ICPR &= ~(1 << irq); - } - puv3_intc_update(s); -} - -static uint64_t puv3_intc_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3INTCState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x04: /* INTC_ICMR */ - ret = s->reg_ICMR; - break; - case 0x0c: /* INTC_ICIP */ - ret = s->reg_ICPR; /* the same value with ICPR */ - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - return ret; -} - -static void puv3_intc_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3INTCState *s = opaque; - - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); - switch (offset) { - case 0x00: /* INTC_ICLR */ - case 0x14: /* INTC_ICCR */ - break; - case 0x04: /* INTC_ICMR */ - s->reg_ICMR = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - return; - } - puv3_intc_update(s); -} - -static const MemoryRegionOps puv3_intc_ops = { - .read = puv3_intc_read, - .write = puv3_intc_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_intc_realize(DeviceState *dev, Error **errp) -{ - PUV3INTCState *s = PUV3_INTC(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - qdev_init_gpio_in(dev, puv3_intc_handler, PUV3_IRQS_NR); - sysbus_init_irq(sbd, &s->parent_irq); - - s->reg_ICMR = 0; - s->reg_ICPR = 0; - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_intc_ops, s, "puv3_intc", - PUV3_REGS_OFFSET); - sysbus_init_mmio(sbd, &s->iomem); -} - -static void puv3_intc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - dc->realize = puv3_intc_realize; -} - -static const TypeInfo puv3_intc_info = { - .name = TYPE_PUV3_INTC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3INTCState), - .class_init = puv3_intc_class_init, -}; - -static void puv3_intc_register_type(void) -{ - type_register_static(&puv3_intc_info); -} - -type_init(puv3_intc_register_type) diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c new file mode 100644 index 00000000000..f1a5d3d284f --- /dev/null +++ b/hw/intc/riscv_aclint.c @@ -0,0 +1,460 @@ +/* + * RISC-V ACLINT (Advanced Core Local Interruptor) + * URL: https://github.com/riscv/riscv-aclint + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This provides real-time clock, timer and interprocessor interrupts. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/sysbus.h" +#include "target/riscv/cpu.h" +#include "hw/qdev-properties.h" +#include "hw/intc/riscv_aclint.h" +#include "qemu/timer.h" +#include "hw/irq.h" + +typedef struct riscv_aclint_mtimer_callback { + RISCVAclintMTimerState *s; + int num; +} riscv_aclint_mtimer_callback; + +static uint64_t cpu_riscv_read_rtc(uint32_t timebase_freq) +{ + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + timebase_freq, NANOSECONDS_PER_SECOND); +} + +/* + * Called when timecmp is written to update the QEMU timer or immediately + * trigger timer interrupt if mtimecmp <= current timer value. + */ +static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, + RISCVCPU *cpu, + int hartid, + uint64_t value, + uint32_t timebase_freq) +{ + uint64_t next; + uint64_t diff; + + uint64_t rtc_r = cpu_riscv_read_rtc(timebase_freq); + + cpu->env.timecmp = value; + if (cpu->env.timecmp <= rtc_r) { + /* + * If we're setting an MTIMECMP value in the "past", + * immediately raise the timer interrupt + */ + qemu_irq_raise(mtimer->timer_irqs[hartid - mtimer->hartid_base]); + return; + } + + /* otherwise, set up the future timer interrupt */ + qemu_irq_lower(mtimer->timer_irqs[hartid - mtimer->hartid_base]); + diff = cpu->env.timecmp - rtc_r; + /* back to ns (note args switched in muldiv64) */ + uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); + + /* + * check if ns_diff overflowed and check if the addition would potentially + * overflow + */ + if ((NANOSECONDS_PER_SECOND > timebase_freq && ns_diff < diff) || + ns_diff > INT64_MAX) { + next = INT64_MAX; + } else { + /* + * as it is very unlikely qemu_clock_get_ns will return a value + * greater than INT64_MAX, no additional check is needed for an + * unsigned integer overflow. + */ + next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns_diff; + /* + * if ns_diff is INT64_MAX next may still be outside the range + * of a signed integer. + */ + next = MIN(next, INT64_MAX); + } + + timer_mod(cpu->env.timer, next); +} + +/* + * Callback used when the timer set using timer_mod expires. + * Should raise the timer interrupt line + */ +static void riscv_aclint_mtimer_cb(void *opaque) +{ + riscv_aclint_mtimer_callback *state = opaque; + + qemu_irq_raise(state->s->timer_irqs[state->num]); +} + +/* CPU read MTIMER register */ +static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVAclintMTimerState *mtimer = opaque; + + if (addr >= mtimer->timecmp_base && + addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { + size_t hartid = mtimer->hartid_base + + ((addr - mtimer->timecmp_base) >> 3); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? 
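/*
 * Editor's sketch (not part of the patch): the arithmetic
 * riscv_aclint_mtimer_write_timecmp() above performs when the guest programs
 * a future mtimecmp.  The 10 MHz timebase and the sample values are assumed
 * for illustration only, and muldiv64() is re-implemented with a GCC/Clang
 * __int128 so the fragment compiles on its own.
 */
#include <stdint.h>
#include <stdio.h>

#define NANOSECONDS_PER_SECOND 1000000000ULL

static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
    return (uint64_t)(((unsigned __int128)a * b) / c);
}

int main(void)
{
    uint32_t freq     = 10000000;         /* 10 MHz timebase (assumed)      */
    uint64_t now_ns   = 5000000000ULL;    /* current QEMU_CLOCK_VIRTUAL     */
    uint64_t timecmp  = 50010000ULL;      /* value the guest just wrote     */

    uint64_t rtc = muldiv64(now_ns, freq, NANOSECONDS_PER_SECOND); /* 50000000 ticks */
    if (timecmp <= rtc) {
        puts("comparand is in the past: raise MTIP immediately");
        return 0;
    }
    uint64_t ns_diff = muldiv64(timecmp - rtc, NANOSECONDS_PER_SECOND, freq);
    printf("arm the QEMU timer for now + %llu ns\n",   /* 1000000 ns = 1 ms */
           (unsigned long long)ns_diff);
    return 0;
}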
cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-mtimer: invalid hartid: %zu", hartid); + } else if ((addr & 0x7) == 0) { + /* timecmp_lo */ + uint64_t timecmp = env->timecmp; + return timecmp & 0xFFFFFFFF; + } else if ((addr & 0x7) == 4) { + /* timecmp_hi */ + uint64_t timecmp = env->timecmp; + return (timecmp >> 32) & 0xFFFFFFFF; + } else { + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid read: %08x", (uint32_t)addr); + return 0; + } + } else if (addr == mtimer->time_base) { + /* time_lo */ + return cpu_riscv_read_rtc(mtimer->timebase_freq) & 0xFFFFFFFF; + } else if (addr == mtimer->time_base + 4) { + /* time_hi */ + return (cpu_riscv_read_rtc(mtimer->timebase_freq) >> 32) & 0xFFFFFFFF; + } + + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid read: %08x", (uint32_t)addr); + return 0; +} + +/* CPU write MTIMER register */ +static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + RISCVAclintMTimerState *mtimer = opaque; + + if (addr >= mtimer->timecmp_base && + addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { + size_t hartid = mtimer->hartid_base + + ((addr - mtimer->timecmp_base) >> 3); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-mtimer: invalid hartid: %zu", hartid); + } else if ((addr & 0x7) == 0) { + /* timecmp_lo */ + uint64_t timecmp_hi = env->timecmp >> 32; + riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid, + timecmp_hi << 32 | (value & 0xFFFFFFFF), + mtimer->timebase_freq); + return; + } else if ((addr & 0x7) == 4) { + /* timecmp_hi */ + uint64_t timecmp_lo = env->timecmp; + riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu), hartid, + value << 32 | (timecmp_lo & 0xFFFFFFFF), + mtimer->timebase_freq); + } else { + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid timecmp write: %08x", + (uint32_t)addr); + } + return; + } else if (addr == mtimer->time_base) { + /* time_lo */ + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: time_lo write not implemented"); + return; + } else if (addr == mtimer->time_base + 4) { + /* time_hi */ + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: time_hi write not implemented"); + return; + } + + qemu_log_mask(LOG_UNIMP, + "aclint-mtimer: invalid write: %08x", (uint32_t)addr); +} + +static const MemoryRegionOps riscv_aclint_mtimer_ops = { + .read = riscv_aclint_mtimer_read, + .write = riscv_aclint_mtimer_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8 + } +}; + +static Property riscv_aclint_mtimer_properties[] = { + DEFINE_PROP_UINT32("hartid-base", RISCVAclintMTimerState, + hartid_base, 0), + DEFINE_PROP_UINT32("num-harts", RISCVAclintMTimerState, num_harts, 1), + DEFINE_PROP_UINT32("timecmp-base", RISCVAclintMTimerState, + timecmp_base, RISCV_ACLINT_DEFAULT_MTIMECMP), + DEFINE_PROP_UINT32("time-base", RISCVAclintMTimerState, + time_base, RISCV_ACLINT_DEFAULT_MTIME), + DEFINE_PROP_UINT32("aperture-size", RISCVAclintMTimerState, + aperture_size, RISCV_ACLINT_DEFAULT_MTIMER_SIZE), + DEFINE_PROP_UINT32("timebase-freq", RISCVAclintMTimerState, + timebase_freq, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) +{ + RISCVAclintMTimerState *s = RISCV_ACLINT_MTIMER(dev); + int i; + + memory_region_init_io(&s->mmio, OBJECT(dev), &riscv_aclint_mtimer_ops, + s, TYPE_RISCV_ACLINT_MTIMER, s->aperture_size); + 
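/*
 * Editor's sketch (not part of the patch): how an RV32 guest is expected to
 * drive the timecmp_lo/timecmp_hi halves handled above.  The write ordering
 * follows the sequence recommended by the RISC-V privileged specification so
 * that no intermediate 64-bit value is smaller than either the old or the
 * new comparand (which would trigger a spurious MTIP).  MTIMECMP_BASE is a
 * placeholder address, not something defined by this patch.
 */
#include <stdint.h>

#define MTIMECMP_BASE 0x02004000UL   /* hypothetical per-hart mtimecmp */

static void set_mtimecmp_rv32(uint64_t new_cmp)
{
    volatile uint32_t *lo = (volatile uint32_t *)(uintptr_t)MTIMECMP_BASE;
    volatile uint32_t *hi = (volatile uint32_t *)(uintptr_t)(MTIMECMP_BASE + 4);

    *lo = 0xffffffff;                 /* no smaller than the old value */
    *hi = (uint32_t)(new_cmp >> 32);  /* no smaller than the new value */
    *lo = (uint32_t)new_cmp;          /* final 64-bit value in place   */
}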
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); + + s->timer_irqs = g_malloc(sizeof(qemu_irq) * s->num_harts); + qdev_init_gpio_out(dev, s->timer_irqs, s->num_harts); + + /* Claim timer interrupt bits */ + for (i = 0; i < s->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); + if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { + error_report("MTIP already claimed"); + exit(1); + } + } +} + +static void riscv_aclint_mtimer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = riscv_aclint_mtimer_realize; + device_class_set_props(dc, riscv_aclint_mtimer_properties); +} + +static const TypeInfo riscv_aclint_mtimer_info = { + .name = TYPE_RISCV_ACLINT_MTIMER, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RISCVAclintMTimerState), + .class_init = riscv_aclint_mtimer_class_init, +}; + +/* + * Create ACLINT MTIMER device. + */ +DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size, + uint32_t hartid_base, uint32_t num_harts, + uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, + bool provide_rdtime) +{ + int i; + DeviceState *dev = qdev_new(TYPE_RISCV_ACLINT_MTIMER); + + assert(num_harts <= RISCV_ACLINT_MAX_HARTS); + assert(!(addr & 0x7)); + assert(!(timecmp_base & 0x7)); + assert(!(time_base & 0x7)); + + qdev_prop_set_uint32(dev, "hartid-base", hartid_base); + qdev_prop_set_uint32(dev, "num-harts", num_harts); + qdev_prop_set_uint32(dev, "timecmp-base", timecmp_base); + qdev_prop_set_uint32(dev, "time-base", time_base); + qdev_prop_set_uint32(dev, "aperture-size", size); + qdev_prop_set_uint32(dev, "timebase-freq", timebase_freq); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_harts; i++) { + CPUState *cpu = qemu_get_cpu(hartid_base + i); + RISCVCPU *rvcpu = RISCV_CPU(cpu); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + riscv_aclint_mtimer_callback *cb = + g_malloc0(sizeof(riscv_aclint_mtimer_callback)); + + if (!env) { + g_free(cb); + continue; + } + if (provide_rdtime) { + riscv_cpu_set_rdtime_fn(env, cpu_riscv_read_rtc, timebase_freq); + } + + cb->s = RISCV_ACLINT_MTIMER(dev); + cb->num = i; + env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &riscv_aclint_mtimer_cb, cb); + env->timecmp = 0; + + qdev_connect_gpio_out(dev, i, + qdev_get_gpio_in(DEVICE(rvcpu), IRQ_M_TIMER)); + } + + return dev; +} + +/* CPU read [M|S]SWI register */ +static uint64_t riscv_aclint_swi_read(void *opaque, hwaddr addr, + unsigned size) +{ + RISCVAclintSwiState *swi = opaque; + + if (addr < (swi->num_harts << 2)) { + size_t hartid = swi->hartid_base + (addr >> 2); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-swi: invalid hartid: %zu", hartid); + } else if ((addr & 0x3) == 0) { + return (swi->sswi) ? 0 : ((env->mip & MIP_MSIP) > 0); + } + } + + qemu_log_mask(LOG_UNIMP, + "aclint-swi: invalid read: %08x", (uint32_t)addr); + return 0; +} + +/* CPU write [M|S]SWI register */ +static void riscv_aclint_swi_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + RISCVAclintSwiState *swi = opaque; + + if (addr < (swi->num_harts << 2)) { + size_t hartid = swi->hartid_base + (addr >> 2); + CPUState *cpu = qemu_get_cpu(hartid); + CPURISCVState *env = cpu ? 
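/*
 * Editor's sketch (not part of the patch): the guest-visible view of the MSWI
 * registers handled by the [M|S]SWI read/write callbacks around here -- one
 * 32-bit word per hart at offset 4 * hartid, where bit 0 raises or clears
 * that hart's machine software interrupt.  MSWI_BASE and the hart numbering
 * are placeholders for illustration.
 */
#include <stdint.h>

#define MSWI_BASE 0x02000000UL        /* hypothetical MSWI aperture */

static void send_ipi(unsigned hartid)
{
    volatile uint32_t *msip =
        (volatile uint32_t *)(uintptr_t)(MSWI_BASE + 4 * hartid);
    *msip = 1;                        /* raise MSIP on the target hart */
}

static void clear_ipi(unsigned hartid)
{
    volatile uint32_t *msip =
        (volatile uint32_t *)(uintptr_t)(MSWI_BASE + 4 * hartid);
    *msip = 0;                        /* target hart acks its own IPI  */
}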
cpu->env_ptr : NULL; + if (!env) { + qemu_log_mask(LOG_GUEST_ERROR, + "aclint-swi: invalid hartid: %zu", hartid); + } else if ((addr & 0x3) == 0) { + if (value & 0x1) { + qemu_irq_raise(swi->soft_irqs[hartid - swi->hartid_base]); + } else { + if (!swi->sswi) { + qemu_irq_lower(swi->soft_irqs[hartid - swi->hartid_base]); + } + } + return; + } + } + + qemu_log_mask(LOG_UNIMP, + "aclint-swi: invalid write: %08x", (uint32_t)addr); +} + +static const MemoryRegionOps riscv_aclint_swi_ops = { + .read = riscv_aclint_swi_read, + .write = riscv_aclint_swi_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4 + } +}; + +static Property riscv_aclint_swi_properties[] = { + DEFINE_PROP_UINT32("hartid-base", RISCVAclintSwiState, hartid_base, 0), + DEFINE_PROP_UINT32("num-harts", RISCVAclintSwiState, num_harts, 1), + DEFINE_PROP_UINT32("sswi", RISCVAclintSwiState, sswi, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void riscv_aclint_swi_realize(DeviceState *dev, Error **errp) +{ + RISCVAclintSwiState *swi = RISCV_ACLINT_SWI(dev); + int i; + + memory_region_init_io(&swi->mmio, OBJECT(dev), &riscv_aclint_swi_ops, swi, + TYPE_RISCV_ACLINT_SWI, RISCV_ACLINT_SWI_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &swi->mmio); + + swi->soft_irqs = g_malloc(sizeof(qemu_irq) * swi->num_harts); + qdev_init_gpio_out(dev, swi->soft_irqs, swi->num_harts); + + /* Claim software interrupt bits */ + for (i = 0; i < swi->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(swi->hartid_base + i)); + /* We don't claim mip.SSIP because it is writeable by software */ + if (riscv_cpu_claim_interrupts(cpu, swi->sswi ? 0 : MIP_MSIP) < 0) { + error_report("MSIP already claimed"); + exit(1); + } + } +} + +static void riscv_aclint_swi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = riscv_aclint_swi_realize; + device_class_set_props(dc, riscv_aclint_swi_properties); +} + +static const TypeInfo riscv_aclint_swi_info = { + .name = TYPE_RISCV_ACLINT_SWI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(RISCVAclintSwiState), + .class_init = riscv_aclint_swi_class_init, +}; + +/* + * Create ACLINT [M|S]SWI device. + */ +DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base, + uint32_t num_harts, bool sswi) +{ + int i; + DeviceState *dev = qdev_new(TYPE_RISCV_ACLINT_SWI); + + assert(num_harts <= RISCV_ACLINT_MAX_HARTS); + assert(!(addr & 0x3)); + + qdev_prop_set_uint32(dev, "hartid-base", hartid_base); + qdev_prop_set_uint32(dev, "num-harts", num_harts); + qdev_prop_set_uint32(dev, "sswi", sswi ? true : false); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_harts; i++) { + CPUState *cpu = qemu_get_cpu(hartid_base + i); + RISCVCPU *rvcpu = RISCV_CPU(cpu); + + qdev_connect_gpio_out(dev, i, + qdev_get_gpio_in(DEVICE(rvcpu), + (sswi) ? 
IRQ_S_SOFT : IRQ_M_SOFT)); + } + + return dev; +} + +static void riscv_aclint_register_types(void) +{ + type_register_static(&riscv_aclint_mtimer_info); + type_register_static(&riscv_aclint_swi_info); +} + +type_init(riscv_aclint_register_types) diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index aacdb1bbc26..74e02858d43 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -20,7 +20,6 @@ #include "hw/qdev-properties.h" #include "hw/s390x/css.h" #include "trace.h" -#include "cpu.h" #include "qapi/error.h" #include "hw/s390x/s390-virtio-ccw.h" diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c index b3fb9f83952..efe5054182c 100644 --- a/hw/intc/s390_flic_kvm.c +++ b/hw/intc/s390_flic_kvm.c @@ -11,13 +11,11 @@ */ #include "qemu/osdep.h" -#include "cpu.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" #include #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "hw/sysbus.h" #include "sysemu/kvm.h" #include "hw/s390x/s390_flic.h" #include "hw/s390x/adapter.h" diff --git a/hw/intc/sh_intc.c b/hw/intc/sh_intc.c index 72a55e32dd4..c9b0b0c1ecc 100644 --- a/hw/intc/sh_intc.c +++ b/hw/intc/sh_intc.c @@ -9,40 +9,37 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "hw/sh4/sh_intc.h" #include "hw/irq.h" #include "hw/sh4/sh.h" - -//#define DEBUG_INTC -//#define DEBUG_INTC_SOURCES - -#define INTC_A7(x) ((x) & 0x1fffffff) +#include "trace.h" void sh_intc_toggle_source(struct intc_source *source, - int enable_adj, int assert_adj) + int enable_adj, int assert_adj) { int enable_changed = 0; int pending_changed = 0; int old_pending; - if ((source->enable_count == source->enable_max) && (enable_adj == -1)) + if (source->enable_count == source->enable_max && enable_adj == -1) { enable_changed = -1; - + } source->enable_count += enable_adj; - if (source->enable_count == source->enable_max) + if (source->enable_count == source->enable_max) { enable_changed = 1; - + } source->asserted += assert_adj; old_pending = source->pending; source->pending = source->asserted && (source->enable_count == source->enable_max); - if (old_pending != source->pending) + if (old_pending != source->pending) { pending_changed = 1; - + } if (pending_changed) { if (source->pending) { source->parent->pending++; @@ -54,35 +51,30 @@ void sh_intc_toggle_source(struct intc_source *source, if (source->parent->pending == 0) { cpu_reset_interrupt(first_cpu, CPU_INTERRUPT_HARD); } - } + } } - if (enable_changed || assert_adj || pending_changed) { -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: (%d/%d/%d/%d) interrupt source 0x%x %s%s%s\n", - source->parent->pending, - source->asserted, - source->enable_count, - source->enable_max, - source->vect, - source->asserted ? "asserted " : - assert_adj ? "deasserted" : "", - enable_changed == 1 ? "enabled " : - enable_changed == -1 ? "disabled " : "", - source->pending ? "pending" : ""); -#endif - } + if (enable_changed || assert_adj || pending_changed) { + trace_sh_intc_sources(source->parent->pending, source->asserted, + source->enable_count, source->enable_max, + source->vect, source->asserted ? "asserted " : + assert_adj ? "deasserted" : "", + enable_changed == 1 ? "enabled " : + enable_changed == -1 ? "disabled " : "", + source->pending ? 
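/*
 * Editor's sketch (not part of the patch): the enable_count/enable_max
 * refcounting that sh_intc_toggle_source() above relies on.  A source
 * referenced from several mask/priority registers only becomes deliverable
 * once every one of them has enabled it, i.e. once enable_count has reached
 * enable_max while the line is asserted.
 */
#include <stdbool.h>
#include <stdio.h>

struct src { int enable_count, enable_max; bool asserted; };

static bool deliverable(const struct src *s)
{
    return s->asserted && s->enable_count == s->enable_max;
}

int main(void)
{
    struct src s = { .enable_max = 2, .asserted = true };

    s.enable_count++;                   /* first register enables it   */
    printf("%d\n", deliverable(&s));    /* 0: still masked elsewhere   */
    s.enable_count++;                   /* second register enables it  */
    printf("%d\n", deliverable(&s));    /* 1: now pending              */
    return 0;
}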
"pending" : ""); + } } -static void sh_intc_set_irq (void *opaque, int n, int level) +static void sh_intc_set_irq(void *opaque, int n, int level) { - struct intc_desc *desc = opaque; - struct intc_source *source = &(desc->sources[n]); + struct intc_desc *desc = opaque; + struct intc_source *source = &desc->sources[n]; - if (level && !source->asserted) - sh_intc_toggle_source(source, 0, 1); - else if (!level && source->asserted) - sh_intc_toggle_source(source, 0, -1); + if (level && !source->asserted) { + sh_intc_toggle_source(source, 0, 1); + } else if (!level && source->asserted) { + sh_intc_toggle_source(source, 0, -1); + } } int sh_intc_get_pending_vector(struct intc_desc *desc, int imask) @@ -97,147 +89,124 @@ int sh_intc_get_pending_vector(struct intc_desc *desc, int imask) } for (i = 0; i < desc->nr_sources; i++) { - struct intc_source *source = desc->sources + i; + struct intc_source *source = &desc->sources[i]; - if (source->pending) { -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: (%d) returning interrupt source 0x%x\n", - desc->pending, source->vect); -#endif + if (source->pending) { + trace_sh_intc_pending(desc->pending, source->vect); return source->vect; - } + } } - - abort(); + g_assert_not_reached(); } -#define INTC_MODE_NONE 0 -#define INTC_MODE_DUAL_SET 1 -#define INTC_MODE_DUAL_CLR 2 -#define INTC_MODE_ENABLE_REG 3 -#define INTC_MODE_MASK_REG 4 -#define INTC_MODE_IS_PRIO 8 - -static unsigned int sh_intc_mode(unsigned long address, - unsigned long set_reg, unsigned long clr_reg) +typedef enum { + INTC_MODE_NONE, + INTC_MODE_DUAL_SET, + INTC_MODE_DUAL_CLR, + INTC_MODE_ENABLE_REG, + INTC_MODE_MASK_REG, +} SHIntCMode; +#define INTC_MODE_IS_PRIO 0x80 + +static SHIntCMode sh_intc_mode(unsigned long address, unsigned long set_reg, + unsigned long clr_reg) { - if ((address != INTC_A7(set_reg)) && - (address != INTC_A7(clr_reg))) + if (address != A7ADDR(set_reg) && address != A7ADDR(clr_reg)) { return INTC_MODE_NONE; - + } if (set_reg && clr_reg) { - if (address == INTC_A7(set_reg)) - return INTC_MODE_DUAL_SET; - else - return INTC_MODE_DUAL_CLR; + return address == A7ADDR(set_reg) ? + INTC_MODE_DUAL_SET : INTC_MODE_DUAL_CLR; } - - if (set_reg) - return INTC_MODE_ENABLE_REG; - else - return INTC_MODE_MASK_REG; + return set_reg ? 
INTC_MODE_ENABLE_REG : INTC_MODE_MASK_REG; } static void sh_intc_locate(struct intc_desc *desc, - unsigned long address, - unsigned long **datap, - intc_enum **enums, - unsigned int *first, - unsigned int *width, - unsigned int *modep) + unsigned long address, + unsigned long **datap, + intc_enum **enums, + unsigned int *first, + unsigned int *width, + unsigned int *modep) { - unsigned int i, mode; + SHIntCMode mode; + unsigned int i; /* this is slow but works for now */ if (desc->mask_regs) { for (i = 0; i < desc->nr_mask_regs; i++) { - struct intc_mask_reg *mr = desc->mask_regs + i; - - mode = sh_intc_mode(address, mr->set_reg, mr->clr_reg); - if (mode == INTC_MODE_NONE) - continue; - - *modep = mode; - *datap = &mr->value; - *enums = mr->enum_ids; - *first = mr->reg_width - 1; - *width = 1; - return; - } + struct intc_mask_reg *mr = &desc->mask_regs[i]; + + mode = sh_intc_mode(address, mr->set_reg, mr->clr_reg); + if (mode != INTC_MODE_NONE) { + *modep = mode; + *datap = &mr->value; + *enums = mr->enum_ids; + *first = mr->reg_width - 1; + *width = 1; + return; + } + } } if (desc->prio_regs) { for (i = 0; i < desc->nr_prio_regs; i++) { - struct intc_prio_reg *pr = desc->prio_regs + i; - - mode = sh_intc_mode(address, pr->set_reg, pr->clr_reg); - if (mode == INTC_MODE_NONE) - continue; - - *modep = mode | INTC_MODE_IS_PRIO; - *datap = &pr->value; - *enums = pr->enum_ids; - *first = (pr->reg_width / pr->field_width) - 1; - *width = pr->field_width; - return; - } + struct intc_prio_reg *pr = &desc->prio_regs[i]; + + mode = sh_intc_mode(address, pr->set_reg, pr->clr_reg); + if (mode != INTC_MODE_NONE) { + *modep = mode | INTC_MODE_IS_PRIO; + *datap = &pr->value; + *enums = pr->enum_ids; + *first = pr->reg_width / pr->field_width - 1; + *width = pr->field_width; + return; + } + } } - - abort(); + g_assert_not_reached(); } static void sh_intc_toggle_mask(struct intc_desc *desc, intc_enum id, - int enable, int is_group) + int enable, int is_group) { - struct intc_source *source = desc->sources + id; - - if (!id) - return; + struct intc_source *source = &desc->sources[id]; + if (!id) { + return; + } if (!source->next_enum_id && (!source->enable_max || !source->vect)) { -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: reserved interrupt source %d modified\n", id); -#endif - return; + qemu_log_mask(LOG_UNIMP, + "sh_intc: reserved interrupt source %d modified\n", id); + return; } - if (source->vect) + if (source->vect) { sh_intc_toggle_source(source, enable ? 
1 : -1, 0); - -#ifdef DEBUG_INTC - else { - printf("setting interrupt group %d to %d\n", id, !!enable); } -#endif if ((is_group || !source->vect) && source->next_enum_id) { sh_intc_toggle_mask(desc, source->next_enum_id, enable, 1); } -#ifdef DEBUG_INTC if (!source->vect) { - printf("setting interrupt group %d to %d - done\n", id, !!enable); + trace_sh_intc_set(id, !!enable); } -#endif } -static uint64_t sh_intc_read(void *opaque, hwaddr offset, - unsigned size) +static uint64_t sh_intc_read(void *opaque, hwaddr offset, unsigned size) { struct intc_desc *desc = opaque; - intc_enum *enum_ids = NULL; - unsigned int first = 0; - unsigned int width = 0; - unsigned int mode = 0; + intc_enum *enum_ids; + unsigned int first; + unsigned int width; + unsigned int mode; unsigned long *valuep; -#ifdef DEBUG_INTC - printf("sh_intc_read 0x%lx\n", (unsigned long) offset); -#endif - - sh_intc_locate(desc, (unsigned long)offset, &valuep, - &enum_ids, &first, &width, &mode); + sh_intc_locate(desc, (unsigned long)offset, &valuep, + &enum_ids, &first, &width, &mode); + trace_sh_intc_read(size, (uint64_t)offset, *valuep); return *valuep; } @@ -245,45 +214,40 @@ static void sh_intc_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { struct intc_desc *desc = opaque; - intc_enum *enum_ids = NULL; - unsigned int first = 0; - unsigned int width = 0; - unsigned int mode = 0; - unsigned int k; + intc_enum *enum_ids; + unsigned int first; + unsigned int width; + unsigned int mode; unsigned long *valuep; + unsigned int k; unsigned long mask; -#ifdef DEBUG_INTC - printf("sh_intc_write 0x%lx 0x%08x\n", (unsigned long) offset, value); -#endif - - sh_intc_locate(desc, (unsigned long)offset, &valuep, - &enum_ids, &first, &width, &mode); - + trace_sh_intc_write(size, (uint64_t)offset, value); + sh_intc_locate(desc, (unsigned long)offset, &valuep, + &enum_ids, &first, &width, &mode); switch (mode) { - case INTC_MODE_ENABLE_REG | INTC_MODE_IS_PRIO: break; - case INTC_MODE_DUAL_SET: value |= *valuep; break; - case INTC_MODE_DUAL_CLR: value = *valuep & ~value; break; - default: abort(); + case INTC_MODE_ENABLE_REG | INTC_MODE_IS_PRIO: + break; + case INTC_MODE_DUAL_SET: + value |= *valuep; + break; + case INTC_MODE_DUAL_CLR: + value = *valuep & ~value; + break; + default: + g_assert_not_reached(); } for (k = 0; k <= first; k++) { - mask = ((1 << width) - 1) << ((first - k) * width); - - if ((*valuep & mask) == (value & mask)) - continue; -#if 0 - printf("k = %d, first = %d, enum = %d, mask = 0x%08x\n", - k, first, enum_ids[k], (unsigned int)mask); -#endif - sh_intc_toggle_mask(desc, enum_ids[k], value & mask, 0); + mask = (1 << width) - 1; + mask <<= (first - k) * width; + + if ((*valuep & mask) != (value & mask)) { + sh_intc_toggle_mask(desc, enum_ids[k], value & mask, 0); + } } *valuep = value; - -#ifdef DEBUG_INTC - printf("sh_intc_write 0x%lx -> 0x%08x\n", (unsigned long) offset, value); -#endif } static const MemoryRegionOps sh_intc_ops = { @@ -292,155 +256,136 @@ static const MemoryRegionOps sh_intc_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -struct intc_source *sh_intc_source(struct intc_desc *desc, intc_enum id) -{ - if (id) - return desc->sources + id; - - return NULL; -} - -static unsigned int sh_intc_register(MemoryRegion *sysmem, - struct intc_desc *desc, - const unsigned long address, - const char *type, - const char *action, - const unsigned int index) -{ - char name[60]; - MemoryRegion *iomem, *iomem_p4, *iomem_a7; - - if (!address) { - return 0; - } - - iomem = &desc->iomem; - iomem_p4 = 
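/*
 * Editor's sketch (not part of the patch): the set/clear register pairing
 * resolved by sh_intc_mode() and applied in sh_intc_write() above.  When a
 * register has both a set and a clear address, a write through the set
 * address can only turn bits on and a write through the clear address can
 * only turn bits off; the stored value keeps whatever survives.  Values are
 * invented.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t reg = 0x000000f0;       /* current register contents        */

    uint32_t set_write = 0x0000000f;
    reg |= set_write;                /* INTC_MODE_DUAL_SET  -> 0xff      */

    uint32_t clr_write = 0x00000030;
    reg &= ~clr_write;               /* INTC_MODE_DUAL_CLR  -> 0xcf      */

    printf("0x%08x\n", reg);
    return 0;
}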
desc->iomem_aliases + index; - iomem_a7 = iomem_p4 + 1; - -#define SH_INTC_IOMEM_FORMAT "interrupt-controller-%s-%s-%s" - snprintf(name, sizeof(name), SH_INTC_IOMEM_FORMAT, type, action, "p4"); - memory_region_init_alias(iomem_p4, NULL, name, iomem, INTC_A7(address), 4); - memory_region_add_subregion(sysmem, P4ADDR(address), iomem_p4); - - snprintf(name, sizeof(name), SH_INTC_IOMEM_FORMAT, type, action, "a7"); - memory_region_init_alias(iomem_a7, NULL, name, iomem, INTC_A7(address), 4); - memory_region_add_subregion(sysmem, A7ADDR(address), iomem_a7); -#undef SH_INTC_IOMEM_FORMAT - - /* used to increment aliases index */ - return 2; -} - static void sh_intc_register_source(struct intc_desc *desc, - intc_enum source, - struct intc_group *groups, - int nr_groups) + intc_enum source, + struct intc_group *groups, + int nr_groups) { unsigned int i, k; - struct intc_source *s; + intc_enum id; if (desc->mask_regs) { for (i = 0; i < desc->nr_mask_regs; i++) { - struct intc_mask_reg *mr = desc->mask_regs + i; - - for (k = 0; k < ARRAY_SIZE(mr->enum_ids); k++) { - if (mr->enum_ids[k] != source) - continue; + struct intc_mask_reg *mr = &desc->mask_regs[i]; - s = sh_intc_source(desc, mr->enum_ids[k]); - if (s) - s->enable_max++; - } - } + for (k = 0; k < ARRAY_SIZE(mr->enum_ids); k++) { + id = mr->enum_ids[k]; + if (id && id == source) { + desc->sources[id].enable_max++; + } + } + } } if (desc->prio_regs) { for (i = 0; i < desc->nr_prio_regs; i++) { - struct intc_prio_reg *pr = desc->prio_regs + i; - - for (k = 0; k < ARRAY_SIZE(pr->enum_ids); k++) { - if (pr->enum_ids[k] != source) - continue; + struct intc_prio_reg *pr = &desc->prio_regs[i]; - s = sh_intc_source(desc, pr->enum_ids[k]); - if (s) - s->enable_max++; - } - } + for (k = 0; k < ARRAY_SIZE(pr->enum_ids); k++) { + id = pr->enum_ids[k]; + if (id && id == source) { + desc->sources[id].enable_max++; + } + } + } } if (groups) { for (i = 0; i < nr_groups; i++) { - struct intc_group *gr = groups + i; + struct intc_group *gr = &groups[i]; - for (k = 0; k < ARRAY_SIZE(gr->enum_ids); k++) { - if (gr->enum_ids[k] != source) - continue; - - s = sh_intc_source(desc, gr->enum_ids[k]); - if (s) - s->enable_max++; - } - } + for (k = 0; k < ARRAY_SIZE(gr->enum_ids); k++) { + id = gr->enum_ids[k]; + if (id && id == source) { + desc->sources[id].enable_max++; + } + } + } } } void sh_intc_register_sources(struct intc_desc *desc, - struct intc_vect *vectors, - int nr_vectors, - struct intc_group *groups, - int nr_groups) + struct intc_vect *vectors, + int nr_vectors, + struct intc_group *groups, + int nr_groups) { unsigned int i, k; + intc_enum id; struct intc_source *s; for (i = 0; i < nr_vectors; i++) { - struct intc_vect *vect = vectors + i; + struct intc_vect *vect = &vectors[i]; - sh_intc_register_source(desc, vect->enum_id, groups, nr_groups); - s = sh_intc_source(desc, vect->enum_id); - if (s) { + sh_intc_register_source(desc, vect->enum_id, groups, nr_groups); + id = vect->enum_id; + if (id) { + s = &desc->sources[id]; s->vect = vect->vect; - -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: registered source %d -> 0x%04x (%d/%d)\n", - vect->enum_id, s->vect, s->enable_count, s->enable_max); -#endif + trace_sh_intc_register("source", vect->enum_id, s->vect, + s->enable_count, s->enable_max); } } if (groups) { for (i = 0; i < nr_groups; i++) { - struct intc_group *gr = groups + i; + struct intc_group *gr = &groups[i]; + + id = gr->enum_id; + s = &desc->sources[id]; + s->next_enum_id = gr->enum_ids[0]; + + for (k = 1; k < ARRAY_SIZE(gr->enum_ids); k++) { + 
if (gr->enum_ids[k]) { + id = gr->enum_ids[k - 1]; + s = &desc->sources[id]; + s->next_enum_id = gr->enum_ids[k]; + } + } + trace_sh_intc_register("group", gr->enum_id, 0xffff, + s->enable_count, s->enable_max); + } + } +} + +static unsigned int sh_intc_register(MemoryRegion *sysmem, + struct intc_desc *desc, + const unsigned long address, + const char *type, + const char *action, + const unsigned int index) +{ + char name[60]; + MemoryRegion *iomem, *iomem_p4, *iomem_a7; - s = sh_intc_source(desc, gr->enum_id); - s->next_enum_id = gr->enum_ids[0]; + if (!address) { + return 0; + } - for (k = 1; k < ARRAY_SIZE(gr->enum_ids); k++) { - if (!gr->enum_ids[k]) - continue; + iomem = &desc->iomem; + iomem_p4 = &desc->iomem_aliases[index]; + iomem_a7 = iomem_p4 + 1; - s = sh_intc_source(desc, gr->enum_ids[k - 1]); - s->next_enum_id = gr->enum_ids[k]; - } + snprintf(name, sizeof(name), "intc-%s-%s-%s", type, action, "p4"); + memory_region_init_alias(iomem_p4, NULL, name, iomem, A7ADDR(address), 4); + memory_region_add_subregion(sysmem, P4ADDR(address), iomem_p4); -#ifdef DEBUG_INTC_SOURCES - printf("sh_intc: registered group %d (%d/%d)\n", - gr->enum_id, s->enable_count, s->enable_max); -#endif - } - } + snprintf(name, sizeof(name), "intc-%s-%s-%s", type, action, "a7"); + memory_region_init_alias(iomem_a7, NULL, name, iomem, A7ADDR(address), 4); + memory_region_add_subregion(sysmem, A7ADDR(address), iomem_a7); + + /* used to increment aliases index */ + return 2; } int sh_intc_init(MemoryRegion *sysmem, - struct intc_desc *desc, - int nr_sources, - struct intc_mask_reg *mask_regs, - int nr_mask_regs, - struct intc_prio_reg *prio_regs, - int nr_prio_regs) + struct intc_desc *desc, + int nr_sources, + struct intc_mask_reg *mask_regs, + int nr_mask_regs, + struct intc_prio_reg *prio_regs, + int nr_prio_regs) { unsigned int i, j; @@ -450,65 +395,55 @@ int sh_intc_init(MemoryRegion *sysmem, desc->nr_mask_regs = nr_mask_regs; desc->prio_regs = prio_regs; desc->nr_prio_regs = nr_prio_regs; - /* Allocate 4 MemoryRegions per register (2 actions * 2 aliases). 
- **/ + /* Allocate 4 MemoryRegions per register (2 actions * 2 aliases) */ desc->iomem_aliases = g_new0(MemoryRegion, (nr_mask_regs + nr_prio_regs) * 4); - - j = 0; - i = sizeof(struct intc_source) * nr_sources; - desc->sources = g_malloc0(i); - - for (i = 0; i < desc->nr_sources; i++) { - struct intc_source *source = desc->sources + i; - - source->parent = desc; + desc->sources = g_new0(struct intc_source, nr_sources); + for (i = 0; i < nr_sources; i++) { + desc->sources[i].parent = desc; } - desc->irqs = qemu_allocate_irqs(sh_intc_set_irq, desc, nr_sources); - - memory_region_init_io(&desc->iomem, NULL, &sh_intc_ops, desc, - "interrupt-controller", 0x100000000ULL); - -#define INT_REG_PARAMS(reg_struct, type, action, j) \ - reg_struct->action##_reg, #type, #action, j + memory_region_init_io(&desc->iomem, NULL, &sh_intc_ops, desc, "intc", + 0x100000000ULL); + j = 0; if (desc->mask_regs) { for (i = 0; i < desc->nr_mask_regs; i++) { - struct intc_mask_reg *mr = desc->mask_regs + i; + struct intc_mask_reg *mr = &desc->mask_regs[i]; - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(mr, mask, set, j)); - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(mr, mask, clr, j)); - } + j += sh_intc_register(sysmem, desc, mr->set_reg, "mask", "set", j); + j += sh_intc_register(sysmem, desc, mr->clr_reg, "mask", "clr", j); + } } if (desc->prio_regs) { for (i = 0; i < desc->nr_prio_regs; i++) { - struct intc_prio_reg *pr = desc->prio_regs + i; + struct intc_prio_reg *pr = &desc->prio_regs[i]; - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(pr, prio, set, j)); - j += sh_intc_register(sysmem, desc, - INT_REG_PARAMS(pr, prio, clr, j)); - } + j += sh_intc_register(sysmem, desc, pr->set_reg, "prio", "set", j); + j += sh_intc_register(sysmem, desc, pr->clr_reg, "prio", "clr", j); + } } -#undef INT_REG_PARAMS return 0; } -/* Assert level IRL interrupt. - 0:deassert. 1:lowest priority,... 15:highest priority. */ +/* + * Assert level IRL interrupt. + * 0:deassert. 1:lowest priority,... 15:highest priority + */ void sh_intc_set_irl(void *opaque, int n, int level) { struct intc_source *s = opaque; int i, irl = level ^ 15; - for (i = 0; (s = sh_intc_source(s->parent, s->next_enum_id)); i++) { - if (i == irl) - sh_intc_toggle_source(s, s->enable_count?0:1, s->asserted?0:1); - else - if (s->asserted) - sh_intc_toggle_source(s, 0, -1); + intc_enum id = s->next_enum_id; + + for (i = 0; id; id = s->next_enum_id, i++) { + s = &s->parent->sources[id]; + if (i == irl) { + sh_intc_toggle_source(s, s->enable_count ? 0 : 1, + s->asserted ? 0 : 1); + } else if (s->asserted) { + sh_intc_toggle_source(s, 0, -1); + } } } diff --git a/hw/intc/sifive_clint.c b/hw/intc/sifive_clint.c deleted file mode 100644 index 0f41e5ea1ce..00000000000 --- a/hw/intc/sifive_clint.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * SiFive CLINT (Core Local Interruptor) - * - * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu - * Copyright (c) 2017 SiFive, Inc. - * - * This provides real-time clock, timer and interprocessor interrupts. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
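The sh_intc_set_irl() rewrite above keeps the original IRL encoding: the incoming level is inverted with irl = level ^ 15, so level 15 (highest priority) selects the first member of the group's enum chain and level 0 falls past the end of the chain and only deasserts. Below is a standalone sketch of that mapping, purely illustrative and not part of the patch; the 15-entry chain length is an assumption, while the "level 0 deasserts" behaviour follows from the function's own comment.

/* illustration only: how an IRL level picks a member of the interrupt group */
#include <stdio.h>

int main(void)
{
    const int chain_len = 15;          /* assumed number of IRL group members */

    for (int level = 0; level <= 15; level++) {
        int irl = level ^ 15;          /* index into the group's enum chain */

        if (irl >= chain_len) {
            printf("level %2d -> deassert any asserted member\n", level);
        } else {
            printf("level %2d -> assert chain member %2d\n", level, irl);
        }
    }
    return 0;
}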
- * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "hw/sysbus.h" -#include "target/riscv/cpu.h" -#include "hw/qdev-properties.h" -#include "hw/intc/sifive_clint.h" -#include "qemu/timer.h" - -static uint64_t cpu_riscv_read_rtc(uint32_t timebase_freq) -{ - return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - timebase_freq, NANOSECONDS_PER_SECOND); -} - -/* - * Called when timecmp is written to update the QEMU timer or immediately - * trigger timer interrupt if mtimecmp <= current timer value. - */ -static void sifive_clint_write_timecmp(RISCVCPU *cpu, uint64_t value, - uint32_t timebase_freq) -{ - uint64_t next; - uint64_t diff; - - uint64_t rtc_r = cpu_riscv_read_rtc(timebase_freq); - - cpu->env.timecmp = value; - if (cpu->env.timecmp <= rtc_r) { - /* if we're setting an MTIMECMP value in the "past", - immediately raise the timer interrupt */ - riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(1)); - return; - } - - /* otherwise, set up the future timer interrupt */ - riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(0)); - diff = cpu->env.timecmp - rtc_r; - /* back to ns (note args switched in muldiv64) */ - next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); - timer_mod(cpu->env.timer, next); -} - -/* - * Callback used when the timer set using timer_mod expires. - * Should raise the timer interrupt line - */ -static void sifive_clint_timer_cb(void *opaque) -{ - RISCVCPU *cpu = opaque; - riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(1)); -} - -/* CPU wants to read rtc or timecmp register */ -static uint64_t sifive_clint_read(void *opaque, hwaddr addr, unsigned size) -{ - SiFiveCLINTState *clint = opaque; - if (addr >= clint->sip_base && - addr < clint->sip_base + (clint->num_harts << 2)) { - size_t hartid = clint->hartid_base + ((addr - clint->sip_base) >> 2); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x3) == 0) { - return (env->mip & MIP_MSIP) > 0; - } else { - error_report("clint: invalid read: %08x", (uint32_t)addr); - return 0; - } - } else if (addr >= clint->timecmp_base && - addr < clint->timecmp_base + (clint->num_harts << 3)) { - size_t hartid = clint->hartid_base + - ((addr - clint->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? 
cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x7) == 0) { - /* timecmp_lo */ - uint64_t timecmp = env->timecmp; - return timecmp & 0xFFFFFFFF; - } else if ((addr & 0x7) == 4) { - /* timecmp_hi */ - uint64_t timecmp = env->timecmp; - return (timecmp >> 32) & 0xFFFFFFFF; - } else { - error_report("clint: invalid read: %08x", (uint32_t)addr); - return 0; - } - } else if (addr == clint->time_base) { - /* time_lo */ - return cpu_riscv_read_rtc(clint->timebase_freq) & 0xFFFFFFFF; - } else if (addr == clint->time_base + 4) { - /* time_hi */ - return (cpu_riscv_read_rtc(clint->timebase_freq) >> 32) & 0xFFFFFFFF; - } - - error_report("clint: invalid read: %08x", (uint32_t)addr); - return 0; -} - -/* CPU wrote to rtc or timecmp register */ -static void sifive_clint_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - SiFiveCLINTState *clint = opaque; - - if (addr >= clint->sip_base && - addr < clint->sip_base + (clint->num_harts << 2)) { - size_t hartid = clint->hartid_base + ((addr - clint->sip_base) >> 2); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x3) == 0) { - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MSIP, BOOL_TO_MASK(value)); - } else { - error_report("clint: invalid sip write: %08x", (uint32_t)addr); - } - return; - } else if (addr >= clint->timecmp_base && - addr < clint->timecmp_base + (clint->num_harts << 3)) { - size_t hartid = clint->hartid_base + - ((addr - clint->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - error_report("clint: invalid timecmp hartid: %zu", hartid); - } else if ((addr & 0x7) == 0) { - /* timecmp_lo */ - uint64_t timecmp_hi = env->timecmp >> 32; - sifive_clint_write_timecmp(RISCV_CPU(cpu), - timecmp_hi << 32 | (value & 0xFFFFFFFF), clint->timebase_freq); - return; - } else if ((addr & 0x7) == 4) { - /* timecmp_hi */ - uint64_t timecmp_lo = env->timecmp; - sifive_clint_write_timecmp(RISCV_CPU(cpu), - value << 32 | (timecmp_lo & 0xFFFFFFFF), clint->timebase_freq); - } else { - error_report("clint: invalid timecmp write: %08x", (uint32_t)addr); - } - return; - } else if (addr == clint->time_base) { - /* time_lo */ - error_report("clint: time_lo write not implemented"); - return; - } else if (addr == clint->time_base + 4) { - /* time_hi */ - error_report("clint: time_hi write not implemented"); - return; - } - - error_report("clint: invalid write: %08x", (uint32_t)addr); -} - -static const MemoryRegionOps sifive_clint_ops = { - .read = sifive_clint_read, - .write = sifive_clint_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 8 - } -}; - -static Property sifive_clint_properties[] = { - DEFINE_PROP_UINT32("hartid-base", SiFiveCLINTState, hartid_base, 0), - DEFINE_PROP_UINT32("num-harts", SiFiveCLINTState, num_harts, 0), - DEFINE_PROP_UINT32("sip-base", SiFiveCLINTState, sip_base, 0), - DEFINE_PROP_UINT32("timecmp-base", SiFiveCLINTState, timecmp_base, 0), - DEFINE_PROP_UINT32("time-base", SiFiveCLINTState, time_base, 0), - DEFINE_PROP_UINT32("aperture-size", SiFiveCLINTState, aperture_size, 0), - DEFINE_PROP_UINT32("timebase-freq", SiFiveCLINTState, timebase_freq, 0), - DEFINE_PROP_END_OF_LIST(), -}; - -static void sifive_clint_realize(DeviceState *dev, Error **errp) -{ - SiFiveCLINTState *s = 
SIFIVE_CLINT(dev); - memory_region_init_io(&s->mmio, OBJECT(dev), &sifive_clint_ops, s, - TYPE_SIFIVE_CLINT, s->aperture_size); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); -} - -static void sifive_clint_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - dc->realize = sifive_clint_realize; - device_class_set_props(dc, sifive_clint_properties); -} - -static const TypeInfo sifive_clint_info = { - .name = TYPE_SIFIVE_CLINT, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(SiFiveCLINTState), - .class_init = sifive_clint_class_init, -}; - -static void sifive_clint_register_types(void) -{ - type_register_static(&sifive_clint_info); -} - -type_init(sifive_clint_register_types) - - -/* - * Create CLINT device. - */ -DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, - uint32_t hartid_base, uint32_t num_harts, uint32_t sip_base, - uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, - bool provide_rdtime) -{ - int i; - for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); - CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - if (!env) { - continue; - } - if (provide_rdtime) { - riscv_cpu_set_rdtime_fn(env, cpu_riscv_read_rtc, timebase_freq); - } - env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - &sifive_clint_timer_cb, cpu); - env->timecmp = 0; - } - - DeviceState *dev = qdev_new(TYPE_SIFIVE_CLINT); - qdev_prop_set_uint32(dev, "hartid-base", hartid_base); - qdev_prop_set_uint32(dev, "num-harts", num_harts); - qdev_prop_set_uint32(dev, "sip-base", sip_base); - qdev_prop_set_uint32(dev, "timecmp-base", timecmp_base); - qdev_prop_set_uint32(dev, "time-base", time_base); - qdev_prop_set_uint32(dev, "aperture-size", size); - qdev_prop_set_uint32(dev, "timebase-freq", timebase_freq); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); - return dev; -} diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c index 97a1a27a9ac..877e76877cc 100644 --- a/hw/intc/sifive_plic.c +++ b/hw/intc/sifive_plic.c @@ -25,12 +25,11 @@ #include "qemu/error-report.h" #include "hw/sysbus.h" #include "hw/pci/msi.h" -#include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/intc/sifive_plic.h" #include "target/riscv/cpu.h" -#include "sysemu/sysemu.h" #include "migration/vmstate.h" +#include "hw/irq.h" #define RISCV_DEBUG_PLIC 0 @@ -141,18 +140,14 @@ static void sifive_plic_update(SiFivePLICState *plic) for (addrid = 0; addrid < plic->num_addrs; addrid++) { uint32_t hartid = plic->addr_config[addrid].hartid; PLICMode mode = plic->addr_config[addrid].mode; - CPUState *cpu = qemu_get_cpu(hartid); - CPURISCVState *env = cpu ? 
cpu->env_ptr : NULL; - if (!env) { - continue; - } int level = sifive_plic_irqs_pending(plic, addrid); + switch (mode) { case PLICMode_M: - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_MEIP, BOOL_TO_MASK(level)); + qemu_set_irq(plic->m_external_irqs[hartid - plic->hartid_base], level); break; case PLICMode_S: - riscv_cpu_update_mip(RISCV_CPU(cpu), MIP_SEIP, BOOL_TO_MASK(level)); + qemu_set_irq(plic->s_external_irqs[hartid - plic->hartid_base], level); break; default: break; @@ -360,21 +355,6 @@ static const MemoryRegionOps sifive_plic_ops = { } }; -static Property sifive_plic_properties[] = { - DEFINE_PROP_STRING("hart-config", SiFivePLICState, hart_config), - DEFINE_PROP_UINT32("hartid-base", SiFivePLICState, hartid_base, 0), - DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 0), - DEFINE_PROP_UINT32("num-priorities", SiFivePLICState, num_priorities, 0), - DEFINE_PROP_UINT32("priority-base", SiFivePLICState, priority_base, 0), - DEFINE_PROP_UINT32("pending-base", SiFivePLICState, pending_base, 0), - DEFINE_PROP_UINT32("enable-base", SiFivePLICState, enable_base, 0), - DEFINE_PROP_UINT32("enable-stride", SiFivePLICState, enable_stride, 0), - DEFINE_PROP_UINT32("context-base", SiFivePLICState, context_base, 0), - DEFINE_PROP_UINT32("context-stride", SiFivePLICState, context_stride, 0), - DEFINE_PROP_UINT32("aperture-size", SiFivePLICState, aperture_size, 0), - DEFINE_PROP_END_OF_LIST(), -}; - /* * parse PLIC hart/mode address offset config * @@ -432,39 +412,46 @@ static void parse_hart_config(SiFivePLICState *plic) static void sifive_plic_irq_request(void *opaque, int irq, int level) { - SiFivePLICState *plic = opaque; - if (RISCV_DEBUG_PLIC) { - qemu_log("sifive_plic_irq_request: irq=%d level=%d\n", irq, level); - } - sifive_plic_set_pending(plic, irq, level > 0); - sifive_plic_update(plic); + SiFivePLICState *s = opaque; + + sifive_plic_set_pending(s, irq, level > 0); + sifive_plic_update(s); } static void sifive_plic_realize(DeviceState *dev, Error **errp) { - SiFivePLICState *plic = SIFIVE_PLIC(dev); + SiFivePLICState *s = SIFIVE_PLIC(dev); int i; - memory_region_init_io(&plic->mmio, OBJECT(dev), &sifive_plic_ops, plic, - TYPE_SIFIVE_PLIC, plic->aperture_size); - parse_hart_config(plic); - plic->bitfield_words = (plic->num_sources + 31) >> 5; - plic->num_enables = plic->bitfield_words * plic->num_addrs; - plic->source_priority = g_new0(uint32_t, plic->num_sources); - plic->target_priority = g_new(uint32_t, plic->num_addrs); - plic->pending = g_new0(uint32_t, plic->bitfield_words); - plic->claimed = g_new0(uint32_t, plic->bitfield_words); - plic->enable = g_new0(uint32_t, plic->num_enables); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &plic->mmio); - qdev_init_gpio_in(dev, sifive_plic_irq_request, plic->num_sources); + memory_region_init_io(&s->mmio, OBJECT(dev), &sifive_plic_ops, s, + TYPE_SIFIVE_PLIC, s->aperture_size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); + + parse_hart_config(s); + + s->bitfield_words = (s->num_sources + 31) >> 5; + s->num_enables = s->bitfield_words * s->num_addrs; + s->source_priority = g_new0(uint32_t, s->num_sources); + s->target_priority = g_new(uint32_t, s->num_addrs); + s->pending = g_new0(uint32_t, s->bitfield_words); + s->claimed = g_new0(uint32_t, s->bitfield_words); + s->enable = g_new0(uint32_t, s->num_enables); + + qdev_init_gpio_in(dev, sifive_plic_irq_request, s->num_sources); + + s->s_external_irqs = g_malloc(sizeof(qemu_irq) * s->num_harts); + qdev_init_gpio_out(dev, s->s_external_irqs, s->num_harts); + + s->m_external_irqs = 
g_malloc(sizeof(qemu_irq) * s->num_harts); + qdev_init_gpio_out(dev, s->m_external_irqs, s->num_harts); /* We can't allow the supervisor to control SEIP as this would allow the * supervisor to clear a pending external interrupt which will result in * lost a interrupt in the case a PLIC is attached. The SEIP bit must be * hardware controlled when a PLIC is attached. */ - for (i = 0; i < plic->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(plic->hartid_base + i)); + for (i = 0; i < s->num_harts; i++) { + RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, MIP_SEIP) < 0) { error_report("SEIP already claimed"); exit(1); @@ -495,6 +482,21 @@ static const VMStateDescription vmstate_sifive_plic = { } }; +static Property sifive_plic_properties[] = { + DEFINE_PROP_STRING("hart-config", SiFivePLICState, hart_config), + DEFINE_PROP_UINT32("hartid-base", SiFivePLICState, hartid_base, 0), + DEFINE_PROP_UINT32("num-sources", SiFivePLICState, num_sources, 0), + DEFINE_PROP_UINT32("num-priorities", SiFivePLICState, num_priorities, 0), + DEFINE_PROP_UINT32("priority-base", SiFivePLICState, priority_base, 0), + DEFINE_PROP_UINT32("pending-base", SiFivePLICState, pending_base, 0), + DEFINE_PROP_UINT32("enable-base", SiFivePLICState, enable_base, 0), + DEFINE_PROP_UINT32("enable-stride", SiFivePLICState, enable_stride, 0), + DEFINE_PROP_UINT32("context-base", SiFivePLICState, context_base, 0), + DEFINE_PROP_UINT32("context-stride", SiFivePLICState, context_stride, 0), + DEFINE_PROP_UINT32("aperture-size", SiFivePLICState, aperture_size, 0), + DEFINE_PROP_END_OF_LIST(), +}; + static void sifive_plic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -522,6 +524,7 @@ type_init(sifive_plic_register_types) * Create PLIC device. */ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, + uint32_t num_harts, uint32_t hartid_base, uint32_t num_sources, uint32_t num_priorities, uint32_t priority_base, uint32_t pending_base, uint32_t enable_base, @@ -529,6 +532,8 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, uint32_t context_stride, uint32_t aperture_size) { DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC); + int i; + assert(enable_stride == (enable_stride & -enable_stride)); assert(context_stride == (context_stride & -context_stride)); qdev_prop_set_string(dev, "hart-config", hart_config); @@ -544,5 +549,15 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, qdev_prop_set_uint32(dev, "aperture-size", aperture_size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + for (i = 0; i < num_harts; i++) { + CPUState *cpu = qemu_get_cpu(hartid_base + i); + + qdev_connect_gpio_out(dev, i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT)); + qdev_connect_gpio_out(dev, num_harts + i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); + } + return dev; } diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 801bc193416..4ec659b93e1 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -185,7 +185,7 @@ static void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", pq & XIVE_ESB_VAL_P ? 'P' : '-', pq & XIVE_ESB_VAL_Q ? 'Q' : '-', - xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' ', + xive_source_is_asserted(xsrc, i) ? 'A' : ' ', xive_eas_is_masked(eas) ? 
"M" : " ", (int) xive_get_field64(EAS_END_DATA, eas->w)); @@ -1798,7 +1798,7 @@ static target_ulong h_int_reset(PowerPCCPU *cpu, return H_PARAMETER; } - device_legacy_reset(DEVICE(xive)); + device_cold_reset(DEVICE(xive)); if (spapr_xive_in_kernel(xive)) { Error *local_err = NULL; diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index c0083311607..61fe7bd2d32 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -236,11 +236,13 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) SpaprXive *xive = SPAPR_XIVE(xsrc->xive); uint64_t state = 0; + trace_kvm_xive_source_reset(srcno); + assert(xive->fd != -1); if (xive_source_irq_is_lsi(xsrc, srcno)) { state |= KVM_XIVE_LEVEL_SENSITIVE; - if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + if (xive_source_is_asserted(xsrc, srcno)) { state |= KVM_XIVE_LEVEL_ASSERTED; } } @@ -297,11 +299,9 @@ static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset) return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3; } -static void xive_esb_trigger(XiveSource *xsrc, int srcno) +static void kvmppc_xive_esb_trigger(XiveSource *xsrc, int srcno) { - uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno); - - *addr = 0x0; + xive_esb_rw(xsrc, srcno, 0, 0, true); } uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, @@ -311,8 +311,6 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, return xive_esb_rw(xsrc, srcno, offset, data, 1); } - trace_kvm_xive_source_reset(srcno); - /* * Special Load EOI handling for LSI sources. Q bit is never set * and the interrupt should be re-triggered if the level is still @@ -321,8 +319,8 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, if (xive_source_irq_is_lsi(xsrc, srcno) && offset == XIVE_ESB_LOAD_EOI) { xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00); - if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { - xive_esb_trigger(xsrc, srcno); + if (xive_source_is_asserted(xsrc, srcno)) { + kvmppc_xive_esb_trigger(xsrc, srcno); } return 0; } else { @@ -359,14 +357,10 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) return; } } else { - if (val) { - xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; - } else { - xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; - } + xive_source_set_asserted(xsrc, srcno, val); } - xive_esb_trigger(xsrc, srcno); + kvmppc_xive_esb_trigger(xsrc, srcno); } /* @@ -533,7 +527,7 @@ static void kvmppc_xive_change_state_handler(void *opaque, bool running, * generate a trigger. */ if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) { - xive_esb_trigger(xsrc, i); + kvmppc_xive_esb_trigger(xsrc, i); } } diff --git a/hw/intc/trace-events b/hw/intc/trace-events index c9ab17234b4..9aba7e3a7a4 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# i8259.c pic_update_irq(bool master, uint8_t imr, uint8_t irr, uint8_t padd) "master %d imr %"PRIu8" irr %"PRIu8" padd %"PRIu8 @@ -51,15 +51,6 @@ grlib_irqmp_set_irq(int irq) "Raise CPU IRQ %d" grlib_irqmp_readl_unknown(uint64_t addr) "addr 0x%"PRIx64 grlib_irqmp_writel_unknown(uint64_t addr, uint32_t value) "addr 0x%"PRIx64" value 0x%x" -# lm32_pic.c -lm32_pic_raise_irq(void) "Raise CPU interrupt" -lm32_pic_lower_irq(void) "Lower CPU interrupt" -lm32_pic_interrupt(int irq, int level) "Set IRQ%d %d" -lm32_pic_set_im(uint32_t im) "im 0x%08x" -lm32_pic_set_ip(uint32_t ip) "ip 0x%08x" -lm32_pic_get_im(uint32_t im) "im 0x%08x" -lm32_pic_get_ip(uint32_t ip) "ip 0x%08x" - # xics.c xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=0x%x" xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR 0x%"PRIx32"->0x%"PRIx32 @@ -228,14 +219,14 @@ kvm_xive_source_reset(uint32_t srcno) "IRQ 0x%x" xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK" xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !" xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x" -xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x0x%"PRIx64" IRQ 0x%x val=0x0x%"PRIx64 -xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x0x%"PRIx64" IRQ 0x%x val=0x0x%"PRIx64 +xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 +xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64 xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "END 0x%02x/0x%04x -> enqueue 0x%08x" xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x" -xive_tctx_tm_write(uint64_t offset, unsigned int size, uint64_t value) "@0x0x%"PRIx64" sz=%d val=0x%" PRIx64 -xive_tctx_tm_read(uint64_t offset, unsigned int size, uint64_t value) "@0x0x%"PRIx64" sz=%d val=0x%" PRIx64 +xive_tctx_tm_write(uint64_t offset, unsigned int size, uint64_t value) "@0x%"PRIx64" sz=%d val=0x%" PRIx64 +xive_tctx_tm_read(uint64_t offset, unsigned int size, uint64_t value) "@0x%"PRIx64" sz=%d val=0x%" PRIx64 xive_presenter_notify(uint8_t nvt_blk, uint32_t nvt_idx, uint8_t ring) "found NVT 0x%x/0x%x ring=0x%x" -xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x/0x%x @0x0x%"PRIx64 +xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x/0x%x @0x%"PRIx64 # pnv_xive.c pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64 @@ -247,3 +238,11 @@ goldfish_pic_write(void *dev, int idx, unsigned int addr, unsigned int size, uin goldfish_pic_reset(void *dev, int idx) "pic: %p goldfish-irq.%d" goldfish_pic_realize(void *dev, int idx) "pic: %p goldfish-irq.%d" goldfish_pic_instance_init(void *dev) "pic: %p goldfish-irq" + +# sh_intc.c +sh_intc_sources(int p, int a, int c, int m, unsigned short v, const char *s1, const char *s2, const char *s3) "(%d/%d/%d/%d) interrupt source 0x%x %s%s%s" +sh_intc_pending(int p, unsigned short v) "(%d) returning interrupt source 
0x%x" +sh_intc_register(const char *s, int id, unsigned short v, int c, int m) "%s %u -> 0x%04x (%d/%d)" +sh_intc_read(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " -> 0x%lx" +sh_intc_write(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " <- 0x%lx" +sh_intc_set(int id, int enable) "setting interrupt group %d to %d" diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 68f9d44feb4..48a835eab7c 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "trace.h" #include "qemu/timer.h" #include "hw/ppc/xics.h" diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 570d635bcc0..f5bfc501bc1 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "cpu.h" #include "trace.h" #include "sysemu/kvm.h" #include "hw/ppc/spapr.h" diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 8ae4f41459c..37b2d99977a 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -26,7 +26,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "trace.h" #include "qemu/timer.h" #include "hw/ppc/spapr.h" diff --git a/hw/intc/xive.c b/hw/intc/xive.c index eeb4e62ba95..190194d27f8 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -27,17 +27,6 @@ * XIVE Thread Interrupt Management context */ -/* - * Convert a priority number to an Interrupt Pending Buffer (IPB) - * register, which indicates a pending interrupt at the priority - * corresponding to the bit number - */ -static uint8_t priority_to_ipb(uint8_t priority) -{ - return priority > XIVE_PRIORITY_MAX ? - 0 : 1 << (XIVE_PRIORITY_MAX - priority); -} - /* * Convert an Interrupt Pending Buffer (IPB) register to a Pending * Interrupt Priority Register (PIPR), which contains the priority of @@ -89,7 +78,7 @@ static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring) regs[TM_CPPR] = cppr; /* Reset the pending buffer bit */ - regs[TM_IPB] &= ~priority_to_ipb(cppr); + regs[TM_IPB] &= ~xive_priority_to_ipb(cppr); regs[TM_PIPR] = ipb_to_pipr(regs[TM_IPB]); /* Drop Exception bit */ @@ -152,11 +141,6 @@ void xive_tctx_ipb_update(XiveTCTX *tctx, uint8_t ring, uint8_t ipb) xive_tctx_notify(tctx, ring); } -static inline uint32_t xive_tctx_word2(uint8_t *ring) -{ - return *((uint32_t *) &ring[TM_WORD2]); -} - /* * XIVE Thread Interrupt Management Area (TIMA) */ @@ -353,7 +337,7 @@ static void xive_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx, static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, uint64_t value, unsigned size) { - xive_tctx_ipb_update(tctx, TM_QW1_OS, priority_to_ipb(value & 0xff)); + xive_tctx_ipb_update(tctx, TM_QW1_OS, xive_priority_to_ipb(value & 0xff)); } static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk, @@ -816,7 +800,7 @@ void xive_tctx_destroy(XiveTCTX *tctx) * XIVE ESB helpers */ -static uint8_t xive_esb_set(uint8_t *pq, uint8_t value) +uint8_t xive_esb_set(uint8_t *pq, uint8_t value) { uint8_t old_pq = *pq & 0x3; @@ -826,7 +810,7 @@ static uint8_t xive_esb_set(uint8_t *pq, uint8_t value) return old_pq; } -static bool xive_esb_trigger(uint8_t *pq) +bool xive_esb_trigger(uint8_t *pq) { uint8_t old_pq = *pq & 0x3; @@ -846,7 +830,7 @@ static bool xive_esb_trigger(uint8_t *pq) } } -static bool xive_esb_eoi(uint8_t *pq) +bool xive_esb_eoi(uint8_t *pq) { uint8_t old_pq = *pq & 0x3; @@ -891,7 +875,7 @@ static bool xive_source_lsi_trigger(XiveSource *xsrc, 
uint32_t srcno) { uint8_t old_pq = xive_source_esb_get(xsrc, srcno); - xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; + xive_source_set_asserted(xsrc, srcno, true); switch (old_pq) { case XIVE_ESB_RESET: @@ -939,7 +923,7 @@ static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno) * notification */ if (xive_source_irq_is_lsi(xsrc, srcno) && - xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { + xive_source_is_asserted(xsrc, srcno)) { ret = xive_source_lsi_trigger(xsrc, srcno); } @@ -1120,7 +1104,7 @@ void xive_source_set_irq(void *opaque, int srcno, int val) if (val) { notify = xive_source_lsi_trigger(xsrc, srcno); } else { - xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; + xive_source_set_asserted(xsrc, srcno, false); } } else { if (val) { @@ -1149,7 +1133,7 @@ void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon) xive_source_irq_is_lsi(xsrc, i) ? "LSI" : "MSI", pq & XIVE_ESB_VAL_P ? 'P' : '-', pq & XIVE_ESB_VAL_Q ? 'Q' : '-', - xsrc->status[i] & XIVE_STATUS_ASSERTED ? 'A' : ' '); + xive_source_is_asserted(xsrc, i) ? 'A' : ' '); } } @@ -1514,10 +1498,10 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, * * The parameters represent what is sent on the PowerBus */ -static bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, - uint8_t nvt_blk, uint32_t nvt_idx, - bool cam_ignore, uint8_t priority, - uint32_t logic_serv) +bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv) { XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb); XiveTCTXMatch match = { .tctx = NULL, .ring = 0 }; @@ -1535,7 +1519,8 @@ static bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, /* handle CPU exception delivery */ if (count) { trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring); - xive_tctx_ipb_update(match.tctx, match.ring, priority_to_ipb(priority)); + xive_tctx_ipb_update(match.tctx, match.ring, + xive_priority_to_ipb(priority)); } return !!count; @@ -1682,7 +1667,8 @@ static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, * use. The presenter will resend the interrupt when the vCPU * is dispatched again on a HW thread. 
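The hunks above drop the file-local priority_to_ipb() helper in favour of a shared xive_priority_to_ipb(), which the router uses below when it stashes a pending priority into the NVT's IPB field. The conversion is a one-hot encoding: priority 0 (most favoured) sets the most significant bit of the 8-bit IPB, priority 7 the least significant, and anything above the maximum maps to 0. A small self-contained illustration follows; the conversion body is the one removed from xive.c above, and the XIVE_PRIORITY_MAX value of 7 is an assumption taken from the usual QEMU definition.

#include <stdio.h>
#include <stdint.h>

#define XIVE_PRIORITY_MAX 7            /* assumed, as used by the XIVE model */

static uint8_t priority_to_ipb(uint8_t priority)
{
    return priority > XIVE_PRIORITY_MAX ? 0 : 1 << (XIVE_PRIORITY_MAX - priority);
}

int main(void)
{
    for (int prio = 0; prio <= 8; prio++) {
        printf("priority %d -> IPB 0x%02x\n", prio, priority_to_ipb((uint8_t)prio));
    }
    return 0;
}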
*/ - ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | priority_to_ipb(priority); + ipb = xive_get_field32(NVT_W4_IPB, nvt.w4) | + xive_priority_to_ipb(priority); nvt.w4 = xive_set_field32(NVT_W4_IPB, nvt.w4, ipb); xive_router_write_nvt(xrtr, nvt_blk, nvt_idx, &nvt, 4); diff --git a/hw/ipack/ipack.c b/hw/ipack/ipack.c index f19ecaeb1cf..ae20f36da68 100644 --- a/hw/ipack/ipack.c +++ b/hw/ipack/ipack.c @@ -30,12 +30,12 @@ IPackDevice *ipack_device_find(IPackBus *bus, int32_t slot) return NULL; } -void ipack_bus_new_inplace(IPackBus *bus, size_t bus_size, - DeviceState *parent, - const char *name, uint8_t n_slots, - qemu_irq_handler handler) +void ipack_bus_init(IPackBus *bus, size_t bus_size, + DeviceState *parent, + uint8_t n_slots, + qemu_irq_handler handler) { - qbus_create_inplace(bus, bus_size, TYPE_IPACK_BUS, parent, name); + qbus_init(bus, bus_size, TYPE_IPACK_BUS, parent, NULL); bus->n_slots = n_slots; bus->set_irq = handler; } diff --git a/hw/ipack/tpci200.c b/hw/ipack/tpci200.c index d107e134c4e..1f764fc85ba 100644 --- a/hw/ipack/tpci200.c +++ b/hw/ipack/tpci200.c @@ -611,8 +611,8 @@ static void tpci200_realize(PCIDevice *pci_dev, Error **errp) pci_register_bar(&s->dev, 4, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->las2); pci_register_bar(&s->dev, 5, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->las3); - ipack_bus_new_inplace(&s->bus, sizeof(s->bus), DEVICE(pci_dev), NULL, - N_MODULES, tpci200_set_irq); + ipack_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), + N_MODULES, tpci200_set_irq); } static const VMStateDescription vmstate_tpci200 = { diff --git a/hw/ipmi/ipmi_bmc_sim.c b/hw/ipmi/ipmi_bmc_sim.c index 55fb81fa5a9..905e091094b 100644 --- a/hw/ipmi/ipmi_bmc_sim.c +++ b/hw/ipmi/ipmi_bmc_sim.c @@ -189,7 +189,7 @@ struct IPMIBmcSim { uint8_t watchdog_use; uint8_t watchdog_action; uint8_t watchdog_pretimeout; /* In seconds */ - bool watchdog_expired; + uint8_t watchdog_expired; uint16_t watchdog_timeout; /* in 100's of milliseconds */ bool watchdog_running; @@ -2110,7 +2110,7 @@ static const VMStateDescription vmstate_ipmi_sim = { VMSTATE_UINT8(watchdog_use, IPMIBmcSim), VMSTATE_UINT8(watchdog_action, IPMIBmcSim), VMSTATE_UINT8(watchdog_pretimeout, IPMIBmcSim), - VMSTATE_BOOL(watchdog_expired, IPMIBmcSim), + VMSTATE_UINT8(watchdog_expired, IPMIBmcSim), VMSTATE_UINT16(watchdog_timeout, IPMIBmcSim), VMSTATE_BOOL(watchdog_running, IPMIBmcSim), VMSTATE_BOOL(watchdog_preaction_ran, IPMIBmcSim), diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c index b7c2ad557b2..02625eb94ed 100644 --- a/hw/ipmi/isa_ipmi_bt.c +++ b/hw/ipmi/isa_ipmi_bt.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" #include "hw/irq.h" diff --git a/hw/ipmi/isa_ipmi_kcs.c b/hw/ipmi/isa_ipmi_kcs.c index 7dd6bf0040a..3b23ad08b34 100644 --- a/hw/ipmi/isa_ipmi_kcs.c +++ b/hw/ipmi/isa_ipmi_kcs.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" #include "hw/irq.h" diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 55e0003ce40..d42143a991e 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -17,6 +17,7 @@ config ISA_SUPERIO bool select ISA_BUS select PCKBD + select FDC_ISA config PC87312 bool @@ -27,7 +28,7 @@ config PC87312 select MC146818RTC select SERIAL_ISA select PARALLEL - select FDC + select FDC_ISA select IDE_ISA config PIIX3 @@ -46,16 +47,21 @@ config VT82C686 select ISA_SUPERIO select ACPI_SMBUS select SERIAL_ISA - select FDC + select FDC_ISA select USB_UHCI select APM + select I8254 + select 
I8257 + select I8259 + select MC146818RTC + select PARALLEL config SMC37C669 bool select ISA_SUPERIO select SERIAL_ISA select PARALLEL - select FDC + select FDC_ISA config LPC_ICH9 bool diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c index 7820068e6e1..6c31398dda6 100644 --- a/hw/isa/isa-bus.c +++ b/hw/isa/isa-bus.c @@ -64,7 +64,7 @@ ISABus *isa_bus_new(DeviceState *dev, MemoryRegion* address_space, sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); } - isabus = ISA_BUS(qbus_create(TYPE_ISA_BUS, dev, NULL)); + isabus = ISA_BUS(qbus_new(TYPE_ISA_BUS, dev, NULL)); isabus->address_space = address_space; isabus->address_space_io = address_space_io; return isabus; @@ -131,13 +131,17 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start) isa_init_ioport(dev, start); } -void isa_register_portio_list(ISADevice *dev, - PortioList *piolist, uint16_t start, - const MemoryRegionPortio *pio_start, - void *opaque, const char *name) +int isa_register_portio_list(ISADevice *dev, + PortioList *piolist, uint16_t start, + const MemoryRegionPortio *pio_start, + void *opaque, const char *name) { assert(piolist && !piolist->owner); + if (!isabus) { + return -ENODEV; + } + /* START is how we should treat DEV, regardless of the actual contents of the portio array. This is how the old code actually handled e.g. the FDC device. */ @@ -145,6 +149,8 @@ void isa_register_portio_list(ISADevice *dev, portio_list_init(piolist, OBJECT(dev), pio_start, opaque, name); portio_list_add(piolist, isabus->address_space_io, start); + + return 0; } static void isa_device_init(Object *obj) diff --git a/hw/isa/isa-superio.c b/hw/isa/isa-superio.c index 179c1856956..c81bfe58ef8 100644 --- a/hw/isa/isa-superio.c +++ b/hw/isa/isa-superio.c @@ -14,7 +14,6 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "qapi/error.h" -#include "sysemu/sysemu.h" #include "sysemu/blockdev.h" #include "chardev/char.h" #include "hw/block/fdc.h" diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 3963b735207..5f143dca17a 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -31,10 +31,10 @@ #include "qemu/osdep.h" #include "qemu/log.h" #include "cpu.h" +#include "qapi/error.h" #include "qapi/visitor.h" #include "qemu/range.h" #include "hw/isa/isa.h" -#include "hw/sysbus.h" #include "migration/vmstate.h" #include "hw/irq.h" #include "hw/isa/apm.h" @@ -45,7 +45,6 @@ #include "hw/acpi/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/core/cpu.h" @@ -678,6 +677,18 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) DeviceState *dev = DEVICE(d); ISABus *isa_bus; + if ((lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)) && + !(lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT))) { + /* + * smi_features_ok_callback() throws an error on this. + * + * So bail out here instead of advertizing the invalid + * configuration and get obscure firmware failures from that. 
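The new check in ich9_lpc_realize() above enforces a dependency between the negotiated SMI features: advertising CPU hot-unplug without CPU hot-plug is rejected at realize time instead of surfacing later as an obscure firmware failure from smi_features_ok_callback(). The rule reduces to "hot-unplug implies hot-plug"; the sketch below uses hypothetical bit positions, whereas the real code tests ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT and ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT as shown in the hunk.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { HOTPLUG_BIT = 1, HOT_UNPLUG_BIT = 2 };   /* hypothetical positions */

static bool smi_features_valid(uint64_t features)
{
    bool plug   = features & (1ULL << HOTPLUG_BIT);
    bool unplug = features & (1ULL << HOT_UNPLUG_BIT);

    return !unplug || plug;            /* hot-unplug implies hot-plug */
}

int main(void)
{
    printf("unplug only : %s\n",
           smi_features_valid(1ULL << HOT_UNPLUG_BIT) ? "ok" : "reject");
    printf("plug+unplug : %s\n",
           smi_features_valid((1ULL << HOTPLUG_BIT) | (1ULL << HOT_UNPLUG_BIT))
           ? "ok" : "reject");
    return 0;
}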
+ */ + error_setg(errp, "cpu hot-unplug requires cpu hot-plug"); + return; + } + isa_bus = isa_bus_new(DEVICE(d), get_system_memory(), get_system_io(), errp); if (!isa_bus) { diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index f46ccae25cf..dab901c9ad9 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -29,7 +29,6 @@ #include "hw/isa/isa.h" #include "hw/xen/xen.h" #include "sysemu/xen.h" -#include "sysemu/sysemu.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" #include "migration/vmstate.h" diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index b3b6a4378a3..0fe7b69bc4c 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -29,7 +29,6 @@ #include "hw/southbridge/piix.h" #include "hw/pci/pci.h" #include "hw/isa/isa.h" -#include "hw/sysbus.h" #include "hw/intc/i8259.h" #include "hw/dma/i8257.h" #include "hw/timer/i8254.h" @@ -268,8 +267,9 @@ DeviceState *piix4_create(PCIBus *pci_bus, ISABus **isa_bus, I2CBus **smbus) pci_create_simple(pci_bus, devfn + 2, "piix4-usb-uhci"); if (smbus) { *smbus = piix4_pm_init(pci_bus, devfn + 3, 0x1100, - isa_get_irq(NULL, 9), NULL, 0, NULL); - } + qdev_get_gpio_in_named(dev, "isa", 9), + NULL, 0, NULL); + } return dev; } diff --git a/hw/isa/trace-events b/hw/isa/trace-events index 641d69eedf7..b8f877e1ed8 100644 --- a/hw/isa/trace-events +++ b/hw/isa/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # isa-superio.c superio_create_parallel(int id, uint16_t base, unsigned int irq) "id=%d, base 0x%03x, irq %u" diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index 98325bb32b3..8f656251b8d 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -8,6 +8,9 @@ * * Contributions after 2012-01-13 are licensed under the terms of the * GNU GPL, version 2 or (at your option) any later version. 
+ * + * VT8231 south bridge support and general clean up to allow it + * Copyright (c) 2018-2020 BALATON Zoltan */ #include "qemu/osdep.h" @@ -30,7 +33,6 @@ #include "qemu/module.h" #include "qemu/range.h" #include "qemu/timer.h" -#include "exec/address-spaces.h" #include "trace.h" #define TYPE_VIA_PM "via-pm" @@ -265,15 +267,80 @@ static const TypeInfo vt8231_pm_info = { }; -typedef struct SuperIOConfig { +#define TYPE_VIA_SUPERIO "via-superio" +OBJECT_DECLARE_SIMPLE_TYPE(ViaSuperIOState, VIA_SUPERIO) + +struct ViaSuperIOState { + ISASuperIODevice superio; uint8_t regs[0x100]; + const MemoryRegionOps *io_ops; MemoryRegion io; -} SuperIOConfig; +}; + +static inline void via_superio_io_enable(ViaSuperIOState *s, bool enable) +{ + memory_region_set_enabled(&s->io, enable); +} + +static void via_superio_realize(DeviceState *d, Error **errp) +{ + ViaSuperIOState *s = VIA_SUPERIO(d); + ISASuperIOClass *ic = ISA_SUPERIO_GET_CLASS(s); + Error *local_err = NULL; + + assert(s->io_ops); + ic->parent_realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memory_region_init_io(&s->io, OBJECT(d), s->io_ops, s, "via-superio", 2); + memory_region_set_enabled(&s->io, false); + /* The floppy also uses 0x3f0 and 0x3f1 but this seems to work anyway */ + memory_region_add_subregion(isa_address_space_io(ISA_DEVICE(s)), 0x3f0, + &s->io); +} + +static uint64_t via_superio_cfg_read(void *opaque, hwaddr addr, unsigned size) +{ + ViaSuperIOState *sc = opaque; + uint8_t idx = sc->regs[0]; + uint8_t val = sc->regs[idx]; + + if (addr == 0) { + return idx; + } + if (addr == 1 && idx == 0) { + val = 0; /* reading reg 0 where we store index value */ + } + trace_via_superio_read(idx, val); + return val; +} + +static void via_superio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + + sc->parent_realize = dc->realize; + dc->realize = via_superio_realize; +} + +static const TypeInfo via_superio_info = { + .name = TYPE_VIA_SUPERIO, + .parent = TYPE_ISA_SUPERIO, + .instance_size = sizeof(ViaSuperIOState), + .class_size = sizeof(ISASuperIOClass), + .class_init = via_superio_class_init, + .abstract = true, +}; -static void superio_cfg_write(void *opaque, hwaddr addr, uint64_t data, - unsigned size) +#define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" + +static void vt82c686b_superio_cfg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) { - SuperIOConfig *sc = opaque; + ViaSuperIOState *sc = opaque; uint8_t idx = sc->regs[0]; if (addr == 0) { /* config index register */ @@ -304,25 +371,105 @@ static void superio_cfg_write(void *opaque, hwaddr addr, uint64_t data, sc->regs[idx] = data; } -static uint64_t superio_cfg_read(void *opaque, hwaddr addr, unsigned size) +static const MemoryRegionOps vt82c686b_superio_cfg_ops = { + .read = via_superio_cfg_read, + .write = vt82c686b_superio_cfg_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, +}; + +static void vt82c686b_superio_reset(DeviceState *dev) { - SuperIOConfig *sc = opaque; + ViaSuperIOState *s = VIA_SUPERIO(dev); + + memset(s->regs, 0, sizeof(s->regs)); + /* Device ID */ + vt82c686b_superio_cfg_write(s, 0, 0xe0, 1); + vt82c686b_superio_cfg_write(s, 1, 0x3c, 1); + /* Function select - all disabled */ + vt82c686b_superio_cfg_write(s, 0, 0xe2, 1); + vt82c686b_superio_cfg_write(s, 1, 0x03, 1); + /* Floppy ctrl base addr 0x3f0-7 */ + vt82c686b_superio_cfg_write(s, 0, 0xe3, 1); + 
vt82c686b_superio_cfg_write(s, 1, 0xfc, 1); + /* Parallel port base addr 0x378-f */ + vt82c686b_superio_cfg_write(s, 0, 0xe6, 1); + vt82c686b_superio_cfg_write(s, 1, 0xde, 1); + /* Serial port 1 base addr 0x3f8-f */ + vt82c686b_superio_cfg_write(s, 0, 0xe7, 1); + vt82c686b_superio_cfg_write(s, 1, 0xfe, 1); + /* Serial port 2 base addr 0x2f8-f */ + vt82c686b_superio_cfg_write(s, 0, 0xe8, 1); + vt82c686b_superio_cfg_write(s, 1, 0xbe, 1); + + vt82c686b_superio_cfg_write(s, 0, 0, 1); +} + +static void vt82c686b_superio_init(Object *obj) +{ + VIA_SUPERIO(obj)->io_ops = &vt82c686b_superio_cfg_ops; +} + +static void vt82c686b_superio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + + dc->reset = vt82c686b_superio_reset; + sc->serial.count = 2; + sc->parallel.count = 1; + sc->ide.count = 0; /* emulated by via-ide */ + sc->floppy.count = 1; +} + +static const TypeInfo vt82c686b_superio_info = { + .name = TYPE_VT82C686B_SUPERIO, + .parent = TYPE_VIA_SUPERIO, + .instance_size = sizeof(ViaSuperIOState), + .instance_init = vt82c686b_superio_init, + .class_size = sizeof(ISASuperIOClass), + .class_init = vt82c686b_superio_class_init, +}; + + +#define TYPE_VT8231_SUPERIO "vt8231-superio" + +static void vt8231_superio_cfg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + ViaSuperIOState *sc = opaque; uint8_t idx = sc->regs[0]; - uint8_t val = sc->regs[idx]; - if (addr == 0) { - return idx; + if (addr == 0) { /* config index register */ + sc->regs[0] = data; + return; } - if (addr == 1 && idx == 0) { - val = 0; /* reading reg 0 where we store index value */ + + /* config data register */ + trace_via_superio_write(idx, data); + switch (idx) { + case 0x00 ... 0xdf: + case 0xe7 ... 0xef: + case 0xf0 ... 
0xf1: + case 0xf5: + case 0xf8: + case 0xfd: + /* ignore write to read only registers */ + return; + default: + qemu_log_mask(LOG_UNIMP, + "via_superio_cfg: unimplemented register 0x%x\n", idx); + break; } - trace_via_superio_read(idx, val); - return val; + sc->regs[idx] = data; } -static const MemoryRegionOps superio_cfg_ops = { - .read = superio_cfg_read, - .write = superio_cfg_write, +static const MemoryRegionOps vt8231_superio_cfg_ops = { + .read = via_superio_cfg_read, + .write = vt8231_superio_cfg_write, .endianness = DEVICE_NATIVE_ENDIAN, .impl = { .min_access_size = 1, @@ -330,47 +477,151 @@ static const MemoryRegionOps superio_cfg_ops = { }, }; +static void vt8231_superio_reset(DeviceState *dev) +{ + ViaSuperIOState *s = VIA_SUPERIO(dev); + + memset(s->regs, 0, sizeof(s->regs)); + /* Device ID */ + s->regs[0xf0] = 0x3c; + /* Device revision */ + s->regs[0xf1] = 0x01; + /* Function select - all disabled */ + vt8231_superio_cfg_write(s, 0, 0xf2, 1); + vt8231_superio_cfg_write(s, 1, 0x03, 1); + /* Serial port base addr */ + vt8231_superio_cfg_write(s, 0, 0xf4, 1); + vt8231_superio_cfg_write(s, 1, 0xfe, 1); + /* Parallel port base addr */ + vt8231_superio_cfg_write(s, 0, 0xf6, 1); + vt8231_superio_cfg_write(s, 1, 0xde, 1); + /* Floppy ctrl base addr */ + vt8231_superio_cfg_write(s, 0, 0xf7, 1); + vt8231_superio_cfg_write(s, 1, 0xfc, 1); + + vt8231_superio_cfg_write(s, 0, 0, 1); +} + +static void vt8231_superio_init(Object *obj) +{ + VIA_SUPERIO(obj)->io_ops = &vt8231_superio_cfg_ops; +} + +static uint16_t vt8231_superio_serial_iobase(ISASuperIODevice *sio, + uint8_t index) +{ + return 0x2f8; /* FIXME: This should be settable via registers f2-f4 */ +} + +static void vt8231_superio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + + dc->reset = vt8231_superio_reset; + sc->serial.count = 1; + sc->serial.get_iobase = vt8231_superio_serial_iobase; + sc->parallel.count = 1; + sc->ide.count = 0; /* emulated by via-ide */ + sc->floppy.count = 1; +} + +static const TypeInfo vt8231_superio_info = { + .name = TYPE_VT8231_SUPERIO, + .parent = TYPE_VIA_SUPERIO, + .instance_size = sizeof(ViaSuperIOState), + .instance_init = vt8231_superio_init, + .class_size = sizeof(ISASuperIOClass), + .class_init = vt8231_superio_class_init, +}; + -OBJECT_DECLARE_SIMPLE_TYPE(VT82C686BISAState, VT82C686B_ISA) +#define TYPE_VIA_ISA "via-isa" +OBJECT_DECLARE_SIMPLE_TYPE(ViaISAState, VIA_ISA) -struct VT82C686BISAState { +struct ViaISAState { PCIDevice dev; qemu_irq cpu_intr; - SuperIOConfig superio_cfg; + qemu_irq *isa_irqs; + ISABus *isa_bus; + ViaSuperIOState *via_sio; +}; + +static const VMStateDescription vmstate_via = { + .name = "via-isa", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, ViaISAState), + VMSTATE_END_OF_LIST() + } }; +static const TypeInfo via_isa_info = { + .name = TYPE_VIA_ISA, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(ViaISAState), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +void via_isa_set_irq(PCIDevice *d, int n, int level) +{ + ViaISAState *s = VIA_ISA(d); + qemu_set_irq(s->isa_irqs[n], level); +} + static void via_isa_request_i8259_irq(void *opaque, int irq, int level) { - VT82C686BISAState *s = opaque; + ViaISAState *s = opaque; qemu_set_irq(s->cpu_intr, level); } +static void via_isa_realize(PCIDevice *d, Error **errp) +{ + ViaISAState *s = VIA_ISA(d); + 
DeviceState *dev = DEVICE(d); + qemu_irq *isa_irq; + int i; + + qdev_init_gpio_out(dev, &s->cpu_intr, 1); + isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1); + s->isa_bus = isa_bus_new(dev, get_system_memory(), pci_address_space_io(d), + &error_fatal); + s->isa_irqs = i8259_init(s->isa_bus, *isa_irq); + isa_bus_irqs(s->isa_bus, s->isa_irqs); + i8254_pit_init(s->isa_bus, 0x40, 0, NULL); + i8257_dma_init(s->isa_bus, 0); + mc146818_rtc_init(s->isa_bus, 2000, NULL); + + for (i = 0; i < PCI_CONFIG_HEADER_SIZE; i++) { + if (i < PCI_COMMAND || i >= PCI_REVISION_ID) { + d->wmask[i] = 0; + } + } +} + +/* TYPE_VT82C686B_ISA */ + static void vt82c686b_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len) { - VT82C686BISAState *s = VT82C686B_ISA(d); + ViaISAState *s = VIA_ISA(d); trace_via_isa_write(addr, val, len); pci_default_write_config(d, addr, val, len); if (addr == 0x85) { /* BIT(1): enable or disable superio config io ports */ - memory_region_set_enabled(&s->superio_cfg.io, val & BIT(1)); + via_superio_io_enable(s->via_sio, val & BIT(1)); } } -static const VMStateDescription vmstate_via = { - .name = "vt82c686b", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, VT82C686BISAState), - VMSTATE_END_OF_LIST() - } -}; - static void vt82c686b_isa_reset(DeviceState *dev) { - VT82C686BISAState *s = VT82C686B_ISA(dev); + ViaISAState *s = VIA_ISA(dev); uint8_t *pci_conf = s->dev.config; pci_set_long(pci_conf + PCI_CAPABILITY_LIST, 0x000000c0); @@ -386,51 +637,18 @@ static void vt82c686b_isa_reset(DeviceState *dev) pci_conf[0x5a] = 0x04; /* KBC/RTC Control*/ pci_conf[0x5f] = 0x04; pci_conf[0x77] = 0x10; /* GPIO Control 1/2/3/4 */ - - s->superio_cfg.regs[0xe0] = 0x3c; /* Device ID */ - s->superio_cfg.regs[0xe2] = 0x03; /* Function select */ - s->superio_cfg.regs[0xe3] = 0xfc; /* Floppy ctrl base addr */ - s->superio_cfg.regs[0xe6] = 0xde; /* Parallel port base addr */ - s->superio_cfg.regs[0xe7] = 0xfe; /* Serial port 1 base addr */ - s->superio_cfg.regs[0xe8] = 0xbe; /* Serial port 2 base addr */ } static void vt82c686b_realize(PCIDevice *d, Error **errp) { - VT82C686BISAState *s = VT82C686B_ISA(d); - DeviceState *dev = DEVICE(d); - ISABus *isa_bus; - qemu_irq *isa_irq; - int i; + ViaISAState *s = VIA_ISA(d); - qdev_init_gpio_out(dev, &s->cpu_intr, 1); - isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1); - isa_bus = isa_bus_new(dev, get_system_memory(), pci_address_space_io(d), - &error_fatal); - isa_bus_irqs(isa_bus, i8259_init(isa_bus, *isa_irq)); - i8254_pit_init(isa_bus, 0x40, 0, NULL); - i8257_dma_init(isa_bus, 0); - isa_create_simple(isa_bus, TYPE_VT82C686B_SUPERIO); - mc146818_rtc_init(isa_bus, 2000, NULL); - - for (i = 0; i < PCI_CONFIG_HEADER_SIZE; i++) { - if (i < PCI_COMMAND || i >= PCI_REVISION_ID) { - d->wmask[i] = 0; - } - } - - memory_region_init_io(&s->superio_cfg.io, OBJECT(d), &superio_cfg_ops, - &s->superio_cfg, "superio_cfg", 2); - memory_region_set_enabled(&s->superio_cfg.io, false); - /* - * The floppy also uses 0x3f0 and 0x3f1. - * But we do not emulate a floppy, so just set it here. 
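The via-superio code introduced above models the classic index/data configuration interface at I/O ports 0x3f0/0x3f1: the guest writes a register index to the first port, then accesses that register through the second, and the whole window is switched on and off from PCI config space (bit 1 of register 0x85 on the 686B). The simplified, self-contained model below shows only the access pattern; it ignores the per-register read-only handling of the real device, and the sample values (register 0xe3 set to 0xfc, the floppy base address) are taken from the 686B reset defaults in the hunk above.

#include <stdint.h>
#include <stdio.h>

static uint8_t regs[0x100];

static void cfg_write(int port, uint8_t data)
{
    if (port == 0) {                   /* index port (0x3f0) */
        regs[0] = data;
    } else {                           /* data port (0x3f1) */
        regs[regs[0]] = data;
    }
}

static uint8_t cfg_read(int port)
{
    uint8_t idx = regs[0];

    if (port == 0) {
        return idx;
    }
    return idx == 0 ? 0 : regs[idx];   /* register 0 itself holds the index */
}

int main(void)
{
    cfg_write(0, 0xe3);                /* select the floppy base address register */
    cfg_write(1, 0xfc);                /* program its 686B reset default */
    printf("reg 0xe3 = 0x%02x\n", cfg_read(1));   /* prints 0xfc */
    return 0;
}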
- */ - memory_region_add_subregion(isa_bus->address_space_io, 0x3f0, - &s->superio_cfg.io); + via_isa_realize(d, errp); + s->via_sio = VIA_SUPERIO(isa_create_simple(s->isa_bus, + TYPE_VT82C686B_SUPERIO)); } -static void via_class_init(ObjectClass *klass, void *data) +static void vt82c686b_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -438,47 +656,85 @@ static void via_class_init(ObjectClass *klass, void *data) k->realize = vt82c686b_realize; k->config_write = vt82c686b_write_config; k->vendor_id = PCI_VENDOR_ID_VIA; - k->device_id = PCI_DEVICE_ID_VIA_ISA_BRIDGE; + k->device_id = PCI_DEVICE_ID_VIA_82C686B_ISA; k->class_id = PCI_CLASS_BRIDGE_ISA; k->revision = 0x40; dc->reset = vt82c686b_isa_reset; dc->desc = "ISA bridge"; dc->vmsd = &vmstate_via; - /* - * Reason: part of VIA VT82C686 southbridge, needs to be wired up, - * e.g. by mips_fuloong2e_init() - */ + /* Reason: part of VIA VT82C686 southbridge, needs to be wired up */ dc->user_creatable = false; } -static const TypeInfo via_info = { +static const TypeInfo vt82c686b_isa_info = { .name = TYPE_VT82C686B_ISA, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(VT82C686BISAState), - .class_init = via_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, + .parent = TYPE_VIA_ISA, + .instance_size = sizeof(ViaISAState), + .class_init = vt82c686b_class_init, }; +/* TYPE_VT8231_ISA */ -static void vt82c686b_superio_class_init(ObjectClass *klass, void *data) +static void vt8231_write_config(PCIDevice *d, uint32_t addr, + uint32_t val, int len) { - ISASuperIOClass *sc = ISA_SUPERIO_CLASS(klass); + ViaISAState *s = VIA_ISA(d); - sc->serial.count = 2; - sc->parallel.count = 1; - sc->ide.count = 0; - sc->floppy.count = 1; + trace_via_isa_write(addr, val, len); + pci_default_write_config(d, addr, val, len); + if (addr == 0x50) { + /* BIT(2): enable or disable superio config io ports */ + via_superio_io_enable(s->via_sio, val & BIT(2)); + } } -static const TypeInfo via_superio_info = { - .name = TYPE_VT82C686B_SUPERIO, - .parent = TYPE_ISA_SUPERIO, - .instance_size = sizeof(ISASuperIODevice), - .class_size = sizeof(ISASuperIOClass), - .class_init = vt82c686b_superio_class_init, +static void vt8231_isa_reset(DeviceState *dev) +{ + ViaISAState *s = VIA_ISA(dev); + uint8_t *pci_conf = s->dev.config; + + pci_set_long(pci_conf + PCI_CAPABILITY_LIST, 0x000000c0); + pci_set_word(pci_conf + PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | PCI_COMMAND_SPECIAL); + pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_DEVSEL_MEDIUM); + + pci_conf[0x58] = 0x40; /* Miscellaneous Control 0 */ + pci_conf[0x67] = 0x08; /* Fast IR Config */ + pci_conf[0x6b] = 0x01; /* Fast IR I/O Base */ +} + +static void vt8231_realize(PCIDevice *d, Error **errp) +{ + ViaISAState *s = VIA_ISA(d); + + via_isa_realize(d, errp); + s->via_sio = VIA_SUPERIO(isa_create_simple(s->isa_bus, + TYPE_VT8231_SUPERIO)); +} + +static void vt8231_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = vt8231_realize; + k->config_write = vt8231_write_config; + k->vendor_id = PCI_VENDOR_ID_VIA; + k->device_id = PCI_DEVICE_ID_VIA_8231_ISA; + k->class_id = PCI_CLASS_BRIDGE_ISA; + k->revision = 0x10; + dc->reset = vt8231_isa_reset; + dc->desc = "ISA bridge"; + dc->vmsd = &vmstate_via; + /* Reason: part of VIA VT8231 southbridge, needs to be wired up */ + 
dc->user_creatable = false; +} + +static const TypeInfo vt8231_isa_info = { + .name = TYPE_VT8231_ISA, + .parent = TYPE_VIA_ISA, + .instance_size = sizeof(ViaISAState), + .class_init = vt8231_class_init, }; @@ -487,8 +743,12 @@ static void vt82c686b_register_types(void) type_register_static(&via_pm_info); type_register_static(&vt82c686b_pm_info); type_register_static(&vt8231_pm_info); - type_register_static(&via_info); type_register_static(&via_superio_info); + type_register_static(&vt82c686b_superio_info); + type_register_static(&vt8231_superio_info); + type_register_static(&via_isa_info); + type_register_static(&vt82c686b_isa_info); + type_register_static(&vt8231_isa_info); } type_init(vt82c686b_register_types) diff --git a/hw/lm32/Kconfig b/hw/lm32/Kconfig deleted file mode 100644 index 8ac94205d71..00000000000 --- a/hw/lm32/Kconfig +++ /dev/null @@ -1,18 +0,0 @@ -config LM32_DEVICES - bool - select PTIMER - -config MILKYMIST - bool - # FIXME: disabling it results in compile-time errors - select MILKYMIST_TMU2 if OPENGL && X11 - select PFLASH_CFI01 - select FRAMEBUFFER - select SD - select USB_OHCI - select LM32_DEVICES - -config LM32_EVR - bool - select LM32_DEVICES - select PFLASH_CFI02 diff --git a/hw/lm32/lm32.h b/hw/lm32/lm32.h deleted file mode 100644 index 7b4f6255b91..00000000000 --- a/hw/lm32/lm32.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef HW_LM32_H -#define HW_LM32_H - -#include "hw/char/lm32_juart.h" -#include "hw/qdev-properties.h" -#include "qapi/error.h" - -static inline DeviceState *lm32_pic_init(qemu_irq cpu_irq) -{ - DeviceState *dev; - SysBusDevice *d; - - dev = qdev_new("lm32-pic"); - d = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(d, &error_fatal); - sysbus_connect_irq(d, 0, cpu_irq); - - return dev; -} - -static inline DeviceState *lm32_juart_init(Chardev *chr) -{ - DeviceState *dev; - - dev = qdev_new(TYPE_LM32_JUART); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - - return dev; -} - -static inline DeviceState *lm32_uart_create(hwaddr addr, - qemu_irq irq, - Chardev *chr) -{ - DeviceState *dev; - SysBusDevice *s; - - dev = qdev_new("lm32-uart"); - s = SYS_BUS_DEVICE(dev); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(s, &error_fatal); - sysbus_mmio_map(s, 0, addr); - sysbus_connect_irq(s, 0, irq); - return dev; -} - -#endif diff --git a/hw/lm32/lm32_boards.c b/hw/lm32/lm32_boards.c deleted file mode 100644 index b5d97dd53ed..00000000000 --- a/hw/lm32/lm32_boards.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * QEMU models for LatticeMico32 uclinux and evr32 boards. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/cutils.h" -#include "qemu/error-report.h" -#include "cpu.h" -#include "hw/sysbus.h" -#include "hw/irq.h" -#include "hw/block/flash.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "elf.h" -#include "lm32_hwsetup.h" -#include "lm32.h" -#include "exec/address-spaces.h" -#include "sysemu/reset.h" -#include "sysemu/sysemu.h" - -typedef struct { - LM32CPU *cpu; - hwaddr bootstrap_pc; - hwaddr flash_base; - hwaddr hwsetup_base; - hwaddr initrd_base; - size_t initrd_size; - hwaddr cmdline_base; -} ResetInfo; - -static void cpu_irq_handler(void *opaque, int irq, int level) -{ - LM32CPU *cpu = opaque; - CPUState *cs = CPU(cpu); - - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - -static void main_cpu_reset(void *opaque) -{ - ResetInfo *reset_info = opaque; - CPULM32State *env = &reset_info->cpu->env; - - cpu_reset(CPU(reset_info->cpu)); - - /* init defaults */ - env->pc = (uint32_t)reset_info->bootstrap_pc; - env->regs[R_R1] = (uint32_t)reset_info->hwsetup_base; - env->regs[R_R2] = (uint32_t)reset_info->cmdline_base; - env->regs[R_R3] = (uint32_t)reset_info->initrd_base; - env->regs[R_R4] = (uint32_t)(reset_info->initrd_base + - reset_info->initrd_size); - env->eba = reset_info->flash_base; - env->deba = reset_info->flash_base; -} - -static void lm32_evr_init(MachineState *machine) -{ - MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *kernel_filename = machine->kernel_filename; - LM32CPU *cpu; - CPULM32State *env; - DriveInfo *dinfo; - MemoryRegion *address_space_mem = get_system_memory(); - qemu_irq irq[32]; - ResetInfo *reset_info; - int i; - - if (machine->ram_size != mc->default_ram_size) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); - g_free(sz); - exit(EXIT_FAILURE); - } - - /* memory map */ - hwaddr flash_base = 0x04000000; - size_t flash_sector_size = 256 * KiB; - size_t flash_size = 32 * MiB; - hwaddr ram_base = 0x08000000; - hwaddr timer0_base = 0x80002000; - hwaddr uart0_base = 0x80006000; - hwaddr timer1_base = 0x8000a000; - int uart0_irq = 0; - int timer0_irq = 1; - int timer1_irq = 3; - - reset_info = g_malloc0(sizeof(ResetInfo)); - - cpu = LM32_CPU(cpu_create(machine->cpu_type)); - - env = &cpu->env; - reset_info->cpu = cpu; - - reset_info->flash_base = flash_base; - - memory_region_add_subregion(address_space_mem, ram_base, machine->ram); - - dinfo = drive_get(IF_PFLASH, 0, 0); - /* Spansion S29NS128P */ - pflash_cfi02_register(flash_base, "lm32_evr.flash", flash_size, - dinfo ? 
blk_by_legacy_dinfo(dinfo) : NULL, - flash_sector_size, - 1, 2, 0x01, 0x7e, 0x43, 0x00, 0x555, 0x2aa, 1); - - /* create irq lines */ - env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, cpu, 0)); - for (i = 0; i < 32; i++) { - irq[i] = qdev_get_gpio_in(env->pic_state, i); - } - - lm32_uart_create(uart0_base, irq[uart0_irq], serial_hd(0)); - sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]); - sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]); - - /* make sure juart isn't the first chardev */ - env->juart_state = lm32_juart_init(serial_hd(1)); - - reset_info->bootstrap_pc = flash_base; - - if (kernel_filename) { - uint64_t entry; - int kernel_size; - - kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, - &entry, NULL, NULL, NULL, - 1, EM_LATTICEMICO32, 0, 0); - reset_info->bootstrap_pc = entry; - - if (kernel_size < 0) { - kernel_size = load_image_targphys(kernel_filename, ram_base, - machine->ram_size); - reset_info->bootstrap_pc = ram_base; - } - - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - } - - qemu_register_reset(main_cpu_reset, reset_info); -} - -static void lm32_uclinux_init(MachineState *machine) -{ - MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - LM32CPU *cpu; - CPULM32State *env; - DriveInfo *dinfo; - MemoryRegion *address_space_mem = get_system_memory(); - qemu_irq irq[32]; - HWSetup *hw; - ResetInfo *reset_info; - int i; - - if (machine->ram_size != mc->default_ram_size) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); - g_free(sz); - exit(EXIT_FAILURE); - } - - /* memory map */ - hwaddr flash_base = 0x04000000; - size_t flash_sector_size = 256 * KiB; - size_t flash_size = 32 * MiB; - hwaddr ram_base = 0x08000000; - hwaddr uart0_base = 0x80000000; - hwaddr timer0_base = 0x80002000; - hwaddr timer1_base = 0x80010000; - hwaddr timer2_base = 0x80012000; - int uart0_irq = 0; - int timer0_irq = 1; - int timer1_irq = 20; - int timer2_irq = 21; - hwaddr hwsetup_base = 0x0bffe000; - hwaddr cmdline_base = 0x0bfff000; - hwaddr initrd_base = 0x08400000; - size_t initrd_max = 0x01000000; - - reset_info = g_malloc0(sizeof(ResetInfo)); - - cpu = LM32_CPU(cpu_create(machine->cpu_type)); - - env = &cpu->env; - reset_info->cpu = cpu; - - reset_info->flash_base = flash_base; - - memory_region_add_subregion(address_space_mem, ram_base, machine->ram); - - dinfo = drive_get(IF_PFLASH, 0, 0); - /* Spansion S29NS128P */ - pflash_cfi02_register(flash_base, "lm32_uclinux.flash", flash_size, - dinfo ? 
blk_by_legacy_dinfo(dinfo) : NULL, - flash_sector_size, - 1, 2, 0x01, 0x7e, 0x43, 0x00, 0x555, 0x2aa, 1); - - /* create irq lines */ - env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, env, 0)); - for (i = 0; i < 32; i++) { - irq[i] = qdev_get_gpio_in(env->pic_state, i); - } - - lm32_uart_create(uart0_base, irq[uart0_irq], serial_hd(0)); - sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]); - sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]); - sysbus_create_simple("lm32-timer", timer2_base, irq[timer2_irq]); - - /* make sure juart isn't the first chardev */ - env->juart_state = lm32_juart_init(serial_hd(1)); - - reset_info->bootstrap_pc = flash_base; - - if (kernel_filename) { - uint64_t entry; - int kernel_size; - - kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, - &entry, NULL, NULL, NULL, - 1, EM_LATTICEMICO32, 0, 0); - reset_info->bootstrap_pc = entry; - - if (kernel_size < 0) { - kernel_size = load_image_targphys(kernel_filename, ram_base, - machine->ram_size); - reset_info->bootstrap_pc = ram_base; - } - - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - } - - /* generate a rom with the hardware description */ - hw = hwsetup_init(); - hwsetup_add_cpu(hw, "LM32", 75000000); - hwsetup_add_flash(hw, "flash", flash_base, flash_size); - hwsetup_add_ddr_sdram(hw, "ddr_sdram", ram_base, machine->ram_size); - hwsetup_add_timer(hw, "timer0", timer0_base, timer0_irq); - hwsetup_add_timer(hw, "timer1_dev_only", timer1_base, timer1_irq); - hwsetup_add_timer(hw, "timer2_dev_only", timer2_base, timer2_irq); - hwsetup_add_uart(hw, "uart", uart0_base, uart0_irq); - hwsetup_add_trailer(hw); - hwsetup_create_rom(hw, hwsetup_base); - hwsetup_free(hw); - - reset_info->hwsetup_base = hwsetup_base; - - if (kernel_cmdline && strlen(kernel_cmdline)) { - pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, - kernel_cmdline); - reset_info->cmdline_base = cmdline_base; - } - - if (initrd_filename) { - size_t initrd_size; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - initrd_max); - reset_info->initrd_base = initrd_base; - reset_info->initrd_size = initrd_size; - } - - qemu_register_reset(main_cpu_reset, reset_info); -} - -static void lm32_evr_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "LatticeMico32 EVR32 eval system"; - mc->init = lm32_evr_init; - mc->is_default = true; - mc->default_cpu_type = LM32_CPU_TYPE_NAME("lm32-full"); - mc->default_ram_size = 64 * MiB; - mc->default_ram_id = "lm32_evr.sdram"; -} - -static const TypeInfo lm32_evr_type = { - .name = MACHINE_TYPE_NAME("lm32-evr"), - .parent = TYPE_MACHINE, - .class_init = lm32_evr_class_init, -}; - -static void lm32_uclinux_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "lm32 platform for uClinux and u-boot by Theobroma Systems"; - mc->init = lm32_uclinux_init; - mc->default_cpu_type = LM32_CPU_TYPE_NAME("lm32-full"); - mc->default_ram_size = 64 * MiB; - mc->default_ram_id = "lm32_uclinux.sdram"; -} - -static const TypeInfo lm32_uclinux_type = { - .name = MACHINE_TYPE_NAME("lm32-uclinux"), - .parent = TYPE_MACHINE, - .class_init = lm32_uclinux_class_init, -}; - -static void lm32_machine_init(void) -{ - type_register_static(&lm32_evr_type); - type_register_static(&lm32_uclinux_type); -} - -type_init(lm32_machine_init) diff --git a/hw/lm32/lm32_hwsetup.h b/hw/lm32/lm32_hwsetup.h deleted file mode 100644 index 
e6cd30ad686..00000000000 --- a/hw/lm32/lm32_hwsetup.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * LatticeMico32 hwsetup helper functions. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -/* - * These are helper functions for creating the hardware description blob used - * in the Theobroma's uClinux port. - */ - -#ifndef QEMU_HW_LM32_HWSETUP_H -#define QEMU_HW_LM32_HWSETUP_H - -#include "qemu/cutils.h" -#include "hw/loader.h" - -typedef struct { - void *data; - void *ptr; -} HWSetup; - -enum hwsetup_tag { - HWSETUP_TAG_EOL = 0, - HWSETUP_TAG_CPU = 1, - HWSETUP_TAG_ASRAM = 2, - HWSETUP_TAG_FLASH = 3, - HWSETUP_TAG_SDRAM = 4, - HWSETUP_TAG_OCM = 5, - HWSETUP_TAG_DDR_SDRAM = 6, - HWSETUP_TAG_DDR2_SDRAM = 7, - HWSETUP_TAG_TIMER = 8, - HWSETUP_TAG_UART = 9, - HWSETUP_TAG_GPIO = 10, - HWSETUP_TAG_TRISPEEDMAC = 11, - HWSETUP_TAG_I2CM = 12, - HWSETUP_TAG_LEDS = 13, - HWSETUP_TAG_7SEG = 14, - HWSETUP_TAG_SPI_S = 15, - HWSETUP_TAG_SPI_M = 16, -}; - -static inline HWSetup *hwsetup_init(void) -{ - HWSetup *hw; - - hw = g_malloc(sizeof(HWSetup)); - hw->data = g_malloc0(TARGET_PAGE_SIZE); - hw->ptr = hw->data; - - return hw; -} - -static inline void hwsetup_free(HWSetup *hw) -{ - g_free(hw->data); - g_free(hw); -} - -static inline void hwsetup_create_rom(HWSetup *hw, - hwaddr base) -{ - rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE, base, NULL, NULL, NULL, NULL, true); -} - -static inline void hwsetup_add_u8(HWSetup *hw, uint8_t u) -{ - stb_p(hw->ptr, u); - hw->ptr += 1; -} - -static inline void hwsetup_add_u32(HWSetup *hw, uint32_t u) -{ - stl_p(hw->ptr, u); - hw->ptr += 4; -} - -static inline void hwsetup_add_tag(HWSetup *hw, enum hwsetup_tag t) -{ - stl_p(hw->ptr, t); - hw->ptr += 4; -} - -static inline void hwsetup_add_str(HWSetup *hw, const char *str) -{ - pstrcpy(hw->ptr, 32, str); - hw->ptr += 32; -} - -static inline void hwsetup_add_trailer(HWSetup *hw) -{ - hwsetup_add_u32(hw, 8); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_EOL); -} - -static inline void hwsetup_add_cpu(HWSetup *hw, - const char *name, uint32_t frequency) -{ - hwsetup_add_u32(hw, 44); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_CPU); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, frequency); -} - -static inline void hwsetup_add_flash(HWSetup *hw, - const char *name, uint32_t base, uint32_t size) -{ - hwsetup_add_u32(hw, 52); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_FLASH); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, base); - hwsetup_add_u32(hw, size); - hwsetup_add_u8(hw, 8); /* read latency */ - hwsetup_add_u8(hw, 8); /* write latency */ - hwsetup_add_u8(hw, 25); /* address width */ - hwsetup_add_u8(hw, 32); /* data width */ -} - -static inline void hwsetup_add_ddr_sdram(HWSetup *hw, - const char *name, uint32_t base, uint32_t size) -{ - hwsetup_add_u32(hw, 48); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_DDR_SDRAM); - hwsetup_add_str(hw, name); - 
hwsetup_add_u32(hw, base); - hwsetup_add_u32(hw, size); -} - -static inline void hwsetup_add_timer(HWSetup *hw, - const char *name, uint32_t base, uint32_t irq) -{ - hwsetup_add_u32(hw, 56); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_TIMER); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, base); - hwsetup_add_u8(hw, 1); /* wr_tickcount */ - hwsetup_add_u8(hw, 1); /* rd_tickcount */ - hwsetup_add_u8(hw, 1); /* start_stop_control */ - hwsetup_add_u8(hw, 32); /* counter_width */ - hwsetup_add_u32(hw, 20); /* reload_ticks */ - hwsetup_add_u8(hw, irq); - hwsetup_add_u8(hw, 0); /* padding */ - hwsetup_add_u8(hw, 0); /* padding */ - hwsetup_add_u8(hw, 0); /* padding */ -} - -static inline void hwsetup_add_uart(HWSetup *hw, - const char *name, uint32_t base, uint32_t irq) -{ - hwsetup_add_u32(hw, 56); /* size */ - hwsetup_add_tag(hw, HWSETUP_TAG_UART); - hwsetup_add_str(hw, name); - hwsetup_add_u32(hw, base); - hwsetup_add_u32(hw, 115200); /* baudrate */ - hwsetup_add_u8(hw, 8); /* databits */ - hwsetup_add_u8(hw, 1); /* stopbits */ - hwsetup_add_u8(hw, 1); /* use_interrupt */ - hwsetup_add_u8(hw, 1); /* block_on_transmit */ - hwsetup_add_u8(hw, 1); /* block_on_receive */ - hwsetup_add_u8(hw, 4); /* rx_buffer_size */ - hwsetup_add_u8(hw, 4); /* tx_buffer_size */ - hwsetup_add_u8(hw, irq); -} - -#endif /* QEMU_HW_LM32_HWSETUP_H */ diff --git a/hw/lm32/meson.build b/hw/lm32/meson.build deleted file mode 100644 index 42d6f8db3d9..00000000000 --- a/hw/lm32/meson.build +++ /dev/null @@ -1,6 +0,0 @@ -lm32_ss = ss.source_set() -# LM32 boards -lm32_ss.add(when: 'CONFIG_LM32_EVR', if_true: files('lm32_boards.c')) -lm32_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist.c')) - -hw_arch += {'lm32': lm32_ss} diff --git a/hw/lm32/milkymist-hw.h b/hw/lm32/milkymist-hw.h deleted file mode 100644 index 5dca5d52f57..00000000000 --- a/hw/lm32/milkymist-hw.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef QEMU_HW_MILKYMIST_HW_H -#define QEMU_HW_MILKYMIST_HW_H - -#include "hw/qdev-core.h" -#include "net/net.h" -#include "qapi/error.h" - -static inline DeviceState *milkymist_uart_create(hwaddr base, - qemu_irq irq, - Chardev *chr) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-uart"); - qdev_prop_set_chr(dev, "chardev", chr); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - - return dev; -} - -static inline DeviceState *milkymist_hpdmc_create(hwaddr base) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-hpdmc"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - - return dev; -} - -static inline DeviceState *milkymist_vgafb_create(hwaddr base, - uint32_t fb_offset, uint32_t fb_mask) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-vgafb"); - qdev_prop_set_uint32(dev, "fb_offset", fb_offset); - qdev_prop_set_uint32(dev, "fb_mask", fb_mask); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - - return dev; -} - -static inline DeviceState *milkymist_sysctl_create(hwaddr base, - qemu_irq gpio_irq, qemu_irq timer0_irq, qemu_irq timer1_irq, - uint32_t freq_hz, uint32_t system_id, uint32_t capabilities, - uint32_t gpio_strappings) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-sysctl"); - qdev_prop_set_uint32(dev, "frequency", freq_hz); - qdev_prop_set_uint32(dev, "systemid", system_id); - qdev_prop_set_uint32(dev, "capabilities", capabilities); - 
qdev_prop_set_uint32(dev, "gpio_strappings", gpio_strappings); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, gpio_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, timer0_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, timer1_irq); - - return dev; -} - -static inline DeviceState *milkymist_pfpu_create(hwaddr base, - qemu_irq irq) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-pfpu"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - return dev; -} - -static inline DeviceState *milkymist_ac97_create(hwaddr base, - qemu_irq crrequest_irq, qemu_irq crreply_irq, qemu_irq dmar_irq, - qemu_irq dmaw_irq) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-ac97"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, crrequest_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, crreply_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 2, dmar_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 3, dmaw_irq); - - return dev; -} - -static inline DeviceState *milkymist_minimac2_create(hwaddr base, - hwaddr buffers_base, qemu_irq rx_irq, qemu_irq tx_irq) -{ - DeviceState *dev; - - qemu_check_nic_model(&nd_table[0], "minimac2"); - dev = qdev_new("milkymist-minimac2"); - qdev_set_nic_properties(dev, &nd_table[0]); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, buffers_base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, rx_irq); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, tx_irq); - - return dev; -} - -static inline DeviceState *milkymist_softusb_create(hwaddr base, - qemu_irq irq, uint32_t pmem_base, uint32_t pmem_size, - uint32_t dmem_base, uint32_t dmem_size) -{ - DeviceState *dev; - - dev = qdev_new("milkymist-softusb"); - qdev_prop_set_uint32(dev, "pmem_size", pmem_size); - qdev_prop_set_uint32(dev, "dmem_size", dmem_size); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, pmem_base); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, dmem_base); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); - - return dev; -} - -#endif /* QEMU_HW_MILKYMIST_HW_H */ diff --git a/hw/lm32/milkymist.c b/hw/lm32/milkymist.c deleted file mode 100644 index 72d1326531a..00000000000 --- a/hw/lm32/milkymist.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * QEMU model for the Milkymist board. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "qemu/error-report.h" -#include "qemu-common.h" -#include "qemu/datadir.h" -#include "cpu.h" -#include "hw/sysbus.h" -#include "hw/irq.h" -#include "hw/block/flash.h" -#include "sysemu/sysemu.h" -#include "sysemu/qtest.h" -#include "sysemu/reset.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "hw/qdev-properties.h" -#include "elf.h" -#include "milkymist-hw.h" -#include "hw/display/milkymist_tmu2.h" -#include "hw/sd/sd.h" -#include "lm32.h" -#include "exec/address-spaces.h" -#include "qemu/cutils.h" - -#define BIOS_FILENAME "mmone-bios.bin" -#define BIOS_OFFSET 0x00860000 -#define BIOS_SIZE (512 * KiB) -#define KERNEL_LOAD_ADDR 0x40000000 - -typedef struct { - LM32CPU *cpu; - hwaddr bootstrap_pc; - hwaddr flash_base; - hwaddr initrd_base; - size_t initrd_size; - hwaddr cmdline_base; -} ResetInfo; - -static void cpu_irq_handler(void *opaque, int irq, int level) -{ - LM32CPU *cpu = opaque; - CPUState *cs = CPU(cpu); - - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - -static void main_cpu_reset(void *opaque) -{ - ResetInfo *reset_info = opaque; - CPULM32State *env = &reset_info->cpu->env; - - cpu_reset(CPU(reset_info->cpu)); - - /* init defaults */ - env->pc = reset_info->bootstrap_pc; - env->regs[R_R1] = reset_info->cmdline_base; - env->regs[R_R2] = reset_info->initrd_base; - env->regs[R_R3] = reset_info->initrd_base + reset_info->initrd_size; - env->eba = reset_info->flash_base; - env->deba = reset_info->flash_base; -} - -static DeviceState *milkymist_memcard_create(hwaddr base) -{ - DeviceState *dev; - DriveInfo *dinfo; - - dev = qdev_new("milkymist-memcard"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); - - dinfo = drive_get_next(IF_SD); - if (dinfo) { - DeviceState *card; - - card = qdev_new(TYPE_SD_CARD); - qdev_prop_set_drive_err(card, "drive", blk_by_legacy_dinfo(dinfo), - &error_fatal); - qdev_realize_and_unref(card, qdev_get_child_bus(dev, "sd-bus"), - &error_fatal); - } - - return dev; -} - -static void -milkymist_init(MachineState *machine) -{ - MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *bios_name = machine->firmware ?: BIOS_FILENAME; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - LM32CPU *cpu; - CPULM32State *env; - int kernel_size; - DriveInfo *dinfo; - MemoryRegion *address_space_mem = get_system_memory(); - qemu_irq irq[32]; - int i; - char *bios_filename; - ResetInfo *reset_info; - - if (machine->ram_size != mc->default_ram_size) { - char *sz = size_to_str(mc->default_ram_size); - error_report("Invalid RAM size, should be %s", sz); - g_free(sz); - exit(EXIT_FAILURE); - } - - /* memory map */ - hwaddr flash_base = 0x00000000; - size_t flash_sector_size = 128 * KiB; - size_t flash_size = 32 * MiB; - hwaddr sdram_base = 0x40000000; - - hwaddr initrd_base = sdram_base + 0x1002000; - hwaddr cmdline_base = sdram_base + 0x1000000; - size_t initrd_max = machine->ram_size - 0x1002000; - - reset_info = g_malloc0(sizeof(ResetInfo)); - - cpu = LM32_CPU(cpu_create(machine->cpu_type)); - - env = &cpu->env; - reset_info->cpu = cpu; - - cpu_lm32_set_phys_msb_ignore(env, 1); - - memory_region_add_subregion(address_space_mem, sdram_base, machine->ram); - - dinfo = drive_get(IF_PFLASH, 0, 0); - /* Numonyx JS28F256J3F105 */ - 
pflash_cfi01_register(flash_base, "milkymist.flash", flash_size, - dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, - flash_sector_size, 2, 0x00, 0x89, 0x00, 0x1d, 1); - - /* create irq lines */ - env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, cpu, 0)); - for (i = 0; i < 32; i++) { - irq[i] = qdev_get_gpio_in(env->pic_state, i); - } - - /* load bios rom */ - bios_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); - - if (bios_filename) { - if (load_image_targphys(bios_filename, BIOS_OFFSET, BIOS_SIZE) < 0) { - error_report("could not load bios '%s'", bios_filename); - exit(1); - } - } - - reset_info->bootstrap_pc = BIOS_OFFSET; - - /* if no kernel is given no valid bios rom is a fatal error */ - if (!kernel_filename && !dinfo && !bios_filename && !qtest_enabled()) { - error_report("could not load Milkymist One bios '%s'", bios_name); - exit(1); - } - g_free(bios_filename); - - milkymist_uart_create(0x60000000, irq[0], serial_hd(0)); - milkymist_sysctl_create(0x60001000, irq[1], irq[2], irq[3], - 80000000, 0x10014d31, 0x0000041f, 0x00000001); - milkymist_hpdmc_create(0x60002000); - milkymist_vgafb_create(0x60003000, 0x40000000, 0x0fffffff); - milkymist_memcard_create(0x60004000); - milkymist_ac97_create(0x60005000, irq[4], irq[5], irq[6], irq[7]); - milkymist_pfpu_create(0x60006000, irq[8]); - if (machine->enable_graphics) { - milkymist_tmu2_create(0x60007000, irq[9]); - } - milkymist_minimac2_create(0x60008000, 0x30000000, irq[10], irq[11]); - milkymist_softusb_create(0x6000f000, irq[15], - 0x20000000, 0x1000, 0x20020000, 0x2000); - - /* make sure juart isn't the first chardev */ - env->juart_state = lm32_juart_init(serial_hd(1)); - - if (kernel_filename) { - uint64_t entry; - - /* Boots a kernel elf binary. */ - kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, - &entry, NULL, NULL, NULL, - 1, EM_LATTICEMICO32, 0, 0); - reset_info->bootstrap_pc = entry; - - if (kernel_size < 0) { - kernel_size = load_image_targphys(kernel_filename, sdram_base, - machine->ram_size); - reset_info->bootstrap_pc = sdram_base; - } - - if (kernel_size < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - } - - if (kernel_cmdline && strlen(kernel_cmdline)) { - pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, - kernel_cmdline); - reset_info->cmdline_base = (uint32_t)cmdline_base; - } - - if (initrd_filename) { - size_t initrd_size; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - initrd_max); - reset_info->initrd_base = (uint32_t)initrd_base; - reset_info->initrd_size = (uint32_t)initrd_size; - } - - qemu_register_reset(main_cpu_reset, reset_info); -} - -static void milkymist_machine_init(MachineClass *mc) -{ - mc->desc = "Milkymist One"; - mc->init = milkymist_init; - mc->default_cpu_type = LM32_CPU_TYPE_NAME("lm32-full"); - mc->default_ram_size = 128 * MiB; - mc->default_ram_id = "milkymist.sdram"; -} - -DEFINE_MACHINE("milkymist", milkymist_machine_init) diff --git a/hw/m68k/an5206.c b/hw/m68k/an5206.c index 673898b0eac..11ae4c9795b 100644 --- a/hw/m68k/an5206.c +++ b/hw/m68k/an5206.c @@ -13,7 +13,6 @@ #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c index 7a03c71059b..93812ee206e 100644 --- a/hw/m68k/mcf5208.c +++ b/hw/m68k/mcf5208.c @@ -26,7 +26,6 @@ #include "hw/loader.h" #include "hw/sysbus.h" #include "elf.h" -#include "exec/address-spaces.h" #define SYS_FREQ 
166666666 diff --git a/hw/m68k/mcf_intc.c b/hw/m68k/mcf_intc.c index cf02f57a711..4cd30188c05 100644 --- a/hw/m68k/mcf_intc.c +++ b/hw/m68k/mcf_intc.c @@ -11,7 +11,6 @@ #include "qemu/module.h" #include "qemu/log.h" #include "cpu.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/sysbus.h" #include "hw/m68k/mcf.h" diff --git a/hw/m68k/next-cube.c b/hw/m68k/next-cube.c index 92b45d760f1..e0d4a94f9db 100644 --- a/hw/m68k/next-cube.c +++ b/hw/m68k/next-cube.c @@ -10,9 +10,7 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/hwaddr.h" -#include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "hw/irq.h" @@ -986,8 +984,8 @@ static void next_cube_init(MachineState *machine) sysbus_mmio_map(SYS_BUS_DEVICE(pcdev), 1, 0x02100000); /* BMAP memory */ - memory_region_init_ram_shared_nomigrate(bmapm1, NULL, "next.bmapmem", 64, - true, &error_fatal); + memory_region_init_ram_flags_nomigrate(bmapm1, NULL, "next.bmapmem", 64, + RAM_SHARED, &error_fatal); memory_region_add_subregion(sysmem, 0x020c0000, bmapm1); /* The Rev_2.5_v66.bin firmware accesses it at 0x820c0020, too */ memory_region_init_alias(bmapm2, NULL, "next.bmapmem2", bmapm1, 0x0, 64); diff --git a/hw/m68k/next-kbd.c b/hw/m68k/next-kbd.c index c11b5281f19..0544160e916 100644 --- a/hw/m68k/next-kbd.c +++ b/hw/m68k/next-kbd.c @@ -29,12 +29,9 @@ #include "qemu/osdep.h" #include "qemu/log.h" -#include "exec/address-spaces.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/m68k/next-cube.h" #include "ui/console.h" -#include "sysemu/sysemu.h" #include "migration/vmstate.h" #include "qom/object.h" diff --git a/hw/m68k/q800.c b/hw/m68k/q800.c index 4d2e866eec7..e4c7c9b88ad 100644 --- a/hw/m68k/q800.c +++ b/hw/m68k/q800.c @@ -26,14 +26,12 @@ #include "qemu/datadir.h" #include "sysemu/sysemu.h" #include "cpu.h" -#include "hw/hw.h" #include "hw/boards.h" -#include "hw/irq.h" #include "hw/or-irq.h" +#include "hw/nmi.h" #include "elf.h" #include "hw/loader.h" #include "ui/console.h" -#include "exec/address-spaces.h" #include "hw/char/escc.h" #include "hw/sysbus.h" #include "hw/scsi/esp.h" @@ -70,18 +68,24 @@ #define ASC_BASE (IO_BASE + 0x14000) #define SWIM_BASE (IO_BASE + 0x1E000) -#define NUBUS_SUPER_SLOT_BASE 0x60000000 -#define NUBUS_SLOT_BASE 0xf0000000 +#define SONIC_PROM_SIZE 0x1000 /* * the video base, whereas it a Nubus address, * is needed by the kernel to have early display and * thus provided by the bootloader */ -#define VIDEO_BASE 0xf9001000 +#define VIDEO_BASE 0xf9000000 #define MAC_CLOCK 3686418 +/* + * Slot 0x9 is reserved for use by the in-built framebuffer whilst only + * slots 0xc, 0xd and 0xe physically exist on the Quadra 800 + */ +#define Q800_NUBUS_SLOTS_AVAILABLE (BIT(0x9) | BIT(0xc) | BIT(0xd) | \ + BIT(0xe)) + /* * The GLUE (General Logic Unit) is an Apple custom integrated circuit chip * that performs a variety of functions (RAM management, clock generation, ...). 
@@ -97,13 +101,110 @@ struct GLUEState { SysBusDevice parent_obj; M68kCPU *cpu; uint8_t ipr; + uint8_t auxmode; + qemu_irq irqs[1]; + QEMUTimer *nmi_release; }; +#define GLUE_IRQ_IN_VIA1 0 +#define GLUE_IRQ_IN_VIA2 1 +#define GLUE_IRQ_IN_SONIC 2 +#define GLUE_IRQ_IN_ESCC 3 +#define GLUE_IRQ_IN_NMI 4 + +#define GLUE_IRQ_NUBUS_9 0 + +/* + * The GLUE logic on the Quadra 800 supports 2 different IRQ routing modes + * controlled from the VIA1 auxmode GPIO (port B bit 6) which are documented + * in NetBSD as follows: + * + * A/UX mode (Linux, NetBSD, auxmode GPIO low) + * + * Level 0: Spurious: ignored + * Level 1: Software + * Level 2: VIA2 (except ethernet, sound) + * Level 3: Ethernet + * Level 4: Serial (SCC) + * Level 5: Sound + * Level 6: VIA1 + * Level 7: NMIs: parity errors, RESET button, YANCC error + * + * Classic mode (default: used by MacOS, A/UX 3.0.1, auxmode GPIO high) + * + * Level 0: Spurious: ignored + * Level 1: VIA1 (clock, ADB) + * Level 2: VIA2 (NuBus, SCSI) + * Level 3: + * Level 4: Serial (SCC) + * Level 5: + * Level 6: + * Level 7: Non-maskable: parity errors, RESET button + * + * Note that despite references to A/UX mode in Linux and NetBSD, at least + * A/UX 3.0.1 still uses Classic mode. + */ + static void GLUE_set_irq(void *opaque, int irq, int level) { GLUEState *s = opaque; int i; + if (s->auxmode) { + /* Classic mode */ + switch (irq) { + case GLUE_IRQ_IN_VIA1: + irq = 0; + break; + + case GLUE_IRQ_IN_VIA2: + irq = 1; + break; + + case GLUE_IRQ_IN_SONIC: + /* Route to VIA2 instead */ + qemu_set_irq(s->irqs[GLUE_IRQ_NUBUS_9], level); + return; + + case GLUE_IRQ_IN_ESCC: + irq = 3; + break; + + case GLUE_IRQ_IN_NMI: + irq = 6; + break; + + default: + g_assert_not_reached(); + } + } else { + /* A/UX mode */ + switch (irq) { + case GLUE_IRQ_IN_VIA1: + irq = 5; + break; + + case GLUE_IRQ_IN_VIA2: + irq = 1; + break; + + case GLUE_IRQ_IN_SONIC: + irq = 2; + break; + + case GLUE_IRQ_IN_ESCC: + irq = 3; + break; + + case GLUE_IRQ_IN_NMI: + irq = 6; + break; + + default: + g_assert_not_reached(); + } + } + if (level) { s->ipr |= 1 << irq; } else { @@ -119,11 +220,37 @@ static void GLUE_set_irq(void *opaque, int irq, int level) m68k_set_irq_level(s->cpu, 0, 0); } +static void glue_auxmode_set_irq(void *opaque, int irq, int level) +{ + GLUEState *s = GLUE(opaque); + + s->auxmode = level; +} + +static void glue_nmi(NMIState *n, int cpu_index, Error **errp) +{ + GLUEState *s = GLUE(n); + + /* Hold NMI active for 100ms */ + GLUE_set_irq(s, GLUE_IRQ_IN_NMI, 1); + timer_mod(s->nmi_release, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100); +} + +static void glue_nmi_release(void *opaque) +{ + GLUEState *s = GLUE(opaque); + + GLUE_set_irq(s, GLUE_IRQ_IN_NMI, 0); +} + static void glue_reset(DeviceState *dev) { GLUEState *s = GLUE(dev); s->ipr = 0; + s->auxmode = 0; + + timer_del(s->nmi_release); } static const VMStateDescription vmstate_glue = { @@ -132,6 +259,8 @@ static const VMStateDescription vmstate_glue = { .minimum_version_id = 0, .fields = (VMStateField[]) { VMSTATE_UINT8(ipr, GLUEState), + VMSTATE_UINT8(auxmode, GLUEState), + VMSTATE_TIMER_PTR(nmi_release, GLUEState), VMSTATE_END_OF_LIST(), }, }; @@ -147,20 +276,36 @@ static Property glue_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static void glue_finalize(Object *obj) +{ + GLUEState *s = GLUE(obj); + + timer_free(s->nmi_release); +} + static void glue_init(Object *obj) { DeviceState *dev = DEVICE(obj); + GLUEState *s = GLUE(dev); qdev_init_gpio_in(dev, GLUE_set_irq, 8); + qdev_init_gpio_in_named(dev, glue_auxmode_set_irq, 
"auxmode", 1); + + qdev_init_gpio_out(dev, s->irqs, 1); + + /* NMI release timer */ + s->nmi_release = timer_new_ms(QEMU_CLOCK_VIRTUAL, glue_nmi_release, s); } static void glue_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + NMIClass *nc = NMI_CLASS(klass); dc->vmsd = &vmstate_glue; dc->reset = glue_reset; device_class_set_props(dc, glue_properties); + nc->nmi_monitor_handler = glue_nmi; } static const TypeInfo glue_info = { @@ -168,7 +313,12 @@ static const TypeInfo glue_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(GLUEState), .instance_init = glue_init, + .instance_finalize = glue_finalize, .class_init = glue_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_NMI }, + { } + }, }; static void main_cpu_reset(void *opaque) @@ -214,8 +364,11 @@ static void q800_init(MachineState *machine) int32_t initrd_size; MemoryRegion *rom; MemoryRegion *io; + MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1); + uint8_t *prom; const int io_slice_nb = (IO_SIZE / IO_SLICE) - 1; - int i; + int i, checksum; + MacFbMode *macfb_mode; ram_addr_t ram_size = machine->ram_size; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; @@ -224,7 +377,7 @@ static void q800_init(MachineState *machine) hwaddr parameters_base; CPUState *cs; DeviceState *dev; - DeviceState *via_dev; + DeviceState *via1_dev, *via2_dev; DeviceState *escc_orgate; SysBusESPState *sysbus_esp; ESPState *esp; @@ -269,28 +422,33 @@ static void q800_init(MachineState *machine) object_property_set_link(OBJECT(glue), "cpu", OBJECT(cpu), &error_abort); sysbus_realize_and_unref(SYS_BUS_DEVICE(glue), &error_fatal); - /* VIA */ - - via_dev = qdev_new(TYPE_MAC_VIA); + /* VIA 1 */ + via1_dev = qdev_new(TYPE_MOS6522_Q800_VIA1); dinfo = drive_get(IF_MTD, 0, 0); if (dinfo) { - qdev_prop_set_drive(via_dev, "drive", blk_by_legacy_dinfo(dinfo)); + qdev_prop_set_drive(via1_dev, "drive", blk_by_legacy_dinfo(dinfo)); } - sysbus = SYS_BUS_DEVICE(via_dev); + sysbus = SYS_BUS_DEVICE(via1_dev); sysbus_realize_and_unref(sysbus, &error_fatal); - sysbus_mmio_map(sysbus, 0, VIA_BASE); - qdev_connect_gpio_out_named(DEVICE(sysbus), "irq", 0, - qdev_get_gpio_in(glue, 0)); - qdev_connect_gpio_out_named(DEVICE(sysbus), "irq", 1, - qdev_get_gpio_in(glue, 1)); - + sysbus_mmio_map(sysbus, 1, VIA_BASE); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, GLUE_IRQ_IN_VIA1)); + /* A/UX mode */ + qdev_connect_gpio_out(via1_dev, 0, + qdev_get_gpio_in_named(glue, "auxmode", 0)); - adb_bus = qdev_get_child_bus(via_dev, "adb.0"); + adb_bus = qdev_get_child_bus(via1_dev, "adb.0"); dev = qdev_new(TYPE_ADB_KEYBOARD); qdev_realize_and_unref(dev, adb_bus, &error_fatal); dev = qdev_new(TYPE_ADB_MOUSE); qdev_realize_and_unref(dev, adb_bus, &error_fatal); + /* VIA 2 */ + via2_dev = qdev_new(TYPE_MOS6522_Q800_VIA2); + sysbus = SYS_BUS_DEVICE(via2_dev); + sysbus_realize_and_unref(sysbus, &error_fatal); + sysbus_mmio_map(sysbus, 1, VIA_BASE + VIA_SIZE); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, GLUE_IRQ_IN_VIA2)); + /* MACSONIC */ if (nb_nics > 1) { @@ -322,8 +480,21 @@ static void q800_init(MachineState *machine) sysbus = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(sysbus, &error_fatal); sysbus_mmio_map(sysbus, 0, SONIC_BASE); - sysbus_mmio_map(sysbus, 1, SONIC_PROM_BASE); - sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, 2)); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(glue, GLUE_IRQ_IN_SONIC)); + + memory_region_init_rom(dp8393x_prom, NULL, 
"dp8393x-q800.prom", + SONIC_PROM_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), SONIC_PROM_BASE, + dp8393x_prom); + + /* Add MAC address with valid checksum to PROM */ + prom = memory_region_get_ram_ptr(dp8393x_prom); + checksum = 0; + for (i = 0; i < 6; i++) { + prom[i] = revbit8(nd_table[0].macaddr.a[i]); + checksum ^= prom[i]; + } + prom[7] = 0xff - checksum; /* SCC */ @@ -345,7 +516,8 @@ static void q800_init(MachineState *machine) qdev_realize_and_unref(escc_orgate, NULL, &error_fatal); sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(escc_orgate, 0)); sysbus_connect_irq(sysbus, 1, qdev_get_gpio_in(escc_orgate, 1)); - qdev_connect_gpio_out(DEVICE(escc_orgate), 0, qdev_get_gpio_in(glue, 3)); + qdev_connect_gpio_out(DEVICE(escc_orgate), 0, + qdev_get_gpio_in(glue, GLUE_IRQ_IN_ESCC)); sysbus_mmio_map(sysbus, 0, SCC_BASE); /* SCSI */ @@ -361,12 +533,10 @@ static void q800_init(MachineState *machine) sysbus = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(sysbus, &error_fatal); - sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in_named(via_dev, - "via2-irq", - VIA2_IRQ_SCSI_BIT)); - sysbus_connect_irq(sysbus, 1, - qdev_get_gpio_in_named(via_dev, "via2-irq", - VIA2_IRQ_SCSI_DATA_BIT)); + sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(via2_dev, + VIA2_IRQ_SCSI_BIT)); + sysbus_connect_irq(sysbus, 1, qdev_get_gpio_in(via2_dev, + VIA2_IRQ_SCSI_DATA_BIT)); sysbus_mmio_map(sysbus, 0, ESP_BASE); sysbus_mmio_map(sysbus, 1, ESP_PDMA); @@ -381,20 +551,48 @@ static void q800_init(MachineState *machine) /* NuBus */ dev = qdev_new(TYPE_MAC_NUBUS_BRIDGE); + qdev_prop_set_uint32(dev, "slot-available-mask", + Q800_NUBUS_SLOTS_AVAILABLE); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, NUBUS_SUPER_SLOT_BASE); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, NUBUS_SLOT_BASE); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, + MAC_NUBUS_FIRST_SLOT * NUBUS_SUPER_SLOT_SIZE); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, NUBUS_SLOT_BASE + + MAC_NUBUS_FIRST_SLOT * NUBUS_SLOT_SIZE); + qdev_connect_gpio_out(dev, 9, + qdev_get_gpio_in_named(via2_dev, "nubus-irq", + VIA2_NUBUS_IRQ_INTVIDEO)); + for (i = 1; i < VIA2_NUBUS_IRQ_NB; i++) { + qdev_connect_gpio_out(dev, 9 + i, + qdev_get_gpio_in_named(via2_dev, "nubus-irq", + VIA2_NUBUS_IRQ_9 + i)); + } + + /* + * Since the framebuffer in slot 0x9 uses a separate IRQ, wire the unused + * IRQ via GLUE for use by SONIC Ethernet in classic mode + */ + qdev_connect_gpio_out(glue, GLUE_IRQ_NUBUS_9, + qdev_get_gpio_in_named(via2_dev, "nubus-irq", + VIA2_NUBUS_IRQ_9)); - nubus = MAC_NUBUS_BRIDGE(dev)->bus; + nubus = &NUBUS_BRIDGE(dev)->bus; /* framebuffer in nubus slot #9 */ dev = qdev_new(TYPE_NUBUS_MACFB); + qdev_prop_set_uint32(dev, "slot", 9); qdev_prop_set_uint32(dev, "width", graphic_width); qdev_prop_set_uint32(dev, "height", graphic_height); qdev_prop_set_uint8(dev, "depth", graphic_depth); + if (graphic_width == 1152 && graphic_height == 870) { + qdev_prop_set_uint8(dev, "display", MACFB_DISPLAY_APPLE_21_COLOR); + } else { + qdev_prop_set_uint8(dev, "display", MACFB_DISPLAY_VGA); + } qdev_realize_and_unref(dev, BUS(nubus), &error_fatal); + macfb_mode = (NUBUS_MACFB(dev)->macfb).mode; + cs = CPU(cpu); if (linux_boot) { uint64_t high; @@ -417,12 +615,12 @@ static void q800_init(MachineState *machine) BOOTINFO1(cs->as, parameters_base, BI_MAC_MEMSIZE, ram_size >> 20); /* in MB */ BOOTINFO2(cs->as, parameters_base, BI_MEMCHUNK, 0, ram_size); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VADDR, VIDEO_BASE); + 
BOOTINFO1(cs->as, parameters_base, BI_MAC_VADDR, + VIDEO_BASE + macfb_mode->offset); BOOTINFO1(cs->as, parameters_base, BI_MAC_VDEPTH, graphic_depth); BOOTINFO1(cs->as, parameters_base, BI_MAC_VDIM, (graphic_height << 16) | graphic_width); - BOOTINFO1(cs->as, parameters_base, BI_MAC_VROW, - (graphic_width * graphic_depth + 7) / 8); + BOOTINFO1(cs->as, parameters_base, BI_MAC_VROW, macfb_mode->stride); BOOTINFO1(cs->as, parameters_base, BI_MAC_SCCBASE, SCC_BASE); rom = g_malloc(sizeof(*rom)); diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index e9a5d4c69b9..0efa4a45c7f 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * QEMU Vitual M68K Machine * @@ -12,14 +12,11 @@ #include "qemu-common.h" #include "sysemu/sysemu.h" #include "cpu.h" -#include "hw/hw.h" #include "hw/boards.h" -#include "hw/irq.h" #include "hw/qdev-properties.h" #include "elf.h" #include "hw/loader.h" #include "ui/console.h" -#include "exec/address-spaces.h" #include "hw/sysbus.h" #include "standard-headers/asm-m68k/bootinfo.h" #include "standard-headers/asm-m68k/bootinfo-virt.h" @@ -307,7 +304,21 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); +static void virt_machine_6_2_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE(6, 2, true) + +static void virt_machine_6_1_options(MachineClass *mc) +{ + virt_machine_6_2_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); +} +DEFINE_VIRT_MACHINE(6, 1, false) + static void virt_machine_6_0_options(MachineClass *mc) { + virt_machine_6_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_VIRT_MACHINE(6, 0, true) +DEFINE_VIRT_MACHINE(6, 0, false) diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index a0ef2cf648e..03dbb3c7df5 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -7,6 +7,7 @@ config MEM_DEVICE config NVDIMM bool - default y - depends on (PC || PSERIES || ARM_VIRT) select MEM_DEVICE + +config SPARSE_MEM + bool diff --git a/hw/mem/meson.build b/hw/mem/meson.build index ef79e046787..82f86d117e6 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -1,8 +1,9 @@ mem_ss = ss.source_set() mem_ss.add(files('memory-device.c')) -mem_ss.add(when: 'CONFIG_FUZZ', if_true: files('sparse-mem.c')) mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) + +softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c')) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 12b655eda8b..48b913aba67 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -34,6 +34,16 @@ static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) +{ + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return NULL; + } + + return host_memory_backend_get_memory(dimm->hostmem); +} + void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, const uint64_t *legacy_align, Error **errp) { @@ -66,9 +76,8 @@ void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) { - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *vmstate_mr = 
ddc->get_vmstate_memory_region(dimm, - &error_abort); + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); memory_device_plug(MEMORY_DEVICE(dimm), machine); vmstate_register_ram(vmstate_mr, DEVICE(dimm)); @@ -76,9 +85,8 @@ void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine) { - PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); - MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm, - &error_abort); + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); memory_device_unplug(MEMORY_DEVICE(dimm), machine); vmstate_unregister_ram(vmstate_mr, DEVICE(dimm)); @@ -173,7 +181,21 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MachineState *ms = MACHINE(qdev_get_machine()); - int nb_numa_nodes = ms->numa_state->num_nodes; + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } if (!dimm->hostmem) { error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); @@ -183,13 +205,6 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) object_get_canonical_path_component(OBJECT(dimm->hostmem))); return; } - if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || - (!nb_numa_nodes && dimm->node)) { - error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" - PRIu32 "' which exceeds the number of numa nodes: %d", - dimm->node, nb_numa_nodes ? 
nb_numa_nodes : 1); - return; - } if (ddc->realize) { ddc->realize(dimm, errp); @@ -205,16 +220,6 @@ static void pc_dimm_unrealize(DeviceState *dev) host_memory_backend_set_mapped(dimm->hostmem, false); } -static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) -{ - if (!dimm->hostmem) { - error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); - return NULL; - } - - return host_memory_backend_get_memory(dimm->hostmem); -} - static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md) { return object_property_get_uint(OBJECT(md), PC_DIMM_ADDR_PROP, @@ -266,7 +271,6 @@ static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md, static void pc_dimm_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); - PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); dc->realize = pc_dimm_realize; @@ -274,8 +278,6 @@ static void pc_dimm_class_init(ObjectClass *oc, void *data) device_class_set_props(dc, pc_dimm_properties); dc->desc = "DIMM memory module"; - ddc->get_vmstate_memory_region = pc_dimm_get_memory_region; - mdc->get_addr = pc_dimm_md_get_addr; mdc->set_addr = pc_dimm_md_set_addr; /* for a dimm plugged_size == region_size */ diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c index a13ac74dd9f..e6640eb8e72 100644 --- a/hw/mem/sparse-mem.c +++ b/hw/mem/sparse-mem.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" -#include "exec/address-spaces.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qapi/error.h" diff --git a/hw/mem/trace-events b/hw/mem/trace-events index 9f6b52acd7e..8b6b02b5bf2 100644 --- a/hw/mem/trace-events +++ b/hw/mem/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# pc-dimm.c mhp_pc_dimm_assigned_slot(int slot) "%d" diff --git a/hw/meson.build b/hw/meson.build index 8ba79b1a528..b3366c888ef 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -21,6 +21,7 @@ subdir('mem') subdir('misc') subdir('net') subdir('nubus') +subdir('nvme') subdir('nvram') subdir('pci') subdir('pci-bridge') @@ -30,6 +31,7 @@ subdir('rdma') subdir('rtc') subdir('scsi') subdir('sd') +subdir('sensor') subdir('smbios') subdir('ssi') subdir('timer') @@ -47,11 +49,9 @@ subdir('avr') subdir('cris') subdir('hppa') subdir('i386') -subdir('lm32') subdir('m68k') subdir('microblaze') subdir('mips') -subdir('moxie') subdir('nios2') subdir('openrisc') subdir('ppc') @@ -63,5 +63,4 @@ subdir('sh4') subdir('sparc') subdir('sparc64') subdir('tricore') -subdir('unicore32') subdir('xtensa') diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c index caaba1aa4c1..8821d009f1a 100644 --- a/hw/microblaze/boot.c +++ b/hw/microblaze/boot.c @@ -33,7 +33,6 @@ #include "qemu/error-report.h" #include "sysemu/device_tree.h" #include "sysemu/reset.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index aadd436bf4e..b4c5549ce84 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -20,7 +20,7 @@ config JAZZ select G364FB select DP8393X select ESP - select FDC + select FDC_SYSBUS select MC146818RTC select PCKBD select SERIAL @@ -47,9 +47,15 @@ config LOONGSON3V config MIPS_CPS bool select PTIMER + select MIPS_ITU config MIPS_BOSTON bool + select FITLOADER + select MIPS_CPS + select PCI_EXPRESS_XILINX + select AHCI_ICH9 + select SERIAL config FW_CFG_MIPS bool diff --git a/hw/mips/bootloader.c b/hw/mips/bootloader.c index 6ec83144909..99991f8b2b5 100644 --- a/hw/mips/bootloader.c +++ b/hw/mips/bootloader.c @@ -182,7 +182,11 @@ void bl_gen_write_ulong(uint32_t **p, target_ulong addr, target_ulong val) { bl_gen_load_ulong(p, BL_REG_K0, val); bl_gen_load_ulong(p, BL_REG_K1, addr); - bl_gen_sd(p, BL_REG_K0, BL_REG_K1, 0x0); + if (bootcpu_supports_isa(ISA_MIPS3)) { + bl_gen_sd(p, BL_REG_K0, BL_REG_K1, 0x0); + } else { + bl_gen_sw(p, BL_REG_K0, BL_REG_K1, 0x0); + } } void bl_gen_write_u32(uint32_t **p, target_ulong addr, uint32_t val) diff --git a/hw/mips/boston.c b/hw/mips/boston.c index ac2e93a05aa..59ca08b93a9 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -20,7 +20,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "exec/address-spaces.h" +#include "elf.h" #include "hw/boards.h" #include "hw/char/serial.h" #include "hw/ide/pci.h" @@ -49,6 +49,15 @@ typedef struct BostonState BostonState; DECLARE_INSTANCE_CHECKER(BostonState, BOSTON, TYPE_BOSTON) +#define FDT_IRQ_TYPE_NONE 0 +#define FDT_IRQ_TYPE_LEVEL_HIGH 4 +#define FDT_GIC_SHARED 0 +#define FDT_GIC_LOCAL 1 +#define FDT_BOSTON_CLK_SYS 1 +#define FDT_BOSTON_CLK_CPU 2 +#define FDT_PCI_IRQ_MAP_PINS 4 +#define FDT_PCI_IRQ_MAP_DESCS 6 + struct BostonState { SysBusDevice parent_obj; @@ -65,6 +74,44 @@ struct BostonState { hwaddr fdt_base; }; +enum { + BOSTON_LOWDDR, + BOSTON_PCIE0, + BOSTON_PCIE1, + BOSTON_PCIE2, + BOSTON_PCIE2_MMIO, + BOSTON_CM, + BOSTON_GIC, + BOSTON_CDMM, + BOSTON_CPC, + BOSTON_PLATREG, + BOSTON_UART, + BOSTON_LCD, + BOSTON_FLASH, + BOSTON_PCIE1_MMIO, + BOSTON_PCIE0_MMIO, + BOSTON_HIGHDDR, +}; + +static const MemMapEntry boston_memmap[] = { + [BOSTON_LOWDDR] = { 0x0, 0x10000000 }, + [BOSTON_PCIE0] = { 0x10000000, 0x2000000 }, + [BOSTON_PCIE1] = { 0x12000000, 0x2000000 }, + [BOSTON_PCIE2] = { 0x14000000, 0x2000000 }, + 
[BOSTON_PCIE2_MMIO] = { 0x16000000, 0x100000 }, + [BOSTON_CM] = { 0x16100000, 0x20000 }, + [BOSTON_GIC] = { 0x16120000, 0x20000 }, + [BOSTON_CDMM] = { 0x16140000, 0x8000 }, + [BOSTON_CPC] = { 0x16200000, 0x8000 }, + [BOSTON_PLATREG] = { 0x17ffd000, 0x1000 }, + [BOSTON_UART] = { 0x17ffe000, 0x20 }, + [BOSTON_LCD] = { 0x17fff000, 0x8 }, + [BOSTON_FLASH] = { 0x18000000, 0x8000000 }, + [BOSTON_PCIE1_MMIO] = { 0x20000000, 0x20000000 }, + [BOSTON_PCIE0_MMIO] = { 0x40000000, 0x40000000 }, + [BOSTON_HIGHDDR] = { 0x80000000, 0x0 }, +}; + enum boston_plat_reg { PLAT_FPGA_BUILD = 0x00, PLAT_CORE_CL = 0x04, @@ -276,24 +323,24 @@ type_init(boston_register_types) static void gen_firmware(uint32_t *p, hwaddr kernel_entry, hwaddr fdt_addr) { - const uint32_t cm_base = 0x16100000; - const uint32_t gic_base = 0x16120000; - const uint32_t cpc_base = 0x16200000; + uint64_t regaddr; /* Move CM GCRs */ - bl_gen_write_ulong(&p, - cpu_mips_phys_to_kseg1(NULL, GCR_BASE_ADDR + GCR_BASE_OFS), - cm_base); + regaddr = cpu_mips_phys_to_kseg1(NULL, GCR_BASE_ADDR + GCR_BASE_OFS), + bl_gen_write_ulong(&p, regaddr, + boston_memmap[BOSTON_CM].base); /* Move & enable GIC GCRs */ - bl_gen_write_ulong(&p, - cpu_mips_phys_to_kseg1(NULL, cm_base + GCR_GIC_BASE_OFS), - gic_base | GCR_GIC_BASE_GICEN_MSK); + regaddr = cpu_mips_phys_to_kseg1(NULL, boston_memmap[BOSTON_CM].base + + GCR_GIC_BASE_OFS), + bl_gen_write_ulong(&p, regaddr, + boston_memmap[BOSTON_GIC].base | GCR_GIC_BASE_GICEN_MSK); /* Move & enable CPC GCRs */ - bl_gen_write_ulong(&p, - cpu_mips_phys_to_kseg1(NULL, cm_base + GCR_CPC_BASE_OFS), - cpc_base | GCR_CPC_BASE_CPCEN_MSK); + regaddr = cpu_mips_phys_to_kseg1(NULL, boston_memmap[BOSTON_CM].base + + GCR_CPC_BASE_OFS), + bl_gen_write_ulong(&p, regaddr, + boston_memmap[BOSTON_CPC].base | GCR_CPC_BASE_CPCEN_MSK); /* * Setup argument registers to follow the UHI boot protocol: @@ -334,8 +381,9 @@ static const void *boston_fdt_filter(void *opaque, const void *fdt_orig, ram_low_sz = MIN(256 * MiB, machine->ram_size); ram_high_sz = machine->ram_size - ram_low_sz; qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg", - 1, 0x00000000, 1, ram_low_sz, - 1, 0x90000000, 1, ram_high_sz); + 1, boston_memmap[BOSTON_LOWDDR].base, 1, ram_low_sz, + 1, boston_memmap[BOSTON_HIGHDDR].base + ram_low_sz, + 1, ram_high_sz); fdt = g_realloc(fdt, fdt_totalsize(fdt)); qemu_fdt_dumpdtb(fdt, fdt_sz); @@ -398,6 +446,222 @@ xilinx_pcie_init(MemoryRegion *sys_mem, uint32_t bus_nr, return XILINX_PCIE_HOST(dev); } + +static void fdt_create_pcie(void *fdt, int gic_ph, int irq, hwaddr reg_base, + hwaddr reg_size, hwaddr mmio_base, hwaddr mmio_size) +{ + int i; + char *name, *intc_name; + uint32_t intc_ph; + uint32_t interrupt_map[FDT_PCI_IRQ_MAP_PINS][FDT_PCI_IRQ_MAP_DESCS]; + + intc_ph = qemu_fdt_alloc_phandle(fdt); + name = g_strdup_printf("/soc/pci@%" HWADDR_PRIx, reg_base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", + "xlnx,axi-pcie-host-1.00.a"); + qemu_fdt_setprop_string(fdt, name, "device_type", "pci"); + qemu_fdt_setprop_cells(fdt, name, "reg", reg_base, reg_size); + + qemu_fdt_setprop_cell(fdt, name, "#address-cells", 3); + qemu_fdt_setprop_cell(fdt, name, "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", 1); + + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", gic_ph); + qemu_fdt_setprop_cells(fdt, name, "interrupts", FDT_GIC_SHARED, irq, + FDT_IRQ_TYPE_LEVEL_HIGH); + + qemu_fdt_setprop_cells(fdt, name, "ranges", 0x02000000, 0, mmio_base, + mmio_base, 0, mmio_size); + 
qemu_fdt_setprop_cells(fdt, name, "bus-range", 0x00, 0xff); + + + + intc_name = g_strdup_printf("%s/interrupt-controller", name); + qemu_fdt_add_subnode(fdt, intc_name); + qemu_fdt_setprop(fdt, intc_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(fdt, intc_name, "#address-cells", 0); + qemu_fdt_setprop_cell(fdt, intc_name, "#interrupt-cells", 1); + qemu_fdt_setprop_cell(fdt, intc_name, "phandle", intc_ph); + + qemu_fdt_setprop_cells(fdt, name, "interrupt-map-mask", 0, 0, 0, 7); + for (i = 0; i < FDT_PCI_IRQ_MAP_PINS; i++) { + uint32_t *irqmap = interrupt_map[i]; + + irqmap[0] = cpu_to_be32(0); + irqmap[1] = cpu_to_be32(0); + irqmap[2] = cpu_to_be32(0); + irqmap[3] = cpu_to_be32(i + 1); + irqmap[4] = cpu_to_be32(intc_ph); + irqmap[5] = cpu_to_be32(i + 1); + } + qemu_fdt_setprop(fdt, name, "interrupt-map", + &interrupt_map, sizeof(interrupt_map)); + + g_free(intc_name); + g_free(name); +} + +static const void *create_fdt(BostonState *s, + const MemMapEntry *memmap, int *dt_size) +{ + void *fdt; + int cpu; + MachineState *mc = s->mach; + uint32_t platreg_ph, gic_ph, clk_ph; + char *name, *gic_name, *platreg_name, *stdout_name; + static const char * const syscon_compat[2] = { + "img,boston-platform-regs", "syscon" + }; + + fdt = create_device_tree(dt_size); + if (!fdt) { + error_report("create_device_tree() failed"); + exit(1); + } + + platreg_ph = qemu_fdt_alloc_phandle(fdt); + gic_ph = qemu_fdt_alloc_phandle(fdt); + clk_ph = qemu_fdt_alloc_phandle(fdt); + + qemu_fdt_setprop_string(fdt, "/", "model", "img,boston"); + qemu_fdt_setprop_string(fdt, "/", "compatible", "img,boston"); + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x1); + qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x1); + + + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); + + for (cpu = 0; cpu < mc->smp.cpus; cpu++) { + name = g_strdup_printf("/cpus/cpu@%d", cpu); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "img,mips"); + qemu_fdt_setprop_string(fdt, name, "status", "okay"); + qemu_fdt_setprop_cell(fdt, name, "reg", cpu); + qemu_fdt_setprop_string(fdt, name, "device_type", "cpu"); + qemu_fdt_setprop_cells(fdt, name, "clocks", clk_ph, FDT_BOSTON_CLK_CPU); + g_free(name); + } + + qemu_fdt_add_subnode(fdt, "/soc"); + qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0); + qemu_fdt_setprop_string(fdt, "/soc", "compatible", "simple-bus"); + qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x1); + qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x1); + + fdt_create_pcie(fdt, gic_ph, 2, + memmap[BOSTON_PCIE0].base, memmap[BOSTON_PCIE0].size, + memmap[BOSTON_PCIE0_MMIO].base, memmap[BOSTON_PCIE0_MMIO].size); + + fdt_create_pcie(fdt, gic_ph, 1, + memmap[BOSTON_PCIE1].base, memmap[BOSTON_PCIE1].size, + memmap[BOSTON_PCIE1_MMIO].base, memmap[BOSTON_PCIE1_MMIO].size); + + fdt_create_pcie(fdt, gic_ph, 0, + memmap[BOSTON_PCIE2].base, memmap[BOSTON_PCIE2].size, + memmap[BOSTON_PCIE2_MMIO].base, memmap[BOSTON_PCIE2_MMIO].size); + + /* GIC with it's timer node */ + gic_name = g_strdup_printf("/soc/interrupt-controller@%" HWADDR_PRIx, + memmap[BOSTON_GIC].base); + qemu_fdt_add_subnode(fdt, gic_name); + qemu_fdt_setprop_string(fdt, gic_name, "compatible", "mti,gic"); + qemu_fdt_setprop_cells(fdt, gic_name, "reg", memmap[BOSTON_GIC].base, + memmap[BOSTON_GIC].size); + qemu_fdt_setprop(fdt, gic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(fdt, gic_name, 
"#interrupt-cells", 3); + qemu_fdt_setprop_cell(fdt, gic_name, "phandle", gic_ph); + + name = g_strdup_printf("%s/timer", gic_name); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "mti,gic-timer"); + qemu_fdt_setprop_cells(fdt, name, "interrupts", FDT_GIC_LOCAL, 1, + FDT_IRQ_TYPE_NONE); + qemu_fdt_setprop_cells(fdt, name, "clocks", clk_ph, FDT_BOSTON_CLK_CPU); + g_free(name); + g_free(gic_name); + + /* CDMM node */ + name = g_strdup_printf("/soc/cdmm@%" HWADDR_PRIx, memmap[BOSTON_CDMM].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "mti,mips-cdmm"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_CDMM].base, + memmap[BOSTON_CDMM].size); + g_free(name); + + /* CPC node */ + name = g_strdup_printf("/soc/cpc@%" HWADDR_PRIx, memmap[BOSTON_CPC].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "mti,mips-cpc"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_CPC].base, + memmap[BOSTON_CPC].size); + g_free(name); + + /* platreg and it's clk node */ + platreg_name = g_strdup_printf("/soc/system-controller@%" HWADDR_PRIx, + memmap[BOSTON_PLATREG].base); + qemu_fdt_add_subnode(fdt, platreg_name); + qemu_fdt_setprop_string_array(fdt, platreg_name, "compatible", + (char **)&syscon_compat, + ARRAY_SIZE(syscon_compat)); + qemu_fdt_setprop_cells(fdt, platreg_name, "reg", + memmap[BOSTON_PLATREG].base, + memmap[BOSTON_PLATREG].size); + qemu_fdt_setprop_cell(fdt, platreg_name, "phandle", platreg_ph); + + name = g_strdup_printf("%s/clock", platreg_name); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "img,boston-clock"); + qemu_fdt_setprop_cell(fdt, name, "#clock-cells", 1); + qemu_fdt_setprop_cell(fdt, name, "phandle", clk_ph); + g_free(name); + g_free(platreg_name); + + /* reboot node */ + name = g_strdup_printf("/soc/reboot"); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot"); + qemu_fdt_setprop_cell(fdt, name, "regmap", platreg_ph); + qemu_fdt_setprop_cell(fdt, name, "offset", 0x10); + qemu_fdt_setprop_cell(fdt, name, "mask", 0x10); + g_free(name); + + /* uart node */ + name = g_strdup_printf("/soc/uart@%" HWADDR_PRIx, memmap[BOSTON_UART].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_UART].base, + memmap[BOSTON_UART].size); + qemu_fdt_setprop_cell(fdt, name, "reg-shift", 0x2); + qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", gic_ph); + qemu_fdt_setprop_cells(fdt, name, "interrupts", FDT_GIC_SHARED, 3, + FDT_IRQ_TYPE_LEVEL_HIGH); + qemu_fdt_setprop_cells(fdt, name, "clocks", clk_ph, FDT_BOSTON_CLK_SYS); + + qemu_fdt_add_subnode(fdt, "/chosen"); + stdout_name = g_strdup_printf("%s:115200", name); + qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", stdout_name); + g_free(stdout_name); + g_free(name); + + /* lcd node */ + name = g_strdup_printf("/soc/lcd@%" HWADDR_PRIx, memmap[BOSTON_LCD].base); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "compatible", "img,boston-lcd"); + qemu_fdt_setprop_cells(fdt, name, "reg", memmap[BOSTON_LCD].base, + memmap[BOSTON_LCD].size); + g_free(name); + + name = g_strdup_printf("/memory@0"); + qemu_fdt_add_subnode(fdt, name); + qemu_fdt_setprop_string(fdt, name, "device_type", "memory"); + g_free(name); + + return fdt; +} + static void boston_mach_init(MachineState *machine) { DeviceState 
*dev; @@ -439,11 +703,15 @@ static void boston_mach_init(MachineState *machine) sysbus_mmio_map_overlap(SYS_BUS_DEVICE(&s->cps), 0, 0, 1); flash = g_new(MemoryRegion, 1); - memory_region_init_rom(flash, NULL, "boston.flash", 128 * MiB, - &error_fatal); - memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0); + memory_region_init_rom(flash, NULL, "boston.flash", + boston_memmap[BOSTON_FLASH].size, &error_fatal); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_FLASH].base, + flash, 0); - memory_region_add_subregion_overlap(sys_mem, 0x80000000, machine->ram, 0); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_HIGHDDR].base, + machine->ram, 0); ddr_low_alias = g_new(MemoryRegion, 1); memory_region_init_alias(ddr_low_alias, NULL, "boston_low.ddr", @@ -452,32 +720,41 @@ static void boston_mach_init(MachineState *machine) memory_region_add_subregion_overlap(sys_mem, 0, ddr_low_alias, 0); xilinx_pcie_init(sys_mem, 0, - 0x10000000, 32 * MiB, - 0x40000000, 1 * GiB, + boston_memmap[BOSTON_PCIE0].base, + boston_memmap[BOSTON_PCIE0].size, + boston_memmap[BOSTON_PCIE0_MMIO].base, + boston_memmap[BOSTON_PCIE0_MMIO].size, get_cps_irq(&s->cps, 2), false); xilinx_pcie_init(sys_mem, 1, - 0x12000000, 32 * MiB, - 0x20000000, 512 * MiB, + boston_memmap[BOSTON_PCIE1].base, + boston_memmap[BOSTON_PCIE1].size, + boston_memmap[BOSTON_PCIE1_MMIO].base, + boston_memmap[BOSTON_PCIE1_MMIO].size, get_cps_irq(&s->cps, 1), false); pcie2 = xilinx_pcie_init(sys_mem, 2, - 0x14000000, 32 * MiB, - 0x16000000, 1 * MiB, + boston_memmap[BOSTON_PCIE2].base, + boston_memmap[BOSTON_PCIE2].size, + boston_memmap[BOSTON_PCIE2_MMIO].base, + boston_memmap[BOSTON_PCIE2_MMIO].size, get_cps_irq(&s->cps, 0), true); platreg = g_new(MemoryRegion, 1); memory_region_init_io(platreg, NULL, &boston_platreg_ops, s, - "boston-platregs", 0x1000); - memory_region_add_subregion_overlap(sys_mem, 0x17ffd000, platreg, 0); + "boston-platregs", + boston_memmap[BOSTON_PLATREG].size); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_PLATREG].base, platreg, 0); - s->uart = serial_mm_init(sys_mem, 0x17ffe000, 2, + s->uart = serial_mm_init(sys_mem, boston_memmap[BOSTON_UART].base, 2, get_cps_irq(&s->cps, 3), 10000000, serial_hd(0), DEVICE_NATIVE_ENDIAN); lcd = g_new(MemoryRegion, 1); memory_region_init_io(lcd, NULL, &boston_lcd_ops, s, "boston-lcd", 0x8); - memory_region_add_subregion_overlap(sys_mem, 0x17fff000, lcd, 0); + memory_region_add_subregion_overlap(sys_mem, + boston_memmap[BOSTON_LCD].base, lcd, 0); chr = qemu_chr_new("lcd", "vc:320x240", NULL); qemu_chr_fe_init(&s->lcd_display, chr, NULL); @@ -500,10 +777,40 @@ static void boston_mach_init(MachineState *machine) exit(1); } } else if (machine->kernel_filename) { - fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); - if (fit_err) { - error_report("unable to load FIT image"); - exit(1); + uint64_t kernel_entry, kernel_high; + ssize_t kernel_size; + + kernel_size = load_elf(machine->kernel_filename, NULL, + cpu_mips_kseg0_to_phys, NULL, + &kernel_entry, NULL, &kernel_high, + NULL, 0, EM_MIPS, 1, 0); + + if (kernel_size > 0) { + int dt_size; + g_autofree const void *dtb_file_data, *dtb_load_data; + hwaddr dtb_paddr = QEMU_ALIGN_UP(kernel_high, 64 * KiB); + hwaddr dtb_vaddr = cpu_mips_phys_to_kseg0(NULL, dtb_paddr); + + s->kernel_entry = kernel_entry; + if (machine->dtb) { + dtb_file_data = load_device_tree(machine->dtb, &dt_size); + } else { + dtb_file_data = create_fdt(s, boston_memmap, &dt_size); + } + + 
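The address conversions in this load path rely on the fixed MIPS KSEG0 window: an unmapped, cached 512 MiB segment at virtual 0x80000000 (sign-extended on 64-bit CPUs) that aliases physical addresses 0x00000000-0x1FFFFFFF, so the DTB copied to dtb_paddr is reachable by the kernel at dtb_vaddr with no TLB setup. A minimal sketch of the two directions, under that assumption (the helper names below are illustrative and are not the QEMU cpu_mips_*() API):

    #include <stdint.h>

    /* KSEG0 is a fixed window; translation is pure bit manipulation, no TLB. */
    static uint64_t phys_to_kseg0(uint64_t paddr)
    {
        /* equivalent to 0xffffffff80000000 + paddr for paddr below 512 MiB */
        return paddr | ~UINT64_C(0x7fffffff);
    }

    static uint64_t kseg0_to_phys(uint64_t vaddr)
    {
        return vaddr & UINT64_C(0x1fffffff);   /* strip the segment bits */
    }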
dtb_load_data = boston_fdt_filter(s, dtb_file_data, + NULL, &dtb_vaddr); + + /* Calculate real fdt size after filter */ + dt_size = fdt_totalsize(dtb_load_data); + rom_add_blob_fixed("dtb", dtb_load_data, dt_size, dtb_paddr); + } else { + /* Try to load file as FIT */ + fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); + if (fit_err) { + error_report("unable to load kernel image"); + exit(1); + } } gen_firmware(memory_region_get_ram_ptr(flash) + 0x7c00000, diff --git a/hw/mips/fuloong2e.c b/hw/mips/fuloong2e.c index 4f61f2c873b..c1b8066a13b 100644 --- a/hw/mips/fuloong2e.c +++ b/hw/mips/fuloong2e.c @@ -33,13 +33,11 @@ #include "hw/mips/bootloader.h" #include "hw/mips/cpudevs.h" #include "hw/pci/pci.h" -#include "qemu/log.h" #include "hw/loader.h" #include "hw/ide/pci.h" #include "hw/qdev-properties.h" #include "elf.h" #include "hw/isa/vt82c686.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" diff --git a/hw/mips/gt64xxx_pci.c b/hw/mips/gt64xxx_pci.c index 43349d6837d..c7480bd0196 100644 --- a/hw/mips/gt64xxx_pci.c +++ b/hw/mips/gt64xxx_pci.c @@ -33,7 +33,6 @@ #include "migration/vmstate.h" #include "hw/intc/i8259.h" #include "hw/irq.h" -#include "exec/address-spaces.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c index 1a0888a0fd5..f5a26e174d5 100644 --- a/hw/mips/jazz.c +++ b/hw/mips/jazz.c @@ -35,7 +35,6 @@ #include "hw/isa/isa.h" #include "hw/block/fdc.h" #include "sysemu/sysemu.h" -#include "sysemu/arch_init.h" #include "hw/boards.h" #include "net/net.h" #include "hw/scsi/esp.h" @@ -47,7 +46,6 @@ #include "hw/audio/pcspk.h" #include "hw/input/i8042.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "qapi/error.h" @@ -120,39 +118,17 @@ static const MemoryRegionOps dma_dummy_ops = { #define MAGNUM_BIOS_SIZE \ (BIOS_SIZE < MAGNUM_BIOS_SIZE_MAX ? 
BIOS_SIZE : MAGNUM_BIOS_SIZE_MAX) -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) -static void (*real_do_transaction_failed)(CPUState *cpu, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, - uintptr_t retaddr); - -static void mips_jazz_do_transaction_failed(CPUState *cs, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, - uintptr_t retaddr) -{ - if (access_type != MMU_INST_FETCH) { - /* ignore invalid access (ie do not raise exception) */ - return; - } - (*real_do_transaction_failed)(cs, physaddr, addr, size, access_type, - mmu_idx, attrs, response, retaddr); -} -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ +#define SONIC_PROM_SIZE 0x1000 static void mips_jazz_init(MachineState *machine, enum jazz_model_e jazz_model) { MemoryRegion *address_space = get_system_memory(); char *filename; - int bios_size, n; + int bios_size, n, big_endian; Clock *cpuclk; MIPSCPU *cpu; - CPUClass *cc; + MIPSCPUClass *mcc; CPUMIPSState *env; qemu_irq *i8259; rc4030_dma *dmas; @@ -162,6 +138,7 @@ static void mips_jazz_init(MachineState *machine, MemoryRegion *rtc = g_new(MemoryRegion, 1); MemoryRegion *i8042 = g_new(MemoryRegion, 1); MemoryRegion *dma_dummy = g_new(MemoryRegion, 1); + MemoryRegion *dp8393x_prom = g_new(MemoryRegion, 1); NICInfo *nd; DeviceState *dev, *rc4030; SysBusDevice *sysbus; @@ -180,6 +157,12 @@ static void mips_jazz_init(MachineState *machine, [JAZZ_PICA61] = {33333333, 4}, }; +#ifdef TARGET_WORDS_BIGENDIAN + big_endian = 1; +#else + big_endian = 0; +#endif + if (machine->ram_size > 256 * MiB) { error_report("RAM size more than 256Mb is not supported"); exit(EXIT_FAILURE); @@ -199,8 +182,6 @@ static void mips_jazz_init(MachineState *machine, * However, we can't simply add a global memory region to catch * everything, as this would make all accesses including instruction * accesses be ignored and not raise exceptions. - * So instead we hijack the do_transaction_failed method on the CPU, and - * do not raise exceptions for data access. * * NOTE: this behaviour of raising exceptions for bad instruction * fetches but not bad data accesses was added in commit 54e755588cf1e9 @@ -210,11 +191,8 @@ static void mips_jazz_init(MachineState *machine, * we could replace this hijacking of CPU methods with a simple global * memory region that catches all memory accesses, as we do on Malta. 
*/ - cc = CPU_GET_CLASS(cpu); -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) - real_do_transaction_failed = cc->tcg_ops->do_transaction_failed; - cc->tcg_ops->do_transaction_failed = mips_jazz_do_transaction_failed; -#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ + mcc = MIPS_CPU_GET_CLASS(cpu); + mcc->no_data_aborts = true; /* allocate RAM */ memory_region_add_subregion(address_space, 0, machine->ram); @@ -258,6 +236,10 @@ static void mips_jazz_init(MachineState *machine, NULL, "dummy_dma", 0x1000); memory_region_add_subregion(address_space, 0x8000d000, dma_dummy); + memory_region_init_rom(dp8393x_prom, NULL, "dp8393x-jazz.prom", + SONIC_PROM_SIZE, &error_fatal); + memory_region_add_subregion(address_space, 0x8000b000, dp8393x_prom); + /* ISA bus: IO space at 0x90000000, mem space at 0x91000000 */ memory_region_init(isa_io, NULL, "isa-io", 0x00010000); memory_region_init(isa_mem, NULL, "isa-mem", 0x01000000); @@ -305,18 +287,33 @@ static void mips_jazz_init(MachineState *machine, nd->model = g_strdup("dp83932"); } if (strcmp(nd->model, "dp83932") == 0) { + int checksum, i; + uint8_t *prom; + qemu_check_nic_model(nd, "dp83932"); dev = qdev_new("dp8393x"); qdev_set_nic_properties(dev, nd); qdev_prop_set_uint8(dev, "it_shift", 2); + qdev_prop_set_bit(dev, "big_endian", big_endian > 0); object_property_set_link(OBJECT(dev), "dma_mr", OBJECT(rc4030_dma_mr), &error_abort); sysbus = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(sysbus, &error_fatal); sysbus_mmio_map(sysbus, 0, 0x80001000); - sysbus_mmio_map(sysbus, 1, 0x8000b000); sysbus_connect_irq(sysbus, 0, qdev_get_gpio_in(rc4030, 4)); + + /* Add MAC address with valid checksum to PROM */ + prom = memory_region_get_ram_ptr(dp8393x_prom); + checksum = 0; + for (i = 0; i < 6; i++) { + prom[i] = nd->macaddr.a[i]; + checksum += prom[i]; + if (checksum > 0xff) { + checksum = (checksum + 1) & 0xff; + } + } + prom[7] = 0xff - checksum; break; } else if (is_help_option(nd->model)) { error_report("Supported NICs: dp83932"); @@ -363,16 +360,12 @@ static void mips_jazz_init(MachineState *machine, memory_region_add_subregion(address_space, 0x80005000, i8042); /* Serial ports */ - if (serial_hd(0)) { - serial_mm_init(address_space, 0x80006000, 0, - qdev_get_gpio_in(rc4030, 8), 8000000 / 16, - serial_hd(0), DEVICE_NATIVE_ENDIAN); - } - if (serial_hd(1)) { - serial_mm_init(address_space, 0x80007000, 0, - qdev_get_gpio_in(rc4030, 9), 8000000 / 16, - serial_hd(1), DEVICE_NATIVE_ENDIAN); - } + serial_mm_init(address_space, 0x80006000, 0, + qdev_get_gpio_in(rc4030, 8), 8000000 / 16, + serial_hd(0), DEVICE_NATIVE_ENDIAN); + serial_mm_init(address_space, 0x80007000, 0, + qdev_get_gpio_in(rc4030, 9), 8000000 / 16, + serial_hd(1), DEVICE_NATIVE_ENDIAN); /* Parallel port */ if (parallel_hds[0]) diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c index b15071defc6..ae192db0c8b 100644 --- a/hw/mips/loongson3_virt.c +++ b/hw/mips/loongson3_virt.c @@ -29,10 +29,8 @@ #include "qemu/cutils.h" #include "qemu/datadir.h" #include "qapi/error.h" -#include "cpu.h" #include "elf.h" #include "kvm_mips.h" -#include "hw/boards.h" #include "hw/char/serial.h" #include "hw/intc/loongson_liointc.h" #include "hw/mips/mips.h" @@ -49,12 +47,10 @@ #include "hw/pci-host/gpex.h" #include "hw/usb.h" #include "net/net.h" -#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "qemu/log.h" #include "qemu/error-report.h" #define PM_CNTL_MODE 0x10 diff --git a/hw/mips/malta.c 
b/hw/mips/malta.c index fba13dda5c9..981d4a8cb93 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -27,7 +27,6 @@ #include "qemu/bitops.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "hw/clock.h" #include "hw/southbridge/piix.h" #include "hw/isa/superio.h" @@ -39,15 +38,12 @@ #include "hw/mips/mips.h" #include "hw/mips/cpudevs.h" #include "hw/pci/pci.h" -#include "sysemu/sysemu.h" -#include "sysemu/arch_init.h" #include "qemu/log.h" #include "hw/mips/bios.h" #include "hw/ide.h" #include "hw/irq.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "qom/object.h" #include "hw/sysbus.h" /* SysBusDevice */ #include "qemu/host-utils.h" diff --git a/hw/mips/meson.build b/hw/mips/meson.build index 1195716dc73..dd0101ad4d8 100644 --- a/hw/mips/meson.build +++ b/hw/mips/meson.build @@ -1,12 +1,15 @@ mips_ss = ss.source_set() mips_ss.add(files('bootloader.c', 'mips_int.c')) mips_ss.add(when: 'CONFIG_FW_CFG_MIPS', if_true: files('fw_cfg.c')) -mips_ss.add(when: 'CONFIG_FULOONG', if_true: files('fuloong2e.c')) mips_ss.add(when: 'CONFIG_LOONGSON3V', if_true: files('loongson3_bootp.c', 'loongson3_virt.c')) -mips_ss.add(when: 'CONFIG_JAZZ', if_true: files('jazz.c')) mips_ss.add(when: 'CONFIG_MALTA', if_true: files('gt64xxx_pci.c', 'malta.c')) +mips_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('cps.c')) + +if 'CONFIG_TCG' in config_all +mips_ss.add(when: 'CONFIG_JAZZ', if_true: files('jazz.c')) mips_ss.add(when: 'CONFIG_MIPSSIM', if_true: files('mipssim.c')) +mips_ss.add(when: 'CONFIG_FULOONG', if_true: files('fuloong2e.c')) mips_ss.add(when: 'CONFIG_MIPS_BOSTON', if_true: [files('boston.c'), fdt]) -mips_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('cps.c')) +endif hw_arch += {'mips': mips_ss} diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c index 0f9c6f07c1c..2db5e10fe0b 100644 --- a/hw/mips/mips_int.c +++ b/hw/mips/mips_int.c @@ -24,7 +24,6 @@ #include "qemu/main-loop.h" #include "hw/irq.h" #include "hw/mips/cpudevs.h" -#include "cpu.h" #include "sysemu/kvm.h" #include "kvm_mips.h" diff --git a/hw/mips/mipssim.c b/hw/mips/mipssim.c index f9212826a9b..a9525a1d351 100644 --- a/hw/mips/mipssim.c +++ b/hw/mips/mipssim.c @@ -29,7 +29,6 @@ #include "qapi/error.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "hw/clock.h" #include "hw/mips/mips.h" #include "hw/mips/cpudevs.h" @@ -43,7 +42,6 @@ #include "elf.h" #include "hw/sysbus.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig index c71ed258204..507058d8bff 100644 --- a/hw/misc/Kconfig +++ b/hw/misc/Kconfig @@ -11,21 +11,6 @@ config ARMSSE_MHU config ARMSSE_CPU_PWRCTRL bool -config MAX111X - bool - -config TMP105 - bool - depends on I2C - -config TMP421 - bool - depends on I2C - -config EMC141X - bool - depends on I2C - config ISA_DEBUG bool depends on ISA_BUS diff --git a/hw/misc/armv7m_ras.c b/hw/misc/armv7m_ras.c new file mode 100644 index 00000000000..de24922c944 --- /dev/null +++ b/hw/misc/armv7m_ras.c @@ -0,0 +1,93 @@ +/* + * Arm M-profile RAS (Reliability, Availability and Serviceability) block + * + * Copyright (c) 2021 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "hw/misc/armv7m_ras.h" +#include "qemu/log.h" + +static MemTxResult ras_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + if (attrs.user) { + return MEMTX_ERROR; + } + + switch (addr) { + case 0xe10: /* ERRIIDR */ + /* architect field = Arm; product/variant/revision 0 */ + *data = 0x43b; + break; + case 0xfc8: /* ERRDEVID */ + /* Minimal RAS: we implement 0 error record indexes */ + *data = 0; + break; + default: + qemu_log_mask(LOG_UNIMP, "Read RAS register offset 0x%x\n", + (uint32_t)addr); + *data = 0; + break; + } + return MEMTX_OK; +} + +static MemTxResult ras_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + if (attrs.user) { + return MEMTX_ERROR; + } + + switch (addr) { + default: + qemu_log_mask(LOG_UNIMP, "Write to RAS register offset 0x%x\n", + (uint32_t)addr); + break; + } + return MEMTX_OK; +} + +static const MemoryRegionOps ras_ops = { + .read_with_attrs = ras_read, + .write_with_attrs = ras_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + + +static void armv7m_ras_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + ARMv7MRAS *s = ARMV7M_RAS(obj); + + memory_region_init_io(&s->iomem, obj, &ras_ops, + s, "armv7m-ras", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); +} + +static void armv7m_ras_class_init(ObjectClass *klass, void *data) +{ + /* This device has no state: no need for vmstate or reset */ +} + +static const TypeInfo armv7m_ras_info = { + .name = TYPE_ARMV7M_RAS, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMv7MRAS), + .instance_init = armv7m_ras_init, + .class_init = armv7m_ras_class_init, +}; + +static void armv7m_ras_register_types(void) +{ + type_register_static(&armv7m_ras_info); +} + +type_init(armv7m_ras_register_types); diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c new file mode 100644 index 00000000000..10f00e65f4e --- /dev/null +++ b/hw/misc/aspeed_hace.c @@ -0,0 +1,389 @@ +/* + * ASPEED Hash and Crypto Engine + * + * Copyright (C) 2021 IBM Corp. 
+ * + * Joel Stanley + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "hw/misc/aspeed_hace.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "crypto/hash.h" +#include "hw/qdev-properties.h" +#include "hw/irq.h" + +#define R_CRYPT_CMD (0x10 / 4) + +#define R_STATUS (0x1c / 4) +#define HASH_IRQ BIT(9) +#define CRYPT_IRQ BIT(12) +#define TAG_IRQ BIT(15) + +#define R_HASH_SRC (0x20 / 4) +#define R_HASH_DEST (0x24 / 4) +#define R_HASH_SRC_LEN (0x2c / 4) + +#define R_HASH_CMD (0x30 / 4) +/* Hash algorithm selection */ +#define HASH_ALGO_MASK (BIT(4) | BIT(5) | BIT(6)) +#define HASH_ALGO_MD5 0 +#define HASH_ALGO_SHA1 BIT(5) +#define HASH_ALGO_SHA224 BIT(6) +#define HASH_ALGO_SHA256 (BIT(4) | BIT(6)) +#define HASH_ALGO_SHA512_SERIES (BIT(5) | BIT(6)) +/* SHA512 algorithm selection */ +#define SHA512_HASH_ALGO_MASK (BIT(10) | BIT(11) | BIT(12)) +#define HASH_ALGO_SHA512_SHA512 0 +#define HASH_ALGO_SHA512_SHA384 BIT(10) +#define HASH_ALGO_SHA512_SHA256 BIT(11) +#define HASH_ALGO_SHA512_SHA224 (BIT(10) | BIT(11)) +/* HMAC modes */ +#define HASH_HMAC_MASK (BIT(7) | BIT(8)) +#define HASH_DIGEST 0 +#define HASH_DIGEST_HMAC BIT(7) +#define HASH_DIGEST_ACCUM BIT(8) +#define HASH_HMAC_KEY (BIT(7) | BIT(8)) +/* Cascaded operation modes */ +#define HASH_ONLY 0 +#define HASH_ONLY2 BIT(0) +#define HASH_CRYPT_THEN_HASH BIT(1) +#define HASH_HASH_THEN_CRYPT (BIT(0) | BIT(1)) +/* Other cmd bits */ +#define HASH_IRQ_EN BIT(9) +#define HASH_SG_EN BIT(18) +/* Scatter-gather data list */ +#define SG_LIST_LEN_SIZE 4 +#define SG_LIST_LEN_MASK 0x0FFFFFFF +#define SG_LIST_LEN_LAST BIT(31) +#define SG_LIST_ADDR_SIZE 4 +#define SG_LIST_ADDR_MASK 0x7FFFFFFF +#define SG_LIST_ENTRY_SIZE (SG_LIST_LEN_SIZE + SG_LIST_ADDR_SIZE) +#define ASPEED_HACE_MAX_SG 256 /* max number of entries */ + +static const struct { + uint32_t mask; + QCryptoHashAlgorithm algo; +} hash_algo_map[] = { + { HASH_ALGO_MD5, QCRYPTO_HASH_ALG_MD5 }, + { HASH_ALGO_SHA1, QCRYPTO_HASH_ALG_SHA1 }, + { HASH_ALGO_SHA224, QCRYPTO_HASH_ALG_SHA224 }, + { HASH_ALGO_SHA256, QCRYPTO_HASH_ALG_SHA256 }, + { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA512, QCRYPTO_HASH_ALG_SHA512 }, + { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA384, QCRYPTO_HASH_ALG_SHA384 }, + { HASH_ALGO_SHA512_SERIES | HASH_ALGO_SHA512_SHA256, QCRYPTO_HASH_ALG_SHA256 }, +}; + +static int hash_algo_lookup(uint32_t reg) +{ + int i; + + reg &= HASH_ALGO_MASK | SHA512_HASH_ALGO_MASK; + + for (i = 0; i < ARRAY_SIZE(hash_algo_map); i++) { + if (reg == hash_algo_map[i].mask) { + return hash_algo_map[i].algo; + } + } + + return -1; +} + +static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode) +{ + struct iovec iov[ASPEED_HACE_MAX_SG]; + g_autofree uint8_t *digest_buf; + size_t digest_len = 0; + int i; + + if (sg_mode) { + uint32_t len = 0; + + for (i = 0; !(len & SG_LIST_LEN_LAST); i++) { + uint32_t addr, src; + hwaddr plen; + + if (i == ASPEED_HACE_MAX_SG) { + qemu_log_mask(LOG_GUEST_ERROR, + "aspeed_hace: guest failed to set end of sg list marker\n"); + break; + } + + src = s->regs[R_HASH_SRC] + (i * SG_LIST_ENTRY_SIZE); + + len = address_space_ldl_le(&s->dram_as, src, + MEMTXATTRS_UNSPECIFIED, NULL); + + addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE, + MEMTXATTRS_UNSPECIFIED, NULL); + addr &= SG_LIST_ADDR_MASK; + + iov[i].iov_len = len & SG_LIST_LEN_MASK; + plen = iov[i].iov_len; + iov[i].iov_base = address_space_map(&s->dram_as, addr, &plen, false, + 
MEMTXATTRS_UNSPECIFIED); + } + } else { + hwaddr len = s->regs[R_HASH_SRC_LEN]; + + iov[0].iov_len = len; + iov[0].iov_base = address_space_map(&s->dram_as, s->regs[R_HASH_SRC], + &len, false, + MEMTXATTRS_UNSPECIFIED); + i = 1; + } + + if (qcrypto_hash_bytesv(algo, iov, i, &digest_buf, &digest_len, NULL) < 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__); + return; + } + + if (address_space_write(&s->dram_as, s->regs[R_HASH_DEST], + MEMTXATTRS_UNSPECIFIED, + digest_buf, digest_len)) { + qemu_log_mask(LOG_GUEST_ERROR, + "aspeed_hace: address space write failed\n"); + } + + for (; i > 0; i--) { + address_space_unmap(&s->dram_as, iov[i - 1].iov_base, + iov[i - 1].iov_len, false, + iov[i - 1].iov_len); + } + + /* + * Set status bits to indicate completion. Testing shows hardware sets + * these irrespective of HASH_IRQ_EN. + */ + s->regs[R_STATUS] |= HASH_IRQ; +} + +static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) +{ + AspeedHACEState *s = ASPEED_HACE(opaque); + + addr >>= 2; + + if (addr >= ASPEED_HACE_NR_REGS) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n", + __func__, addr << 2); + return 0; + } + + return s->regs[addr]; +} + +static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + AspeedHACEState *s = ASPEED_HACE(opaque); + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); + + addr >>= 2; + + if (addr >= ASPEED_HACE_NR_REGS) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n", + __func__, addr << 2); + return; + } + + switch (addr) { + case R_STATUS: + if (data & HASH_IRQ) { + data &= ~HASH_IRQ; + + if (s->regs[addr] & HASH_IRQ) { + qemu_irq_lower(s->irq); + } + } + break; + case R_HASH_SRC: + data &= ahc->src_mask; + break; + case R_HASH_DEST: + data &= ahc->dest_mask; + break; + case R_HASH_SRC_LEN: + data &= 0x0FFFFFFF; + break; + case R_HASH_CMD: { + int algo; + data &= ahc->hash_mask; + + if ((data & HASH_HMAC_MASK)) { + qemu_log_mask(LOG_UNIMP, + "%s: HMAC engine command mode %"PRIx64" not implemented", + __func__, (data & HASH_HMAC_MASK) >> 8); + } + if (data & BIT(1)) { + qemu_log_mask(LOG_UNIMP, + "%s: Cascaded mode not implemented", + __func__); + } + algo = hash_algo_lookup(data); + if (algo < 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid hash algorithm selection 0x%"PRIx64"\n", + __func__, data & ahc->hash_mask); + break; + } + do_hash_operation(s, algo, data & HASH_SG_EN); + + if (data & HASH_IRQ_EN) { + qemu_irq_raise(s->irq); + } + break; + } + case R_CRYPT_CMD: + qemu_log_mask(LOG_UNIMP, "%s: Crypt commands not implemented\n", + __func__); + break; + default: + break; + } + + s->regs[addr] = data; +} + +static const MemoryRegionOps aspeed_hace_ops = { + .read = aspeed_hace_read, + .write = aspeed_hace_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void aspeed_hace_reset(DeviceState *dev) +{ + struct AspeedHACEState *s = ASPEED_HACE(dev); + + memset(s->regs, 0, sizeof(s->regs)); +} + +static void aspeed_hace_realize(DeviceState *dev, Error **errp) +{ + AspeedHACEState *s = ASPEED_HACE(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + sysbus_init_irq(sbd, &s->irq); + + memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_hace_ops, s, + TYPE_ASPEED_HACE, 0x1000); + + if (!s->dram_mr) { + error_setg(errp, TYPE_ASPEED_HACE ": 'dram' link not set"); + return; + } + + address_space_init(&s->dram_as, s->dram_mr, 
"dram"); + + sysbus_init_mmio(sbd, &s->iomem); +} + +static Property aspeed_hace_properties[] = { + DEFINE_PROP_LINK("dram", AspeedHACEState, dram_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + + +static const VMStateDescription vmstate_aspeed_hace = { + .name = TYPE_ASPEED_HACE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, AspeedHACEState, ASPEED_HACE_NR_REGS), + VMSTATE_END_OF_LIST(), + } +}; + +static void aspeed_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = aspeed_hace_realize; + dc->reset = aspeed_hace_reset; + device_class_set_props(dc, aspeed_hace_properties); + dc->vmsd = &vmstate_aspeed_hace; +} + +static const TypeInfo aspeed_hace_info = { + .name = TYPE_ASPEED_HACE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedHACEState), + .class_init = aspeed_hace_class_init, + .class_size = sizeof(AspeedHACEClass) +}; + +static void aspeed_ast2400_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass); + + dc->desc = "AST2400 Hash and Crypto Engine"; + + ahc->src_mask = 0x0FFFFFFF; + ahc->dest_mask = 0x0FFFFFF8; + ahc->hash_mask = 0x000003ff; /* No SG or SHA512 modes */ +} + +static const TypeInfo aspeed_ast2400_hace_info = { + .name = TYPE_ASPEED_AST2400_HACE, + .parent = TYPE_ASPEED_HACE, + .class_init = aspeed_ast2400_hace_class_init, +}; + +static void aspeed_ast2500_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass); + + dc->desc = "AST2500 Hash and Crypto Engine"; + + ahc->src_mask = 0x3fffffff; + ahc->dest_mask = 0x3ffffff8; + ahc->hash_mask = 0x000003ff; /* No SG or SHA512 modes */ +} + +static const TypeInfo aspeed_ast2500_hace_info = { + .name = TYPE_ASPEED_AST2500_HACE, + .parent = TYPE_ASPEED_HACE, + .class_init = aspeed_ast2500_hace_class_init, +}; + +static void aspeed_ast2600_hace_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedHACEClass *ahc = ASPEED_HACE_CLASS(klass); + + dc->desc = "AST2600 Hash and Crypto Engine"; + + ahc->src_mask = 0x7FFFFFFF; + ahc->dest_mask = 0x7FFFFFF8; + ahc->hash_mask = 0x00147FFF; +} + +static const TypeInfo aspeed_ast2600_hace_info = { + .name = TYPE_ASPEED_AST2600_HACE, + .parent = TYPE_ASPEED_HACE, + .class_init = aspeed_ast2600_hace_class_init, +}; + +static void aspeed_hace_register_types(void) +{ + type_register_static(&aspeed_ast2400_hace_info); + type_register_static(&aspeed_ast2500_hace_info); + type_register_static(&aspeed_ast2600_hace_info); + type_register_static(&aspeed_hace_info); +} + +type_init(aspeed_hace_register_types); diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index 40a38ebd854..d06e179a6e6 100644 --- a/hw/misc/aspeed_scu.c +++ b/hw/misc/aspeed_scu.c @@ -101,14 +101,26 @@ #define AST2600_CLK_STOP_CTRL_CLR TO_REG(0x84) #define AST2600_CLK_STOP_CTRL2 TO_REG(0x90) #define AST2600_CLK_STOP_CTRL2_CLR TO_REG(0x94) +#define AST2600_DEBUG_CTRL TO_REG(0xC8) +#define AST2600_DEBUG_CTRL2 TO_REG(0xD8) #define AST2600_SDRAM_HANDSHAKE TO_REG(0x100) #define AST2600_HPLL_PARAM TO_REG(0x200) #define AST2600_HPLL_EXT TO_REG(0x204) +#define AST2600_APLL_PARAM TO_REG(0x210) +#define AST2600_APLL_EXT TO_REG(0x214) +#define AST2600_MPLL_PARAM TO_REG(0x220) #define AST2600_MPLL_EXT TO_REG(0x224) +#define AST2600_EPLL_PARAM TO_REG(0x240) 
#define AST2600_EPLL_EXT TO_REG(0x244) +#define AST2600_DPLL_PARAM TO_REG(0x260) +#define AST2600_DPLL_EXT TO_REG(0x264) #define AST2600_CLK_SEL TO_REG(0x300) #define AST2600_CLK_SEL2 TO_REG(0x304) -#define AST2600_CLK_SEL3 TO_REG(0x310) +#define AST2600_CLK_SEL3 TO_REG(0x308) +#define AST2600_CLK_SEL4 TO_REG(0x310) +#define AST2600_CLK_SEL5 TO_REG(0x314) +#define AST2600_UARTCLK TO_REG(0x338) +#define AST2600_HUARTCLK TO_REG(0x33C) #define AST2600_HW_STRAP1 TO_REG(0x500) #define AST2600_HW_STRAP1_CLR TO_REG(0x504) #define AST2600_HW_STRAP1_PROT TO_REG(0x508) @@ -433,6 +445,8 @@ static uint32_t aspeed_silicon_revs[] = { AST2500_A1_SILICON_REV, AST2600_A0_SILICON_REV, AST2600_A1_SILICON_REV, + AST2600_A2_SILICON_REV, + AST2600_A3_SILICON_REV, }; bool is_supported_silicon_rev(uint32_t silicon_rev) @@ -651,16 +665,28 @@ static const MemoryRegionOps aspeed_ast2600_scu_ops = { .valid.unaligned = false, }; -static const uint32_t ast2600_a1_resets[ASPEED_AST2600_SCU_NR_REGS] = { +static const uint32_t ast2600_a3_resets[ASPEED_AST2600_SCU_NR_REGS] = { [AST2600_SYS_RST_CTRL] = 0xF7C3FED8, - [AST2600_SYS_RST_CTRL2] = 0xFFFFFFFC, + [AST2600_SYS_RST_CTRL2] = 0x0DFFFFFC, [AST2600_CLK_STOP_CTRL] = 0xFFFF7F8A, [AST2600_CLK_STOP_CTRL2] = 0xFFF0FFF0, + [AST2600_DEBUG_CTRL] = 0x00000FFF, + [AST2600_DEBUG_CTRL2] = 0x000000FF, [AST2600_SDRAM_HANDSHAKE] = 0x00000000, - [AST2600_HPLL_PARAM] = 0x1000405F, + [AST2600_HPLL_PARAM] = 0x1000408F, + [AST2600_APLL_PARAM] = 0x1000405F, + [AST2600_MPLL_PARAM] = 0x1008405F, + [AST2600_EPLL_PARAM] = 0x1004077F, + [AST2600_DPLL_PARAM] = 0x1078405F, + [AST2600_CLK_SEL] = 0xF3940000, + [AST2600_CLK_SEL2] = 0x00700000, + [AST2600_CLK_SEL3] = 0x00000000, + [AST2600_CLK_SEL4] = 0xF3F40000, + [AST2600_CLK_SEL5] = 0x30000000, + [AST2600_UARTCLK] = 0x00014506, + [AST2600_HUARTCLK] = 0x000145C0, [AST2600_CHIP_ID0] = 0x1234ABCD, [AST2600_CHIP_ID1] = 0x88884444, - }; static void aspeed_ast2600_scu_reset(DeviceState *dev) @@ -675,7 +701,7 @@ static void aspeed_ast2600_scu_reset(DeviceState *dev) * of actual revision. QEMU and Linux only support A1 onwards so this is * sufficient. 
*/ - s->regs[AST2600_SILICON_REV] = AST2600_A1_SILICON_REV; + s->regs[AST2600_SILICON_REV] = AST2600_A3_SILICON_REV; s->regs[AST2600_SILICON_REV2] = s->silicon_rev; s->regs[AST2600_HW_STRAP1] = s->hw_strap1; s->regs[AST2600_HW_STRAP2] = s->hw_strap2; @@ -689,7 +715,7 @@ static void aspeed_2600_scu_class_init(ObjectClass *klass, void *data) dc->desc = "ASPEED 2600 System Control Unit"; dc->reset = aspeed_ast2600_scu_reset; - asc->resets = ast2600_a1_resets; + asc->resets = ast2600_a3_resets; asc->calc_hpll = aspeed_2500_scu_calc_hpll; /* No change since AST2500 */ asc->apb_divider = 4; asc->nr_regs = ASPEED_AST2600_SCU_NR_REGS; diff --git a/hw/misc/aspeed_xdma.c b/hw/misc/aspeed_xdma.c index 533d237e3ce..1c21577c98c 100644 --- a/hw/misc/aspeed_xdma.c +++ b/hw/misc/aspeed_xdma.c @@ -30,6 +30,19 @@ #define XDMA_IRQ_ENG_STAT_US_COMP BIT(4) #define XDMA_IRQ_ENG_STAT_DS_COMP BIT(5) #define XDMA_IRQ_ENG_STAT_RESET 0xF8000000 + +#define XDMA_AST2600_BMC_CMDQ_ADDR 0x14 +#define XDMA_AST2600_BMC_CMDQ_ENDP 0x18 +#define XDMA_AST2600_BMC_CMDQ_WRP 0x1c +#define XDMA_AST2600_BMC_CMDQ_RDP 0x20 +#define XDMA_AST2600_IRQ_CTRL 0x38 +#define XDMA_AST2600_IRQ_CTRL_US_COMP BIT(16) +#define XDMA_AST2600_IRQ_CTRL_DS_COMP BIT(17) +#define XDMA_AST2600_IRQ_CTRL_W_MASK 0x017003FF +#define XDMA_AST2600_IRQ_STATUS 0x3c +#define XDMA_AST2600_IRQ_STATUS_US_COMP BIT(16) +#define XDMA_AST2600_IRQ_STATUS_DS_COMP BIT(17) + #define XDMA_MEM_SIZE 0x1000 #define TO_REG(addr) ((addr) / sizeof(uint32_t)) @@ -52,56 +65,48 @@ static void aspeed_xdma_write(void *opaque, hwaddr addr, uint64_t val, unsigned int idx; uint32_t val32 = (uint32_t)val; AspeedXDMAState *xdma = opaque; + AspeedXDMAClass *axc = ASPEED_XDMA_GET_CLASS(xdma); if (addr >= ASPEED_XDMA_REG_SIZE) { return; } - switch (addr) { - case XDMA_BMC_CMDQ_ENDP: + if (addr == axc->cmdq_endp) { xdma->regs[TO_REG(addr)] = val32 & XDMA_BMC_CMDQ_W_MASK; - break; - case XDMA_BMC_CMDQ_WRP: + } else if (addr == axc->cmdq_wrp) { idx = TO_REG(addr); xdma->regs[idx] = val32 & XDMA_BMC_CMDQ_W_MASK; - xdma->regs[TO_REG(XDMA_BMC_CMDQ_RDP)] = xdma->regs[idx]; + xdma->regs[TO_REG(axc->cmdq_rdp)] = xdma->regs[idx]; trace_aspeed_xdma_write(addr, val); if (xdma->bmc_cmdq_readp_set) { xdma->bmc_cmdq_readp_set = 0; } else { - xdma->regs[TO_REG(XDMA_IRQ_ENG_STAT)] |= - XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP; + xdma->regs[TO_REG(axc->intr_status)] |= axc->intr_complete; - if (xdma->regs[TO_REG(XDMA_IRQ_ENG_CTRL)] & - (XDMA_IRQ_ENG_CTRL_US_COMP | XDMA_IRQ_ENG_CTRL_DS_COMP)) + if (xdma->regs[TO_REG(axc->intr_ctrl)] & axc->intr_complete) { qemu_irq_raise(xdma->irq); + } } - break; - case XDMA_BMC_CMDQ_RDP: + } else if (addr == axc->cmdq_rdp) { trace_aspeed_xdma_write(addr, val); if (val32 == XDMA_BMC_CMDQ_RDP_MAGIC) { xdma->bmc_cmdq_readp_set = 1; } - break; - case XDMA_IRQ_ENG_CTRL: - xdma->regs[TO_REG(addr)] = val32 & XDMA_IRQ_ENG_CTRL_W_MASK; - break; - case XDMA_IRQ_ENG_STAT: + } else if (addr == axc->intr_ctrl) { + xdma->regs[TO_REG(addr)] = val32 & axc->intr_ctrl_mask; + } else if (addr == axc->intr_status) { trace_aspeed_xdma_write(addr, val); idx = TO_REG(addr); - if (val32 & (XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP)) { - xdma->regs[idx] &= - ~(XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP); + if (val32 & axc->intr_complete) { + xdma->regs[idx] &= ~axc->intr_complete; qemu_irq_lower(xdma->irq); } - break; - default: + } else { xdma->regs[TO_REG(addr)] = val32; - break; } } @@ -127,10 +132,11 @@ static void aspeed_xdma_realize(DeviceState *dev, Error 
**errp) static void aspeed_xdma_reset(DeviceState *dev) { AspeedXDMAState *xdma = ASPEED_XDMA(dev); + AspeedXDMAClass *axc = ASPEED_XDMA_GET_CLASS(xdma); xdma->bmc_cmdq_readp_set = 0; memset(xdma->regs, 0, ASPEED_XDMA_REG_SIZE); - xdma->regs[TO_REG(XDMA_IRQ_ENG_STAT)] = XDMA_IRQ_ENG_STAT_RESET; + xdma->regs[TO_REG(axc->intr_status)] = XDMA_IRQ_ENG_STAT_RESET; qemu_irq_lower(xdma->irq); } @@ -144,6 +150,73 @@ static const VMStateDescription aspeed_xdma_vmstate = { }, }; +static void aspeed_2600_xdma_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedXDMAClass *axc = ASPEED_XDMA_CLASS(klass); + + dc->desc = "ASPEED 2600 XDMA Controller"; + + axc->cmdq_endp = XDMA_AST2600_BMC_CMDQ_ENDP; + axc->cmdq_wrp = XDMA_AST2600_BMC_CMDQ_WRP; + axc->cmdq_rdp = XDMA_AST2600_BMC_CMDQ_RDP; + axc->intr_ctrl = XDMA_AST2600_IRQ_CTRL; + axc->intr_ctrl_mask = XDMA_AST2600_IRQ_CTRL_W_MASK; + axc->intr_status = XDMA_AST2600_IRQ_STATUS; + axc->intr_complete = XDMA_AST2600_IRQ_STATUS_US_COMP | + XDMA_AST2600_IRQ_STATUS_DS_COMP; +} + +static const TypeInfo aspeed_2600_xdma_info = { + .name = TYPE_ASPEED_2600_XDMA, + .parent = TYPE_ASPEED_XDMA, + .class_init = aspeed_2600_xdma_class_init, +}; + +static void aspeed_2500_xdma_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedXDMAClass *axc = ASPEED_XDMA_CLASS(klass); + + dc->desc = "ASPEED 2500 XDMA Controller"; + + axc->cmdq_endp = XDMA_BMC_CMDQ_ENDP; + axc->cmdq_wrp = XDMA_BMC_CMDQ_WRP; + axc->cmdq_rdp = XDMA_BMC_CMDQ_RDP; + axc->intr_ctrl = XDMA_IRQ_ENG_CTRL; + axc->intr_ctrl_mask = XDMA_IRQ_ENG_CTRL_W_MASK; + axc->intr_status = XDMA_IRQ_ENG_STAT; + axc->intr_complete = XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP; +}; + +static const TypeInfo aspeed_2500_xdma_info = { + .name = TYPE_ASPEED_2500_XDMA, + .parent = TYPE_ASPEED_XDMA, + .class_init = aspeed_2500_xdma_class_init, +}; + +static void aspeed_2400_xdma_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedXDMAClass *axc = ASPEED_XDMA_CLASS(klass); + + dc->desc = "ASPEED 2400 XDMA Controller"; + + axc->cmdq_endp = XDMA_BMC_CMDQ_ENDP; + axc->cmdq_wrp = XDMA_BMC_CMDQ_WRP; + axc->cmdq_rdp = XDMA_BMC_CMDQ_RDP; + axc->intr_ctrl = XDMA_IRQ_ENG_CTRL; + axc->intr_ctrl_mask = XDMA_IRQ_ENG_CTRL_W_MASK; + axc->intr_status = XDMA_IRQ_ENG_STAT; + axc->intr_complete = XDMA_IRQ_ENG_STAT_US_COMP | XDMA_IRQ_ENG_STAT_DS_COMP; +}; + +static const TypeInfo aspeed_2400_xdma_info = { + .name = TYPE_ASPEED_2400_XDMA, + .parent = TYPE_ASPEED_XDMA, + .class_init = aspeed_2400_xdma_class_init, +}; + static void aspeed_xdma_class_init(ObjectClass *classp, void *data) { DeviceClass *dc = DEVICE_CLASS(classp); @@ -158,10 +231,15 @@ static const TypeInfo aspeed_xdma_info = { .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(AspeedXDMAState), .class_init = aspeed_xdma_class_init, + .class_size = sizeof(AspeedXDMAClass), + .abstract = true, }; static void aspeed_xdma_register_type(void) { type_register_static(&aspeed_xdma_info); + type_register_static(&aspeed_2400_xdma_info); + type_register_static(&aspeed_2500_xdma_info); + type_register_static(&aspeed_2600_xdma_info); } type_init(aspeed_xdma_register_type); diff --git a/hw/misc/auxbus.c b/hw/misc/auxbus.c index 6c099ae2a2d..8a8012f5f08 100644 --- a/hw/misc/auxbus.c +++ b/hw/misc/auxbus.c @@ -65,7 +65,7 @@ AUXBus *aux_bus_init(DeviceState *parent, const char *name) AUXBus *bus; Object *auxtoi2c; - bus = AUX_BUS(qbus_create(TYPE_AUX_BUS, parent, 
name)); + bus = AUX_BUS(qbus_new(TYPE_AUX_BUS, parent, name)); auxtoi2c = object_new_with_props(TYPE_AUXTOI2C, OBJECT(bus), "i2c", &error_abort, NULL); @@ -106,7 +106,6 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, AUXReply ret = AUX_NACK; I2CBus *i2c_bus = aux_get_i2c_bus(bus); size_t i; - bool is_write = false; DPRINTF("request at address 0x%" PRIX32 ", command %u, len %u\n", address, cmd, len); @@ -117,11 +116,10 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, */ case WRITE_AUX: case READ_AUX: - is_write = cmd == READ_AUX ? false : true; for (i = 0; i < len; i++) { if (!address_space_rw(&bus->aux_addr_space, address++, MEMTXATTRS_UNSPECIFIED, data++, 1, - is_write)) { + cmd == WRITE_AUX)) { ret = AUX_I2C_ACK; } else { ret = AUX_NACK; @@ -133,24 +131,37 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, * Classic I2C transactions.. */ case READ_I2C: + if (i2c_bus_busy(i2c_bus)) { + i2c_end_transfer(i2c_bus); + } + + if (i2c_start_recv(i2c_bus, address)) { + ret = AUX_I2C_NACK; + break; + } + + ret = AUX_I2C_ACK; + for (i = 0; i < len; i++) { + data[i] = i2c_recv(i2c_bus); + } + i2c_end_transfer(i2c_bus); + break; case WRITE_I2C: - is_write = cmd == READ_I2C ? false : true; if (i2c_bus_busy(i2c_bus)) { i2c_end_transfer(i2c_bus); } - if (i2c_start_transfer(i2c_bus, address, is_write)) { + if (i2c_start_send(i2c_bus, address)) { ret = AUX_I2C_NACK; break; } ret = AUX_I2C_ACK; - while (len > 0) { - if (i2c_send_recv(i2c_bus, data++, is_write) < 0) { + for (i = 0; i < len; i++) { + if (i2c_send(i2c_bus, data[i]) < 0) { ret = AUX_I2C_NACK; break; } - len--; } i2c_end_transfer(i2c_bus); break; @@ -163,14 +174,12 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, * - We changed the address. */ case WRITE_I2C_MOT: - case READ_I2C_MOT: - is_write = cmd == READ_I2C_MOT ? false : true; ret = AUX_I2C_NACK; if (!i2c_bus_busy(i2c_bus)) { /* * No transactions started.. */ - if (i2c_start_transfer(i2c_bus, address, is_write)) { + if (i2c_start_send(i2c_bus, address)) { break; } } else if ((address != bus->last_i2c_address) || @@ -179,23 +188,48 @@ AUXReply aux_request(AUXBus *bus, AUXCommand cmd, uint32_t address, * Transaction started but we need to restart.. */ i2c_end_transfer(i2c_bus); - if (i2c_start_transfer(i2c_bus, address, is_write)) { + if (i2c_start_send(i2c_bus, address)) { break; } } bus->last_transaction = cmd; bus->last_i2c_address = address; - while (len > 0) { - if (i2c_send_recv(i2c_bus, data++, is_write) < 0) { + ret = AUX_I2C_ACK; + for (i = 0; i < len; i++) { + if (i2c_send(i2c_bus, data[i]) < 0) { i2c_end_transfer(i2c_bus); + ret = AUX_I2C_NACK; break; } - len--; } - if (len == 0) { - ret = AUX_I2C_ACK; + break; + case READ_I2C_MOT: + ret = AUX_I2C_NACK; + if (!i2c_bus_busy(i2c_bus)) { + /* + * No transactions started.. + */ + if (i2c_start_recv(i2c_bus, address)) { + break; + } + } else if ((address != bus->last_i2c_address) || + (bus->last_transaction != cmd)) { + /* + * Transaction started but we need to restart.. 
+ */ + i2c_end_transfer(i2c_bus); + if (i2c_start_recv(i2c_bus, address)) { + break; + } + } + + bus->last_transaction = cmd; + bus->last_i2c_address = address; + for (i = 0; i < len; i++) { + data[i] = i2c_recv(i2c_bus); } + ret = AUX_I2C_ACK; break; default: qemu_log_mask(LOG_UNIMP, "AUX cmd=%u not implemented\n", cmd); diff --git a/hw/misc/bcm2835_powermgt.c b/hw/misc/bcm2835_powermgt.c new file mode 100644 index 00000000000..25fa804cbda --- /dev/null +++ b/hw/misc/bcm2835_powermgt.c @@ -0,0 +1,160 @@ +/* + * BCM2835 Power Management emulation + * + * Copyright (C) 2017 Marcin Chojnacki + * Copyright (C) 2021 Nolan Leake + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/misc/bcm2835_powermgt.h" +#include "migration/vmstate.h" +#include "sysemu/runstate.h" + +#define PASSWORD 0x5a000000 +#define PASSWORD_MASK 0xff000000 + +#define R_RSTC 0x1c +#define V_RSTC_RESET 0x20 +#define R_RSTS 0x20 +#define V_RSTS_POWEROFF 0x555 /* Linux uses partition 63 to indicate halt. */ +#define R_WDOG 0x24 + +static uint64_t bcm2835_powermgt_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque; + uint32_t res = 0; + + switch (offset) { + case R_RSTC: + res = s->rstc; + break; + case R_RSTS: + res = s->rsts; + break; + case R_WDOG: + res = s->wdog; + break; + + default: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_read: Unknown offset 0x%08"HWADDR_PRIx + "\n", offset); + res = 0; + break; + } + + return res; +} + +static void bcm2835_powermgt_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835PowerMgtState *s = (BCM2835PowerMgtState *)opaque; + + if ((value & PASSWORD_MASK) != PASSWORD) { + qemu_log_mask(LOG_GUEST_ERROR, + "bcm2835_powermgt_write: Bad password 0x%"PRIx64 + " at offset 0x%08"HWADDR_PRIx"\n", + value, offset); + return; + } + + value = value & ~PASSWORD_MASK; + + switch (offset) { + case R_RSTC: + s->rstc = value; + if (value & V_RSTC_RESET) { + if ((s->rsts & 0xfff) == V_RSTS_POWEROFF) { + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } else { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + } + break; + case R_RSTS: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_write: RSTS\n"); + s->rsts = value; + break; + case R_WDOG: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_write: WDOG\n"); + s->wdog = value; + break; + + default: + qemu_log_mask(LOG_UNIMP, + "bcm2835_powermgt_write: Unknown offset 0x%08"HWADDR_PRIx + "\n", offset); + break; + } +} + +static const MemoryRegionOps bcm2835_powermgt_ops = { + .read = bcm2835_powermgt_read, + .write = bcm2835_powermgt_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 4, + .impl.max_access_size = 4, +}; + +static const VMStateDescription vmstate_bcm2835_powermgt = { + .name = TYPE_BCM2835_POWERMGT, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(rstc, BCM2835PowerMgtState), + VMSTATE_UINT32(rsts, BCM2835PowerMgtState), + VMSTATE_UINT32(wdog, BCM2835PowerMgtState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_powermgt_init(Object *obj) +{ + BCM2835PowerMgtState *s = BCM2835_POWERMGT(obj); + + memory_region_init_io(&s->iomem, obj, &bcm2835_powermgt_ops, s, + TYPE_BCM2835_POWERMGT, 0x200); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); +} + +static void 
bcm2835_powermgt_reset(DeviceState *dev) +{ + BCM2835PowerMgtState *s = BCM2835_POWERMGT(dev); + + /* https://elinux.org/BCM2835_registers#PM */ + s->rstc = 0x00000102; + s->rsts = 0x00001000; + s->wdog = 0x00000000; +} + +static void bcm2835_powermgt_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bcm2835_powermgt_reset; + dc->vmsd = &vmstate_bcm2835_powermgt; +} + +static TypeInfo bcm2835_powermgt_info = { + .name = TYPE_BCM2835_POWERMGT, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835PowerMgtState), + .class_init = bcm2835_powermgt_class_init, + .instance_init = bcm2835_powermgt_init, +}; + +static void bcm2835_powermgt_register_types(void) +{ + type_register_static(&bcm2835_powermgt_info); +} + +type_init(bcm2835_powermgt_register_types) diff --git a/hw/misc/imx7_snvs.c b/hw/misc/imx7_snvs.c index 45972a59202..ee7698bd9cd 100644 --- a/hw/misc/imx7_snvs.c +++ b/hw/misc/imx7_snvs.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "hw/misc/imx7_snvs.h" -#include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" diff --git a/hw/misc/imx_ccm.c b/hw/misc/imx_ccm.c index 08a50ee4c8a..9403c5daa36 100644 --- a/hw/misc/imx_ccm.c +++ b/hw/misc/imx_ccm.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "hw/misc/imx_ccm.h" -#include "qemu/log.h" #include "qemu/module.h" #ifndef DEBUG_IMX_CCM diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c index 4c270df2db0..632c03779cb 100644 --- a/hw/misc/imx_rngc.c +++ b/hw/misc/imx_rngc.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "qemu/log.h" #include "qemu/guest-random.h" #include "hw/irq.h" #include "hw/misc/imx_rngc.h" diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index a1fa4878bef..1ba4a98377c 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -493,9 +493,8 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) size = buf.st_size; /* mmap the region and map into the BAR2 */ - memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), - "ivshmem.bar2", size, true, fd, 0, - &local_err); + memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2", + size, RAM_SHARED, fd, 0, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/misc/led.c b/hw/misc/led.c index f552b8b6483..f6d6d68bce5 100644 --- a/hw/misc/led.c +++ b/hw/misc/led.c @@ -10,7 +10,6 @@ #include "migration/vmstate.h" #include "hw/qdev-properties.h" #include "hw/misc/led.h" -#include "hw/irq.h" #include "trace.h" #define LED_INTENSITY_PERCENT_MAX 100 diff --git a/hw/misc/mac_via.c b/hw/misc/mac_via.c index ff0156db76f..b378e6b3055 100644 --- a/hw/misc/mac_via.c +++ b/hw/misc/mac_via.c @@ -34,11 +34,9 @@ #include "qemu/log.h" /* - * VIAs: There are two in every machine, + * VIAs: There are two in every machine */ -#define VIA_SIZE (0x2000) - /* * Not all of these are true post MacII I think. * CSA: probably the ones CHRP marks as 'unused' change purposes @@ -132,6 +130,10 @@ * On SE/30, vertical sync interrupt enable. * 0=enabled. This vSync interrupt shows up * as a slot $E interrupt. + * On Quadra 800 this bit toggles A/UX mode which + * configures the glue logic to deliver some IRQs + * at different levels compared to a classic + * Mac. 
*/ #define VIA1B_vADBS2 0x20 /* ADB state input bit 1 (unused on IIfx) */ #define VIA1B_vADBS1 0x10 /* ADB state input bit 0 (unused on IIfx) */ @@ -374,11 +376,10 @@ static void via2_irq_request(void *opaque, int irq, int level) } -static void pram_update(MacVIAState *m) +static void pram_update(MOS6522Q800VIA1State *v1s) { - if (m->blk) { - if (blk_pwrite(m->blk, 0, m->mos6522_via1.PRAM, - sizeof(m->mos6522_via1.PRAM), 0) < 0) { + if (v1s->blk) { + if (blk_pwrite(v1s->blk, 0, v1s->PRAM, sizeof(v1s->PRAM), 0) < 0) { qemu_log("pram_update: cannot write to file\n"); } } @@ -434,9 +435,8 @@ static int via1_rtc_compact_cmd(uint8_t value) return REG_INVALID; } -static void via1_rtc_update(MacVIAState *m) +static void via1_rtc_update(MOS6522Q800VIA1State *v1s) { - MOS6522Q800VIA1State *v1s = &m->mos6522_via1; MOS6522State *s = MOS6522(v1s); int cmd, sector, addr; uint32_t time; @@ -448,40 +448,40 @@ static void via1_rtc_update(MacVIAState *m) if (s->dirb & VIA1B_vRTCData) { /* send bits to the RTC */ if (!(v1s->last_b & VIA1B_vRTCClk) && (s->b & VIA1B_vRTCClk)) { - m->data_out <<= 1; - m->data_out |= s->b & VIA1B_vRTCData; - m->data_out_cnt++; + v1s->data_out <<= 1; + v1s->data_out |= s->b & VIA1B_vRTCData; + v1s->data_out_cnt++; } - trace_via1_rtc_update_data_out(m->data_out_cnt, m->data_out); + trace_via1_rtc_update_data_out(v1s->data_out_cnt, v1s->data_out); } else { - trace_via1_rtc_update_data_in(m->data_in_cnt, m->data_in); + trace_via1_rtc_update_data_in(v1s->data_in_cnt, v1s->data_in); /* receive bits from the RTC */ if ((v1s->last_b & VIA1B_vRTCClk) && !(s->b & VIA1B_vRTCClk) && - m->data_in_cnt) { + v1s->data_in_cnt) { s->b = (s->b & ~VIA1B_vRTCData) | - ((m->data_in >> 7) & VIA1B_vRTCData); - m->data_in <<= 1; - m->data_in_cnt--; + ((v1s->data_in >> 7) & VIA1B_vRTCData); + v1s->data_in <<= 1; + v1s->data_in_cnt--; } return; } - if (m->data_out_cnt != 8) { + if (v1s->data_out_cnt != 8) { return; } - m->data_out_cnt = 0; + v1s->data_out_cnt = 0; - trace_via1_rtc_internal_status(m->cmd, m->alt, m->data_out); + trace_via1_rtc_internal_status(v1s->cmd, v1s->alt, v1s->data_out); /* first byte: it's a command */ - if (m->cmd == REG_EMPTY) { + if (v1s->cmd == REG_EMPTY) { - cmd = via1_rtc_compact_cmd(m->data_out); + cmd = via1_rtc_compact_cmd(v1s->data_out); trace_via1_rtc_internal_cmd(cmd); if (cmd == REG_INVALID) { - trace_via1_rtc_cmd_invalid(m->data_out); + trace_via1_rtc_cmd_invalid(v1s->data_out); return; } @@ -493,20 +493,20 @@ static void via1_rtc_update(MacVIAState *m) * register 3 is highest-order byte */ - time = m->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + time = v1s->tick_offset + (qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / NANOSECONDS_PER_SECOND); trace_via1_rtc_internal_time(time); - m->data_in = (time >> ((cmd & 0x03) << 3)) & 0xff; - m->data_in_cnt = 8; + v1s->data_in = (time >> ((cmd & 0x03) << 3)) & 0xff; + v1s->data_in_cnt = 8; trace_via1_rtc_cmd_seconds_read((cmd & 0x7f) - REG_0, - m->data_in); + v1s->data_in); break; case REG_PRAM_ADDR...REG_PRAM_ADDR_LAST: /* PRAM address 0x00 -> 0x13 */ - m->data_in = v1s->PRAM[(cmd & 0x7f) - REG_PRAM_ADDR]; - m->data_in_cnt = 8; + v1s->data_in = v1s->PRAM[(cmd & 0x7f) - REG_PRAM_ADDR]; + v1s->data_in_cnt = 8; trace_via1_rtc_cmd_pram_read((cmd & 0x7f) - REG_PRAM_ADDR, - m->data_in); + v1s->data_in); break; case REG_PRAM_SECT...REG_PRAM_SECT_LAST: /* @@ -514,7 +514,7 @@ static void via1_rtc_update(MacVIAState *m) * the only two-byte read command */ trace_via1_rtc_internal_set_cmd(cmd); - m->cmd = cmd; + v1s->cmd = cmd; break; 
default: g_assert_not_reached(); @@ -524,9 +524,9 @@ static void via1_rtc_update(MacVIAState *m) } /* this is a write command, needs a parameter */ - if (cmd == REG_WPROTECT || !m->wprotect) { + if (cmd == REG_WPROTECT || !v1s->wprotect) { trace_via1_rtc_internal_set_cmd(cmd); - m->cmd = cmd; + v1s->cmd = cmd; } else { trace_via1_rtc_internal_ignore_cmd(cmd); } @@ -534,46 +534,47 @@ static void via1_rtc_update(MacVIAState *m) } /* second byte: it's a parameter */ - if (m->alt == REG_EMPTY) { - switch (m->cmd & 0x7f) { + if (v1s->alt == REG_EMPTY) { + switch (v1s->cmd & 0x7f) { case REG_0...REG_3: /* seconds register */ /* FIXME */ - trace_via1_rtc_cmd_seconds_write(m->cmd - REG_0, m->data_out); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_seconds_write(v1s->cmd - REG_0, v1s->data_out); + v1s->cmd = REG_EMPTY; break; case REG_TEST: /* device control: nothing to do */ - trace_via1_rtc_cmd_test_write(m->data_out); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_test_write(v1s->data_out); + v1s->cmd = REG_EMPTY; break; case REG_WPROTECT: /* Write Protect register */ - trace_via1_rtc_cmd_wprotect_write(m->data_out); - m->wprotect = !!(m->data_out & 0x80); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_wprotect_write(v1s->data_out); + v1s->wprotect = !!(v1s->data_out & 0x80); + v1s->cmd = REG_EMPTY; break; case REG_PRAM_ADDR...REG_PRAM_ADDR_LAST: /* PRAM address 0x00 -> 0x13 */ - trace_via1_rtc_cmd_pram_write(m->cmd - REG_PRAM_ADDR, m->data_out); - v1s->PRAM[m->cmd - REG_PRAM_ADDR] = m->data_out; - pram_update(m); - m->cmd = REG_EMPTY; + trace_via1_rtc_cmd_pram_write(v1s->cmd - REG_PRAM_ADDR, + v1s->data_out); + v1s->PRAM[v1s->cmd - REG_PRAM_ADDR] = v1s->data_out; + pram_update(v1s); + v1s->cmd = REG_EMPTY; break; case REG_PRAM_SECT...REG_PRAM_SECT_LAST: - addr = (m->data_out >> 2) & 0x1f; - sector = (m->cmd & 0x7f) - REG_PRAM_SECT; - if (m->cmd & 0x80) { + addr = (v1s->data_out >> 2) & 0x1f; + sector = (v1s->cmd & 0x7f) - REG_PRAM_SECT; + if (v1s->cmd & 0x80) { /* it's a read */ - m->data_in = v1s->PRAM[sector * 32 + addr]; - m->data_in_cnt = 8; + v1s->data_in = v1s->PRAM[sector * 32 + addr]; + v1s->data_in_cnt = 8; trace_via1_rtc_cmd_pram_sect_read(sector, addr, sector * 32 + addr, - m->data_in); - m->cmd = REG_EMPTY; + v1s->data_in); + v1s->cmd = REG_EMPTY; } else { /* it's a write, we need one more parameter */ trace_via1_rtc_internal_set_alt(addr, sector, addr); - m->alt = addr; + v1s->alt = addr; } break; default: @@ -584,22 +585,21 @@ static void via1_rtc_update(MacVIAState *m) } /* third byte: it's the data of a REG_PRAM_SECT write */ - g_assert(REG_PRAM_SECT <= m->cmd && m->cmd <= REG_PRAM_SECT_LAST); - sector = m->cmd - REG_PRAM_SECT; - v1s->PRAM[sector * 32 + m->alt] = m->data_out; - pram_update(m); - trace_via1_rtc_cmd_pram_sect_write(sector, m->alt, sector * 32 + m->alt, - m->data_out); - m->alt = REG_EMPTY; - m->cmd = REG_EMPTY; + g_assert(REG_PRAM_SECT <= v1s->cmd && v1s->cmd <= REG_PRAM_SECT_LAST); + sector = v1s->cmd - REG_PRAM_SECT; + v1s->PRAM[sector * 32 + v1s->alt] = v1s->data_out; + pram_update(v1s); + trace_via1_rtc_cmd_pram_sect_write(sector, v1s->alt, sector * 32 + v1s->alt, + v1s->data_out); + v1s->alt = REG_EMPTY; + v1s->cmd = REG_EMPTY; } static void adb_via_poll(void *opaque) { - MacVIAState *m = opaque; - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&m->mos6522_via1); + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); MOS6522State *s = MOS6522(v1s); - ADBBusState *adb_bus = &m->adb_bus; + ADBBusState *adb_bus = &v1s->adb_bus; uint8_t obuf[9]; uint8_t *data = &s->sr; int 
olen; @@ -611,50 +611,50 @@ static void adb_via_poll(void *opaque) */ adb_autopoll_block(adb_bus); - if (m->adb_data_in_size > 0 && m->adb_data_in_index == 0) { + if (v1s->adb_data_in_size > 0 && v1s->adb_data_in_index == 0) { /* * For older Linux kernels that switch to IDLE mode after sending the * ADB command, detect if there is an existing response and return that * as a a "fake" autopoll reply or bus timeout accordingly */ - *data = m->adb_data_out[0]; - olen = m->adb_data_in_size; + *data = v1s->adb_data_out[0]; + olen = v1s->adb_data_in_size; s->b &= ~VIA1B_vADBInt; - qemu_irq_raise(m->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); } else { /* * Otherwise poll as normal */ - m->adb_data_in_index = 0; - m->adb_data_out_index = 0; + v1s->adb_data_in_index = 0; + v1s->adb_data_out_index = 0; olen = adb_poll(adb_bus, obuf, adb_bus->autopoll_mask); if (olen > 0) { /* Autopoll response */ *data = obuf[0]; olen--; - memcpy(m->adb_data_in, &obuf[1], olen); - m->adb_data_in_size = olen; + memcpy(v1s->adb_data_in, &obuf[1], olen); + v1s->adb_data_in_size = olen; s->b &= ~VIA1B_vADBInt; - qemu_irq_raise(m->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); } else { - *data = m->adb_autopoll_cmd; + *data = v1s->adb_autopoll_cmd; obuf[0] = 0xff; obuf[1] = 0xff; olen = 2; - memcpy(m->adb_data_in, obuf, olen); - m->adb_data_in_size = olen; + memcpy(v1s->adb_data_in, obuf, olen); + v1s->adb_data_in_size = olen; s->b &= ~VIA1B_vADBInt; - qemu_irq_raise(m->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); } } trace_via1_adb_poll(*data, (s->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, m->adb_data_in_index, olen); + adb_bus->status, v1s->adb_data_in_index, olen); } static int adb_via_send_len(uint8_t data) @@ -687,11 +687,10 @@ static int adb_via_send_len(uint8_t data) } } -static void adb_via_send(MacVIAState *s, int state, uint8_t data) +static void adb_via_send(MOS6522Q800VIA1State *v1s, int state, uint8_t data) { - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&s->mos6522_via1); MOS6522State *ms = MOS6522(v1s); - ADBBusState *adb_bus = &s->adb_bus; + ADBBusState *adb_bus = &v1s->adb_bus; uint16_t autopoll_mask; switch (state) { @@ -707,22 +706,22 @@ static void adb_via_send(MacVIAState *s, int state, uint8_t data) ms->b &= ~VIA1B_vADBInt; } else { ms->b |= VIA1B_vADBInt; - s->adb_data_out_index = 0; - s->adb_data_out[s->adb_data_out_index++] = data; + v1s->adb_data_out_index = 0; + v1s->adb_data_out[v1s->adb_data_out_index++] = data; } trace_via1_adb_send(" NEW", data, (ms->b & VIA1B_vADBInt) ? "+" : "-"); - qemu_irq_raise(s->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); break; case ADB_STATE_EVEN: case ADB_STATE_ODD: ms->b |= VIA1B_vADBInt; - s->adb_data_out[s->adb_data_out_index++] = data; + v1s->adb_data_out[v1s->adb_data_out_index++] = data; trace_via1_adb_send(state == ADB_STATE_EVEN ? "EVEN" : " ODD", data, (ms->b & VIA1B_vADBInt) ? 
"+" : "-"); - qemu_irq_raise(s->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); break; case ADB_STATE_IDLE: @@ -730,40 +729,39 @@ static void adb_via_send(MacVIAState *s, int state, uint8_t data) } /* If the command is complete, execute it */ - if (s->adb_data_out_index == adb_via_send_len(s->adb_data_out[0])) { - s->adb_data_in_size = adb_request(adb_bus, s->adb_data_in, - s->adb_data_out, - s->adb_data_out_index); - s->adb_data_in_index = 0; + if (v1s->adb_data_out_index == adb_via_send_len(v1s->adb_data_out[0])) { + v1s->adb_data_in_size = adb_request(adb_bus, v1s->adb_data_in, + v1s->adb_data_out, + v1s->adb_data_out_index); + v1s->adb_data_in_index = 0; if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { /* * Bus timeout (but allow first EVEN and ODD byte to indicate * timeout via vADBInt and SRQ status) */ - s->adb_data_in[0] = 0xff; - s->adb_data_in[1] = 0xff; - s->adb_data_in_size = 2; + v1s->adb_data_in[0] = 0xff; + v1s->adb_data_in[1] = 0xff; + v1s->adb_data_in_size = 2; } /* * If last command is TALK, store it for use by autopoll and adjust * the autopoll mask accordingly */ - if ((s->adb_data_out[0] & 0xc) == 0xc) { - s->adb_autopoll_cmd = s->adb_data_out[0]; + if ((v1s->adb_data_out[0] & 0xc) == 0xc) { + v1s->adb_autopoll_cmd = v1s->adb_data_out[0]; - autopoll_mask = 1 << (s->adb_autopoll_cmd >> 4); + autopoll_mask = 1 << (v1s->adb_autopoll_cmd >> 4); adb_set_autopoll_mask(adb_bus, autopoll_mask); } } } -static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) +static void adb_via_receive(MOS6522Q800VIA1State *v1s, int state, uint8_t *data) { - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&s->mos6522_via1); MOS6522State *ms = MOS6522(v1s); - ADBBusState *adb_bus = &s->adb_bus; + ADBBusState *adb_bus = &v1s->adb_bus; uint16_t pending; switch (state) { @@ -777,16 +775,16 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive("IDLE", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", adb_bus->status, - s->adb_data_in_index, s->adb_data_in_size); + v1s->adb_data_in_index, v1s->adb_data_in_size); break; case ADB_STATE_EVEN: case ADB_STATE_ODD: - switch (s->adb_data_in_index) { + switch (v1s->adb_data_in_index) { case 0: /* First EVEN byte: vADBInt indicates bus timeout */ - *data = s->adb_data_in[s->adb_data_in_index]; + *data = v1s->adb_data_in[v1s->adb_data_in_index]; if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { ms->b &= ~VIA1B_vADBInt; } else { @@ -795,16 +793,16 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); + adb_bus->status, v1s->adb_data_in_index, + v1s->adb_data_in_size); - s->adb_data_in_index++; + v1s->adb_data_in_index++; break; case 1: /* First ODD byte: vADBInt indicates SRQ */ - *data = s->adb_data_in[s->adb_data_in_index]; - pending = adb_bus->pending & ~(1 << (s->adb_autopoll_cmd >> 4)); + *data = v1s->adb_data_in[v1s->adb_data_in_index]; + pending = adb_bus->pending & ~(1 << (v1s->adb_autopoll_cmd >> 4)); if (pending) { ms->b &= ~VIA1B_vADBInt; } else { @@ -813,10 +811,10 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? 
"+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); + adb_bus->status, v1s->adb_data_in_index, + v1s->adb_data_in_size); - s->adb_data_in_index++; + v1s->adb_data_in_index++; break; default: @@ -826,11 +824,11 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) * end of the poll reply, so provide these extra bytes below to * keep it happy */ - if (s->adb_data_in_index < s->adb_data_in_size) { + if (v1s->adb_data_in_index < v1s->adb_data_in_size) { /* Next data byte */ - *data = s->adb_data_in[s->adb_data_in_index]; + *data = v1s->adb_data_in[v1s->adb_data_in_index]; ms->b |= VIA1B_vADBInt; - } else if (s->adb_data_in_index == s->adb_data_in_size) { + } else if (v1s->adb_data_in_index == v1s->adb_data_in_size) { if (adb_bus->status & ADB_STATUS_BUSTIMEOUT) { /* Bus timeout (no more data) */ *data = 0xff; @@ -849,23 +847,22 @@ static void adb_via_receive(MacVIAState *s, int state, uint8_t *data) trace_via1_adb_receive(state == ADB_STATE_EVEN ? "EVEN" : " ODD", *data, (ms->b & VIA1B_vADBInt) ? "+" : "-", - adb_bus->status, s->adb_data_in_index, - s->adb_data_in_size); + adb_bus->status, v1s->adb_data_in_index, + v1s->adb_data_in_size); - if (s->adb_data_in_index <= s->adb_data_in_size) { - s->adb_data_in_index++; + if (v1s->adb_data_in_index <= v1s->adb_data_in_size) { + v1s->adb_data_in_index++; } break; } - qemu_irq_raise(s->adb_data_ready); + qemu_irq_raise(v1s->adb_data_ready); break; } } -static void via1_adb_update(MacVIAState *m) +static void via1_adb_update(MOS6522Q800VIA1State *v1s) { - MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(&m->mos6522_via1); MOS6522State *s = MOS6522(v1s); int oldstate, state; @@ -875,14 +872,29 @@ static void via1_adb_update(MacVIAState *m) if (state != oldstate) { if (s->acr & VIA1ACR_vShiftOut) { /* output mode */ - adb_via_send(m, state, s->sr); + adb_via_send(v1s, state, s->sr); } else { /* input mode */ - adb_via_receive(m, state, &s->sr); + adb_via_receive(v1s, state, &s->sr); } } } +static void via1_auxmode_update(MOS6522Q800VIA1State *v1s) +{ + MOS6522State *s = MOS6522(v1s); + int oldirq, irq; + + oldirq = (v1s->last_b & VIA1B_vMystery) ? 1 : 0; + irq = (s->b & VIA1B_vMystery) ? 
1 : 0; + + /* Check to see if the A/UX mode bit has changed */ + if (irq != oldirq) { + trace_via1_auxmode(irq); + qemu_set_irq(v1s->auxmode_irq, irq); + } +} + static uint64_t mos6522_q800_via1_read(void *opaque, hwaddr addr, unsigned size) { MOS6522Q800VIA1State *s = MOS6522_Q800_VIA1(opaque); @@ -896,7 +908,6 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); - MacVIAState *m = container_of(v1s, MacVIAState, mos6522_via1); MOS6522State *ms = MOS6522(v1s); addr = (addr >> 9) & 0xf; @@ -904,8 +915,9 @@ static void mos6522_q800_via1_write(void *opaque, hwaddr addr, uint64_t val, switch (addr) { case VIA_REG_B: - via1_rtc_update(m); - via1_adb_update(m); + via1_rtc_update(v1s); + via1_adb_update(v1s); + via1_auxmode_update(v1s); v1s->last_b = ms->b; break; @@ -951,216 +963,158 @@ static const MemoryRegionOps mos6522_q800_via2_ops = { }, }; -static void mac_via_reset(DeviceState *dev) +static void via1_postload_update_cb(void *opaque, bool running, RunState state) { - MacVIAState *m = MAC_VIA(dev); - ADBBusState *adb_bus = &m->adb_bus; + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); - adb_set_autopoll_enabled(adb_bus, true); + qemu_del_vm_change_state_handler(v1s->vmstate); + v1s->vmstate = NULL; - m->cmd = REG_EMPTY; - m->alt = REG_EMPTY; + pram_update(v1s); } -static void mac_via_realize(DeviceState *dev, Error **errp) +static int via1_post_load(void *opaque, int version_id) { - MacVIAState *m = MAC_VIA(dev); - MOS6522State *ms; - ADBBusState *adb_bus = &m->adb_bus; + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(opaque); + + if (v1s->blk) { + v1s->vmstate = qemu_add_vm_change_state_handler( + via1_postload_update_cb, v1s); + } + + return 0; +} + +/* VIA 1 */ +static void mos6522_q800_via1_reset(DeviceState *dev) +{ + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(dev); + MOS6522State *ms = MOS6522(v1s); + MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(ms); + ADBBusState *adb_bus = &v1s->adb_bus; + + mdc->parent_reset(dev); + + ms->timers[0].frequency = VIA_TIMER_FREQ; + ms->timers[1].frequency = VIA_TIMER_FREQ; + + ms->b = VIA1B_vADB_StateMask | VIA1B_vADBInt | VIA1B_vRTCEnb; + + /* ADB/RTC */ + adb_set_autopoll_enabled(adb_bus, true); + v1s->cmd = REG_EMPTY; + v1s->alt = REG_EMPTY; +} + +static void mos6522_q800_via1_realize(DeviceState *dev, Error **errp) +{ + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(dev); + ADBBusState *adb_bus = &v1s->adb_bus; struct tm tm; int ret; - /* Init VIAs 1 and 2 */ - object_initialize_child(OBJECT(dev), "via1", &m->mos6522_via1, - TYPE_MOS6522_Q800_VIA1); - - object_initialize_child(OBJECT(dev), "via2", &m->mos6522_via2, - TYPE_MOS6522_Q800_VIA2); - - /* Pass through mos6522 output IRQs */ - ms = MOS6522(&m->mos6522_via1); - object_property_add_alias(OBJECT(dev), "irq[0]", OBJECT(ms), - SYSBUS_DEVICE_GPIO_IRQ "[0]"); - ms = MOS6522(&m->mos6522_via2); - object_property_add_alias(OBJECT(dev), "irq[1]", OBJECT(ms), - SYSBUS_DEVICE_GPIO_IRQ "[0]"); - - sysbus_realize(SYS_BUS_DEVICE(&m->mos6522_via1), &error_abort); - sysbus_realize(SYS_BUS_DEVICE(&m->mos6522_via2), &error_abort); - - /* Pass through mos6522 input IRQs */ - qdev_pass_gpios(DEVICE(&m->mos6522_via1), dev, "via1-irq"); - qdev_pass_gpios(DEVICE(&m->mos6522_via2), dev, "via2-irq"); - - /* VIA 1 */ - m->mos6522_via1.one_second_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, - via1_one_second, - &m->mos6522_via1); - via1_one_second_update(&m->mos6522_via1); - m->mos6522_via1.sixty_hz_timer = 
timer_new_ns(QEMU_CLOCK_VIRTUAL, - via1_sixty_hz, - &m->mos6522_via1); - via1_sixty_hz_update(&m->mos6522_via1); + v1s->one_second_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, via1_one_second, + v1s); + via1_one_second_update(v1s); + v1s->sixty_hz_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, via1_sixty_hz, + v1s); + via1_sixty_hz_update(v1s); qemu_get_timedate(&tm, 0); - m->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; + v1s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET; - adb_register_autopoll_callback(adb_bus, adb_via_poll, m); - m->adb_data_ready = qdev_get_gpio_in_named(dev, "via1-irq", - VIA1_IRQ_ADB_READY_BIT); + adb_register_autopoll_callback(adb_bus, adb_via_poll, v1s); + v1s->adb_data_ready = qdev_get_gpio_in(dev, VIA1_IRQ_ADB_READY_BIT); - if (m->blk) { - int64_t len = blk_getlength(m->blk); + if (v1s->blk) { + int64_t len = blk_getlength(v1s->blk); if (len < 0) { error_setg_errno(errp, -len, "could not get length of backing image"); return; } - ret = blk_set_perm(m->blk, + ret = blk_set_perm(v1s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, BLK_PERM_ALL, errp); if (ret < 0) { return; } - len = blk_pread(m->blk, 0, m->mos6522_via1.PRAM, - sizeof(m->mos6522_via1.PRAM)); - if (len != sizeof(m->mos6522_via1.PRAM)) { + len = blk_pread(v1s->blk, 0, v1s->PRAM, sizeof(v1s->PRAM)); + if (len != sizeof(v1s->PRAM)) { error_setg(errp, "can't read PRAM contents"); return; } } } -static void mac_via_init(Object *obj) +static void mos6522_q800_via1_init(Object *obj) { - SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - MacVIAState *m = MAC_VIA(obj); - - /* MMIO */ - memory_region_init(&m->mmio, obj, "mac-via", 2 * VIA_SIZE); - sysbus_init_mmio(sbd, &m->mmio); - - memory_region_init_io(&m->via1mem, obj, &mos6522_q800_via1_ops, - &m->mos6522_via1, "via1", VIA_SIZE); - memory_region_add_subregion(&m->mmio, 0x0, &m->via1mem); + MOS6522Q800VIA1State *v1s = MOS6522_Q800_VIA1(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(v1s); - memory_region_init_io(&m->via2mem, obj, &mos6522_q800_via2_ops, - &m->mos6522_via2, "via2", VIA_SIZE); - memory_region_add_subregion(&m->mmio, VIA_SIZE, &m->via2mem); + memory_region_init_io(&v1s->via_mem, obj, &mos6522_q800_via1_ops, v1s, + "via1", VIA_SIZE); + sysbus_init_mmio(sbd, &v1s->via_mem); /* ADB */ - qbus_create_inplace((BusState *)&m->adb_bus, sizeof(m->adb_bus), - TYPE_ADB_BUS, DEVICE(obj), "adb.0"); -} - -static void postload_update_cb(void *opaque, bool running, RunState state) -{ - MacVIAState *m = MAC_VIA(opaque); + qbus_init((BusState *)&v1s->adb_bus, sizeof(v1s->adb_bus), + TYPE_ADB_BUS, DEVICE(v1s), "adb.0"); - qemu_del_vm_change_state_handler(m->vmstate); - m->vmstate = NULL; + qdev_init_gpio_in(DEVICE(obj), via1_irq_request, VIA1_IRQ_NB); - pram_update(m); + /* A/UX mode */ + qdev_init_gpio_out(DEVICE(obj), &v1s->auxmode_irq, 1); } -static int mac_via_post_load(void *opaque, int version_id) -{ - MacVIAState *m = MAC_VIA(opaque); - - if (m->blk) { - m->vmstate = qemu_add_vm_change_state_handler(postload_update_cb, - m); - } - - return 0; -} - -static const VMStateDescription vmstate_mac_via = { - .name = "mac-via", - .version_id = 2, - .minimum_version_id = 2, - .post_load = mac_via_post_load, +static const VMStateDescription vmstate_q800_via1 = { + .name = "q800-via1", + .version_id = 0, + .minimum_version_id = 0, + .post_load = via1_post_load, .fields = (VMStateField[]) { - /* VIAs */ - VMSTATE_STRUCT(mos6522_via1.parent_obj, MacVIAState, 0, vmstate_mos6522, - MOS6522State), - VMSTATE_UINT8(mos6522_via1.last_b, MacVIAState), - 
VMSTATE_BUFFER(mos6522_via1.PRAM, MacVIAState), - VMSTATE_TIMER_PTR(mos6522_via1.one_second_timer, MacVIAState), - VMSTATE_INT64(mos6522_via1.next_second, MacVIAState), - VMSTATE_TIMER_PTR(mos6522_via1.sixty_hz_timer, MacVIAState), - VMSTATE_INT64(mos6522_via1.next_sixty_hz, MacVIAState), - VMSTATE_STRUCT(mos6522_via2.parent_obj, MacVIAState, 0, vmstate_mos6522, + VMSTATE_STRUCT(parent_obj, MOS6522Q800VIA1State, 0, vmstate_mos6522, MOS6522State), + VMSTATE_UINT8(last_b, MOS6522Q800VIA1State), /* RTC */ - VMSTATE_UINT32(tick_offset, MacVIAState), - VMSTATE_UINT8(data_out, MacVIAState), - VMSTATE_INT32(data_out_cnt, MacVIAState), - VMSTATE_UINT8(data_in, MacVIAState), - VMSTATE_UINT8(data_in_cnt, MacVIAState), - VMSTATE_UINT8(cmd, MacVIAState), - VMSTATE_INT32(wprotect, MacVIAState), - VMSTATE_INT32(alt, MacVIAState), + VMSTATE_BUFFER(PRAM, MOS6522Q800VIA1State), + VMSTATE_UINT32(tick_offset, MOS6522Q800VIA1State), + VMSTATE_UINT8(data_out, MOS6522Q800VIA1State), + VMSTATE_INT32(data_out_cnt, MOS6522Q800VIA1State), + VMSTATE_UINT8(data_in, MOS6522Q800VIA1State), + VMSTATE_UINT8(data_in_cnt, MOS6522Q800VIA1State), + VMSTATE_UINT8(cmd, MOS6522Q800VIA1State), + VMSTATE_INT32(wprotect, MOS6522Q800VIA1State), + VMSTATE_INT32(alt, MOS6522Q800VIA1State), /* ADB */ - VMSTATE_INT32(adb_data_in_size, MacVIAState), - VMSTATE_INT32(adb_data_in_index, MacVIAState), - VMSTATE_INT32(adb_data_out_index, MacVIAState), - VMSTATE_BUFFER(adb_data_in, MacVIAState), - VMSTATE_BUFFER(adb_data_out, MacVIAState), - VMSTATE_UINT8(adb_autopoll_cmd, MacVIAState), + VMSTATE_INT32(adb_data_in_size, MOS6522Q800VIA1State), + VMSTATE_INT32(adb_data_in_index, MOS6522Q800VIA1State), + VMSTATE_INT32(adb_data_out_index, MOS6522Q800VIA1State), + VMSTATE_BUFFER(adb_data_in, MOS6522Q800VIA1State), + VMSTATE_BUFFER(adb_data_out, MOS6522Q800VIA1State), + VMSTATE_UINT8(adb_autopoll_cmd, MOS6522Q800VIA1State), + /* Timers */ + VMSTATE_TIMER_PTR(one_second_timer, MOS6522Q800VIA1State), + VMSTATE_INT64(next_second, MOS6522Q800VIA1State), + VMSTATE_TIMER_PTR(sixty_hz_timer, MOS6522Q800VIA1State), + VMSTATE_INT64(next_sixty_hz, MOS6522Q800VIA1State), VMSTATE_END_OF_LIST() } }; -static Property mac_via_properties[] = { - DEFINE_PROP_DRIVE("drive", MacVIAState, blk), +static Property mos6522_q800_via1_properties[] = { + DEFINE_PROP_DRIVE("drive", MOS6522Q800VIA1State, blk), DEFINE_PROP_END_OF_LIST(), }; -static void mac_via_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - dc->realize = mac_via_realize; - dc->reset = mac_via_reset; - dc->vmsd = &vmstate_mac_via; - device_class_set_props(dc, mac_via_properties); -} - -static TypeInfo mac_via_info = { - .name = TYPE_MAC_VIA, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MacVIAState), - .instance_init = mac_via_init, - .class_init = mac_via_class_init, -}; - -/* VIA 1 */ -static void mos6522_q800_via1_reset(DeviceState *dev) -{ - MOS6522State *ms = MOS6522(dev); - MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(ms); - - mdc->parent_reset(dev); - - ms->timers[0].frequency = VIA_TIMER_FREQ; - ms->timers[1].frequency = VIA_TIMER_FREQ; - - ms->b = VIA1B_vADB_StateMask | VIA1B_vADBInt | VIA1B_vRTCEnb; -} - -static void mos6522_q800_via1_init(Object *obj) -{ - qdev_init_gpio_in_named(DEVICE(obj), via1_irq_request, "via1-irq", - VIA1_IRQ_NB); -} - static void mos6522_q800_via1_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); + dc->realize = mos6522_q800_via1_realize; dc->reset = mos6522_q800_via1_reset; + dc->vmsd = 
&vmstate_q800_via1; + device_class_set_props(dc, mos6522_q800_via1_properties); } static const TypeInfo mos6522_q800_via1_type_info = { @@ -1192,20 +1146,61 @@ static void mos6522_q800_via2_reset(DeviceState *dev) ms->dirb = 0; ms->b = 0; + ms->dira = 0; + ms->a = 0x7f; +} + +static void via2_nubus_irq_request(void *opaque, int irq, int level) +{ + MOS6522Q800VIA2State *v2s = opaque; + MOS6522State *s = MOS6522(v2s); + MOS6522DeviceClass *mdc = MOS6522_GET_CLASS(s); + + if (level) { + /* Port A nubus IRQ inputs are active LOW */ + s->a &= ~(1 << irq); + s->ifr |= 1 << VIA2_IRQ_NUBUS_BIT; + } else { + s->a |= (1 << irq); + s->ifr &= ~(1 << VIA2_IRQ_NUBUS_BIT); + } + + mdc->update_irq(s); } static void mos6522_q800_via2_init(Object *obj) { - qdev_init_gpio_in_named(DEVICE(obj), via2_irq_request, "via2-irq", - VIA2_IRQ_NB); + MOS6522Q800VIA2State *v2s = MOS6522_Q800_VIA2(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(v2s); + + memory_region_init_io(&v2s->via_mem, obj, &mos6522_q800_via2_ops, v2s, + "via2", VIA_SIZE); + sysbus_init_mmio(sbd, &v2s->via_mem); + + qdev_init_gpio_in(DEVICE(obj), via2_irq_request, VIA2_IRQ_NB); + + qdev_init_gpio_in_named(DEVICE(obj), via2_nubus_irq_request, "nubus-irq", + VIA2_NUBUS_IRQ_NB); } +static const VMStateDescription vmstate_q800_via2 = { + .name = "q800-via2", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(parent_obj, MOS6522Q800VIA2State, 0, vmstate_mos6522, + MOS6522State), + VMSTATE_END_OF_LIST() + } +}; + static void mos6522_q800_via2_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); MOS6522DeviceClass *mdc = MOS6522_CLASS(oc); dc->reset = mos6522_q800_via2_reset; + dc->vmsd = &vmstate_q800_via2; mdc->portB_write = mos6522_q800_via2_portB_write; } @@ -1221,7 +1216,6 @@ static void mac_via_register_types(void) { type_register_static(&mos6522_q800_via1_type_info); type_register_static(&mos6522_q800_via2_type_info); - type_register_static(&mac_via_info); } type_init(mac_via_register_types); diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c index edbd4186b2a..e917a6a095b 100644 --- a/hw/misc/macio/cuda.c +++ b/hw/misc/macio/cuda.c @@ -553,8 +553,8 @@ static void cuda_init(Object *obj) memory_region_init_io(&s->mem, obj, &mos6522_cuda_ops, s, "cuda", 0x2000); sysbus_init_mmio(sbd, &s->mem); - qbus_create_inplace(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, - DEVICE(obj), "adb.0"); + qbus_init(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, + DEVICE(obj), "adb.0"); } static Property cuda_properties[] = { diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index e6eeb575d53..c1fad43f6c6 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -35,7 +35,6 @@ #include "hw/char/escc.h" #include "hw/misc/macio/macio.h" #include "hw/intc/heathrow_pic.h" -#include "sysemu/sysemu.h" #include "trace.h" /* Note: this code is strongly inspirated from the corresponding code @@ -388,8 +387,8 @@ static void macio_instance_init(Object *obj) memory_region_init(&s->bar, obj, "macio", 0x80000); - qbus_create_inplace(&s->macio_bus, sizeof(s->macio_bus), TYPE_MACIO_BUS, - DEVICE(obj), "macio.0"); + qbus_init(&s->macio_bus, sizeof(s->macio_bus), TYPE_MACIO_BUS, + DEVICE(obj), "macio.0"); object_initialize_child(OBJECT(s), "dbdma", &s->dbdma, TYPE_MAC_DBDMA); diff --git a/hw/misc/macio/pmu.c b/hw/misc/macio/pmu.c index 71924d4768c..eb39c64694a 100644 --- a/hw/misc/macio/pmu.c +++ b/hw/misc/macio/pmu.c @@ -718,6 +718,7 @@ static const VMStateDescription vmstate_pmu = { }, .subsections 
= (const VMStateDescription * []) { &vmstate_pmu_adb, + NULL } }; @@ -754,8 +755,8 @@ static void pmu_realize(DeviceState *dev, Error **errp) timer_mod(s->one_sec_timer, s->one_sec_target); if (s->has_adb) { - qbus_create_inplace(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, - dev, "adb.0"); + qbus_init(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, + dev, "adb.0"); adb_register_autopoll_callback(adb_bus, pmu_adb_poll, s); } } diff --git a/hw/misc/macio/trace-events b/hw/misc/macio/trace-events index e4a1cc0d246..ad4b9d1c08e 100644 --- a/hw/misc/macio/trace-events +++ b/hw/misc/macio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # cuda.c cuda_delay_set_sr_int(void) "" diff --git a/hw/misc/mchp_pfsoc_dmc.c b/hw/misc/mchp_pfsoc_dmc.c index 15cf3d77252..43d8e970abc 100644 --- a/hw/misc/mchp_pfsoc_dmc.c +++ b/hw/misc/mchp_pfsoc_dmc.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_dmc.h" diff --git a/hw/misc/mchp_pfsoc_ioscb.c b/hw/misc/mchp_pfsoc_ioscb.c index 8b0d1cacd7e..f4fd55a0e5c 100644 --- a/hw/misc/mchp_pfsoc_ioscb.c +++ b/hw/misc/mchp_pfsoc_ioscb.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_ioscb.h" diff --git a/hw/misc/mchp_pfsoc_sysreg.c b/hw/misc/mchp_pfsoc_sysreg.c index 248a3133455..89571eded53 100644 --- a/hw/misc/mchp_pfsoc_sysreg.c +++ b/hw/misc/mchp_pfsoc_sysreg.c @@ -24,7 +24,6 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "qapi/error.h" -#include "hw/hw.h" #include "hw/sysbus.h" #include "hw/misc/mchp_pfsoc_sysreg.h" diff --git a/hw/misc/meson.build b/hw/misc/meson.build index 21034dc60a8..3f41a3a5b27 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -3,13 +3,9 @@ softmmu_ss.add(when: 'CONFIG_EDU', if_true: files('edu.c')) softmmu_ss.add(when: 'CONFIG_FW_CFG_DMA', if_true: files('vmcoreinfo.c')) softmmu_ss.add(when: 'CONFIG_ISA_DEBUG', if_true: files('debugexit.c')) softmmu_ss.add(when: 'CONFIG_ISA_TESTDEV', if_true: files('pc-testdev.c')) -softmmu_ss.add(when: 'CONFIG_MAX111X', if_true: files('max111x.c')) softmmu_ss.add(when: 'CONFIG_PCA9552', if_true: files('pca9552.c')) softmmu_ss.add(when: 'CONFIG_PCI_TESTDEV', if_true: files('pci-testdev.c')) softmmu_ss.add(when: 'CONFIG_SGA', if_true: files('sga.c')) -softmmu_ss.add(when: 'CONFIG_TMP105', if_true: files('tmp105.c')) -softmmu_ss.add(when: 'CONFIG_TMP421', if_true: files('tmp421.c')) -softmmu_ss.add(when: 'CONFIG_EMC141X', if_true: files('emc141x.c')) softmmu_ss.add(when: 'CONFIG_UNIMP', if_true: files('unimp.c')) softmmu_ss.add(when: 'CONFIG_EMPTY_SLOT', if_true: files('empty_slot.c')) softmmu_ss.add(when: 'CONFIG_LED', if_true: files('led.c')) @@ -21,6 +17,8 @@ softmmu_ss.add(when: 'CONFIG_INTEGRATOR_DEBUG', if_true: files('arm_integrator_d softmmu_ss.add(when: 'CONFIG_A9SCU', if_true: files('a9scu.c')) softmmu_ss.add(when: 'CONFIG_ARM11SCU', if_true: files('arm11scu.c')) +softmmu_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_ras.c')) + # Mac devices softmmu_ss.add(when: 'CONFIG_MOS6522', if_true: files('mos6522.c')) @@ -36,9 +34,6 @@ softmmu_ss.add(when: 'CONFIG_SIFIVE_E_PRCI', if_true: files('sifive_e_prci.c')) softmmu_ss.add(when: 'CONFIG_SIFIVE_U_OTP', if_true: files('sifive_u_otp.c')) softmmu_ss.add(when: 'CONFIG_SIFIVE_U_PRCI', if_true: 
files('sifive_u_prci.c')) -# PKUnity SoC devices -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_pm.c')) - subdir('macio') softmmu_ss.add(when: 'CONFIG_IVSHMEM_DEVICE', if_true: files('ivshmem.c')) @@ -63,7 +58,6 @@ softmmu_ss.add(when: 'CONFIG_IMX', if_true: files( 'imx_ccm.c', 'imx_rngc.c', )) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-hpdmc.c', 'milkymist-pfpu.c')) softmmu_ss.add(when: 'CONFIG_MAINSTONE', if_true: files('mst_fpga.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files( 'npcm7xx_clk.c', @@ -86,9 +80,10 @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files( 'bcm2835_rng.c', 'bcm2835_thermal.c', 'bcm2835_cprman.c', + 'bcm2835_powermgt.c', )) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c')) -softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c', 'zynq-xadc.c')) +softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c')) softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-xramc.c')) softmmu_ss.add(when: 'CONFIG_STM32F2XX_SYSCFG', if_true: files('stm32f2xx_syscfg.c')) softmmu_ss.add(when: 'CONFIG_STM32F4XX_SYSCFG', if_true: files('stm32f4xx_syscfg.c')) @@ -109,6 +104,7 @@ softmmu_ss.add(when: 'CONFIG_PVPANIC_ISA', if_true: files('pvpanic-isa.c')) softmmu_ss.add(when: 'CONFIG_PVPANIC_PCI', if_true: files('pvpanic-pci.c')) softmmu_ss.add(when: 'CONFIG_AUX', if_true: files('auxbus.c')) softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files( + 'aspeed_hace.c', 'aspeed_lpc.c', 'aspeed_scu.c', 'aspeed_sdmc.c', diff --git a/hw/misc/milkymist-hpdmc.c b/hw/misc/milkymist-hpdmc.c deleted file mode 100644 index 09a3875f02c..00000000000 --- a/hw/misc/milkymist-hpdmc.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * QEMU model of the Milkymist High Performance Dynamic Memory Controller. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/hpdmc.pdf - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - R_SYSTEM = 0, - R_BYPASS, - R_TIMING, - R_IODELAY, - R_MAX -}; - -enum { - IODELAY_DQSDELAY_RDY = (1<<5), - IODELAY_PLL1_LOCKED = (1<<6), - IODELAY_PLL2_LOCKED = (1<<7), -}; - -#define TYPE_MILKYMIST_HPDMC "milkymist-hpdmc" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistHpdmcState, MILKYMIST_HPDMC) - -struct MilkymistHpdmcState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - - uint32_t regs[R_MAX]; -}; - -static uint64_t hpdmc_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistHpdmcState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_SYSTEM: - case R_BYPASS: - case R_TIMING: - case R_IODELAY: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_hpdmc: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_hpdmc_memory_read(addr << 2, r); - - return r; -} - -static void hpdmc_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistHpdmcState *s = opaque; - - trace_milkymist_hpdmc_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_SYSTEM: - case R_BYPASS: - case R_TIMING: - s->regs[addr] = value; - break; - case R_IODELAY: - /* ignore writes */ - break; - - default: - error_report("milkymist_hpdmc: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps hpdmc_mmio_ops = { - .read = hpdmc_read, - .write = hpdmc_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_hpdmc_reset(DeviceState *d) -{ - MilkymistHpdmcState *s = MILKYMIST_HPDMC(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - /* defaults */ - s->regs[R_IODELAY] = IODELAY_DQSDELAY_RDY | IODELAY_PLL1_LOCKED - | IODELAY_PLL2_LOCKED; -} - -static void milkymist_hpdmc_realize(DeviceState *dev, Error **errp) -{ - MilkymistHpdmcState *s = MILKYMIST_HPDMC(dev); - - memory_region_init_io(&s->regs_region, OBJECT(dev), &hpdmc_mmio_ops, s, - "milkymist-hpdmc", R_MAX * 4); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->regs_region); -} - -static const VMStateDescription vmstate_milkymist_hpdmc = { - .name = "milkymist-hpdmc", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistHpdmcState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_hpdmc_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_hpdmc_realize; - dc->reset = milkymist_hpdmc_reset; - dc->vmsd = &vmstate_milkymist_hpdmc; -} - -static const TypeInfo milkymist_hpdmc_info = { - .name = TYPE_MILKYMIST_HPDMC, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistHpdmcState), - .class_init = milkymist_hpdmc_class_init, -}; - -static void milkymist_hpdmc_register_types(void) -{ - type_register_static(&milkymist_hpdmc_info); -} - -type_init(milkymist_hpdmc_register_types) diff --git a/hw/misc/milkymist-pfpu.c b/hw/misc/milkymist-pfpu.c deleted file mode 100644 index e4ee209c101..00000000000 --- a/hw/misc/milkymist-pfpu.c +++ /dev/null @@ -1,548 +0,0 @@ -/* - * QEMU model of the Milkymist programmable FPU. 
- * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/pfpu.pdf - * - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/error-report.h" -#include -#include "qom/object.h" - -/* #define TRACE_EXEC */ - -#ifdef TRACE_EXEC -# define D_EXEC(x) x -#else -# define D_EXEC(x) -#endif - -enum { - R_CTL = 0, - R_MESHBASE, - R_HMESHLAST, - R_VMESHLAST, - R_CODEPAGE, - R_VERTICES, - R_COLLISIONS, - R_STRAYWRITES, - R_LASTDMA, - R_PC, - R_DREGBASE, - R_CODEBASE, - R_MAX -}; - -enum { - CTL_START_BUSY = (1<<0), -}; - -enum { - OP_NOP = 0, - OP_FADD, - OP_FSUB, - OP_FMUL, - OP_FABS, - OP_F2I, - OP_I2F, - OP_VECTOUT, - OP_SIN, - OP_COS, - OP_ABOVE, - OP_EQUAL, - OP_COPY, - OP_IF, - OP_TSIGN, - OP_QUAKE, -}; - -enum { - GPR_X = 0, - GPR_Y = 1, - GPR_FLAGS = 2, -}; - -enum { - LATENCY_FADD = 5, - LATENCY_FSUB = 5, - LATENCY_FMUL = 7, - LATENCY_FABS = 2, - LATENCY_F2I = 2, - LATENCY_I2F = 3, - LATENCY_VECTOUT = 0, - LATENCY_SIN = 4, - LATENCY_COS = 4, - LATENCY_ABOVE = 2, - LATENCY_EQUAL = 2, - LATENCY_COPY = 2, - LATENCY_IF = 2, - LATENCY_TSIGN = 2, - LATENCY_QUAKE = 2, - MAX_LATENCY = 7 -}; - -#define GPR_BEGIN 0x100 -#define GPR_END 0x17f -#define MICROCODE_BEGIN 0x200 -#define MICROCODE_END 0x3ff -#define MICROCODE_WORDS 2048 - -#define REINTERPRET_CAST(type, val) (*((type *)&(val))) - -#ifdef TRACE_EXEC -static const char *opcode_to_str[] = { - "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT", - "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE", -}; -#endif - -#define TYPE_MILKYMIST_PFPU "milkymist-pfpu" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistPFPUState, MILKYMIST_PFPU) - -struct MilkymistPFPUState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - Chardev *chr; - qemu_irq irq; - - uint32_t regs[R_MAX]; - uint32_t gp_regs[128]; - uint32_t microcode[MICROCODE_WORDS]; - - int output_queue_pos; - uint32_t output_queue[MAX_LATENCY]; -}; - -static inline uint32_t -get_dma_address(uint32_t base, uint32_t x, uint32_t y) -{ - return base + 8 * (128 * y + x); -} - -static inline void -output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos) -{ - s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val; -} - -static inline uint32_t -output_queue_remove(MilkymistPFPUState *s) -{ - return s->output_queue[s->output_queue_pos]; -} - -static inline void -output_queue_advance(MilkymistPFPUState *s) -{ - s->output_queue[s->output_queue_pos] = 0; - s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY; -} - -static int pfpu_decode_insn(MilkymistPFPUState *s) -{ - uint32_t pc = s->regs[R_PC]; - uint32_t insn = s->microcode[pc]; - uint32_t reg_a = (insn >> 18) & 0x7f; - uint32_t reg_b = (insn >> 11) & 0x7f; - uint32_t op = 
(insn >> 7) & 0xf; - uint32_t reg_d = insn & 0x7f; - uint32_t r = 0; - int latency = 0; - - switch (op) { - case OP_NOP: - break; - case OP_FADD: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = a + b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FADD; - D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_FSUB: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = a - b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FSUB; - D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_FMUL: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = a * b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FMUL; - D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_FABS: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float t = fabsf(a); - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_FABS; - D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r)); - } break; - case OP_F2I: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - int32_t t = a; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_F2I; - D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r)); - } break; - case OP_I2F: - { - int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); - float t = a; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_I2F; - D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r)); - } break; - case OP_VECTOUT: - { - uint32_t a = cpu_to_be32(s->gp_regs[reg_a]); - uint32_t b = cpu_to_be32(s->gp_regs[reg_b]); - hwaddr dma_ptr = - get_dma_address(s->regs[R_MESHBASE], - s->gp_regs[GPR_X], s->gp_regs[GPR_Y]); - cpu_physical_memory_write(dma_ptr, &a, 4); - cpu_physical_memory_write(dma_ptr + 4, &b, 4); - s->regs[R_LASTDMA] = dma_ptr + 4; - D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr)); - trace_milkymist_pfpu_vectout(a, b, dma_ptr); - } break; - case OP_SIN: - { - int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); - float t = sinf(a * (1.0f / (M_PI * 4096.0f))); - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_SIN; - D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r)); - } break; - case OP_COS: - { - int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); - float t = cosf(a * (1.0f / (M_PI * 4096.0f))); - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_COS; - D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r)); - } break; - case OP_ABOVE: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = (a > b) ? 1.0f : 0.0f; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_ABOVE; - D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_EQUAL: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = (a == b) ? 
1.0f : 0.0f; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_EQUAL; - D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_COPY: - { - r = s->gp_regs[reg_a]; - latency = LATENCY_COPY; - D_EXEC(qemu_log("COPY")); - } break; - case OP_IF: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - uint32_t f = s->gp_regs[GPR_FLAGS]; - float t = (f != 0) ? a : b; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_IF; - D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r)); - } break; - case OP_TSIGN: - { - float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); - float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); - float t = (b < 0) ? -a : a; - r = REINTERPRET_CAST(uint32_t, t); - latency = LATENCY_TSIGN; - D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r)); - } break; - case OP_QUAKE: - { - uint32_t a = s->gp_regs[reg_a]; - r = 0x5f3759df - (a >> 1); - latency = LATENCY_QUAKE; - D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r)); - } break; - - default: - error_report("milkymist_pfpu: unknown opcode %d", op); - break; - } - - if (!reg_d) { - D_EXEC(qemu_log("%04d %8s R%03d, R%03d \n", - s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, - s->regs[R_PC] + latency)); - } else { - D_EXEC(qemu_log("%04d %8s R%03d, R%03d -> R%03d\n", - s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, - s->regs[R_PC] + latency, reg_d)); - } - - if (op == OP_VECTOUT) { - return 0; - } - - /* store output for this cycle */ - if (reg_d) { - uint32_t val = output_queue_remove(s); - D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val)); - s->gp_regs[reg_d] = val; - } - - output_queue_advance(s); - - /* store op output */ - if (op != OP_NOP) { - output_queue_insert(s, r, latency-1); - } - - /* advance PC */ - s->regs[R_PC]++; - - return 1; -}; - -static void pfpu_start(MilkymistPFPUState *s) -{ - int x, y; - int i; - - for (y = 0; y <= s->regs[R_VMESHLAST]; y++) { - for (x = 0; x <= s->regs[R_HMESHLAST]; x++) { - D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y)); - - /* set current position */ - s->gp_regs[GPR_X] = x; - s->gp_regs[GPR_Y] = y; - - /* run microcode on this position */ - i = 0; - while (pfpu_decode_insn(s)) { - /* decode at most MICROCODE_WORDS instructions */ - if (++i >= MICROCODE_WORDS) { - error_report("milkymist_pfpu: too many instructions " - "executed in microcode. No VECTOUT?"); - break; - } - } - - /* reset pc for next run */ - s->regs[R_PC] = 0; - } - } - - s->regs[R_VERTICES] = x * y; - - trace_milkymist_pfpu_pulse_irq(); - qemu_irq_pulse(s->irq); -} - -static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr) -{ - return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN; -} - -static uint64_t pfpu_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistPFPUState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CTL: - case R_MESHBASE: - case R_HMESHLAST: - case R_VMESHLAST: - case R_CODEPAGE: - case R_VERTICES: - case R_COLLISIONS: - case R_STRAYWRITES: - case R_LASTDMA: - case R_PC: - case R_DREGBASE: - case R_CODEBASE: - r = s->regs[addr]; - break; - case GPR_BEGIN ... GPR_END: - r = s->gp_regs[addr - GPR_BEGIN]; - break; - case MICROCODE_BEGIN ... 
MICROCODE_END: - r = s->microcode[get_microcode_address(s, addr)]; - break; - - default: - error_report("milkymist_pfpu: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_pfpu_memory_read(addr << 2, r); - - return r; -} - -static void pfpu_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistPFPUState *s = opaque; - - trace_milkymist_pfpu_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_CTL: - if (value & CTL_START_BUSY) { - pfpu_start(s); - } - break; - case R_MESHBASE: - case R_HMESHLAST: - case R_VMESHLAST: - case R_CODEPAGE: - case R_VERTICES: - case R_COLLISIONS: - case R_STRAYWRITES: - case R_LASTDMA: - case R_PC: - case R_DREGBASE: - case R_CODEBASE: - s->regs[addr] = value; - break; - case GPR_BEGIN ... GPR_END: - s->gp_regs[addr - GPR_BEGIN] = value; - break; - case MICROCODE_BEGIN ... MICROCODE_END: - s->microcode[get_microcode_address(s, addr)] = value; - break; - - default: - error_report("milkymist_pfpu: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps pfpu_mmio_ops = { - .read = pfpu_read, - .write = pfpu_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_pfpu_reset(DeviceState *d) -{ - MilkymistPFPUState *s = MILKYMIST_PFPU(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - for (i = 0; i < 128; i++) { - s->gp_regs[i] = 0; - } - for (i = 0; i < MICROCODE_WORDS; i++) { - s->microcode[i] = 0; - } - s->output_queue_pos = 0; - for (i = 0; i < MAX_LATENCY; i++) { - s->output_queue[i] = 0; - } -} - -static void milkymist_pfpu_realize(DeviceState *dev, Error **errp) -{ - MilkymistPFPUState *s = MILKYMIST_PFPU(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - sysbus_init_irq(sbd, &s->irq); - - memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s, - "milkymist-pfpu", MICROCODE_END * 4); - sysbus_init_mmio(sbd, &s->regs_region); -} - -static const VMStateDescription vmstate_milkymist_pfpu = { - .name = "milkymist-pfpu", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX), - VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128), - VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS), - VMSTATE_INT32(output_queue_pos, MilkymistPFPUState), - VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_pfpu_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_pfpu_realize; - dc->reset = milkymist_pfpu_reset; - dc->vmsd = &vmstate_milkymist_pfpu; -} - -static const TypeInfo milkymist_pfpu_info = { - .name = TYPE_MILKYMIST_PFPU, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistPFPUState), - .class_init = milkymist_pfpu_class_init, -}; - -static void milkymist_pfpu_register_types(void) -{ - type_register_static(&milkymist_pfpu_info); -} - -type_init(milkymist_pfpu_register_types) diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c index 133399598fc..80683fed318 100644 --- a/hw/misc/mips_itu.c +++ b/hw/misc/mips_itu.c @@ -22,7 +22,6 @@ #include "qemu/log.h" #include "qemu/module.h" #include "qapi/error.h" -#include "cpu.h" #include "exec/exec-all.h" #include "hw/misc/mips_itu.h" #include "hw/qdev-properties.h" diff --git a/hw/misc/mps2-scc.c b/hw/misc/mps2-scc.c 
index c56aca86ad5..b3b42a792cd 100644 --- a/hw/misc/mps2-scc.c +++ b/hw/misc/mps2-scc.c @@ -23,6 +23,7 @@ #include "qemu/bitops.h" #include "trace.h" #include "hw/sysbus.h" +#include "hw/irq.h" #include "migration/vmstate.h" #include "hw/registerfields.h" #include "hw/misc/mps2-scc.h" @@ -186,10 +187,13 @@ static void mps2_scc_write(void *opaque, hwaddr offset, uint64_t value, switch (offset) { case A_CFG0: /* - * TODO on some boards bit 0 controls RAM remapping; - * on others bit 1 is CPU_WAIT. + * On some boards bit 0 controls board-specific remapping; + * we always reflect bit 0 in the 'remap' GPIO output line, + * and let the board wire it up or not as it chooses. + * TODO on some boards bit 1 is CPU_WAIT. */ s->cfg0 = value; + qemu_set_irq(s->remap, s->cfg0 & 1); break; case A_CFG1: s->cfg1 = value; @@ -283,7 +287,7 @@ static void mps2_scc_reset(DeviceState *dev) int i; trace_mps2_scc_reset(); - s->cfg0 = 0; + s->cfg0 = s->cfg0_reset; s->cfg1 = 0; s->cfg2 = 0; s->cfg5 = 0; @@ -308,6 +312,7 @@ static void mps2_scc_init(Object *obj) memory_region_init_io(&s->iomem, obj, &mps2_scc_ops, s, "mps2-scc", 0x1000); sysbus_init_mmio(sbd, &s->iomem); + qdev_init_gpio_out_named(DEVICE(obj), &s->remap, "remap", 1); } static void mps2_scc_realize(DeviceState *dev, Error **errp) @@ -353,6 +358,8 @@ static Property mps2_scc_properties[] = { DEFINE_PROP_UINT32("scc-cfg4", MPS2SCC, cfg4, 0), DEFINE_PROP_UINT32("scc-aid", MPS2SCC, aid, 0), DEFINE_PROP_UINT32("scc-id", MPS2SCC, id, 0), + /* Reset value for CFG0 register */ + DEFINE_PROP_UINT32("scc-cfg0", MPS2SCC, cfg0_reset, 0), /* * These are the initial settings for the source clocks on the board. * In hardware they can be configured via a config file read by the diff --git a/hw/misc/mst_fpga.c b/hw/misc/mst_fpga.c index edfc35d5f0f..2aaadfa9668 100644 --- a/hw/misc/mst_fpga.c +++ b/hw/misc/mst_fpga.c @@ -222,7 +222,7 @@ static void mst_fpga_init(Object *obj) sysbus_init_mmio(sbd, &s->iomem); } -static VMStateDescription vmstate_mst_fpga_regs = { +static const VMStateDescription vmstate_mst_fpga_regs = { .name = "mainstone_fpga", .version_id = 0, .minimum_version_id = 0, diff --git a/hw/misc/npcm7xx_clk.c b/hw/misc/npcm7xx_clk.c index a1ee67dc9a1..0b61070c52f 100644 --- a/hw/misc/npcm7xx_clk.c +++ b/hw/misc/npcm7xx_clk.c @@ -35,7 +35,7 @@ #define NPCM7XX_CLOCK_REF_HZ (25000000) /* Register Field Definitions */ -#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex A9 Cores */ +#define NPCM7XX_CLK_WDRCR_CA9C BIT(0) /* Cortex-A9 Cores */ #define PLLCON_LOKI BIT(31) #define PLLCON_LOKS BIT(30) diff --git a/hw/misc/pca9552.c b/hw/misc/pca9552.c index b7686e27d7f..fff19e369a3 100644 --- a/hw/misc/pca9552.c +++ b/hw/misc/pca9552.c @@ -272,7 +272,7 @@ static void pca955x_get_led(Object *obj, Visitor *v, const char *name, * reading the INPUTx reg */ reg = PCA9552_LS0 + led / 4; - state = (pca955x_read(s, reg) >> (led % 8)) & 0x3; + state = (pca955x_read(s, reg) >> ((led % 4) * 2)) & 0x3; visit_type_str(v, name, (char **)&led_state[state], errp); } diff --git a/hw/misc/puv3_pm.c b/hw/misc/puv3_pm.c deleted file mode 100644 index 676c23f7dbc..00000000000 --- a/hw/misc/puv3_pm.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Power Management device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. 
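For context on the pca9552 change earlier in this patch: each LSn register packs four LED states at two bits apiece, so the state of a given LED is selected as in this stand-alone sketch (helper name is hypothetical, not QEMU code):

    #include <stdint.h>

    /* Extract the 2-bit state of LED 'led' from its LSn register value. */
    static unsigned pca955x_led_state_of(uint8_t ls_reg_val, unsigned led)
    {
        return (ls_reg_val >> ((led % 4) * 2)) & 0x3;
    }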
- * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" -#include "qemu/module.h" -#include "qemu/log.h" - -#define TYPE_PUV3_PM "puv3_pm" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3PMState, PUV3_PM) - -struct PUV3PMState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - - uint32_t reg_PMCR; - uint32_t reg_PCGR; - uint32_t reg_PLL_SYS_CFG; - uint32_t reg_PLL_DDR_CFG; - uint32_t reg_PLL_VGA_CFG; - uint32_t reg_DIVCFG; -}; - -static uint64_t puv3_pm_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3PMState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x14: - ret = s->reg_PCGR; - break; - case 0x18: - ret = s->reg_PLL_SYS_CFG; - break; - case 0x1c: - ret = s->reg_PLL_DDR_CFG; - break; - case 0x20: - ret = s->reg_PLL_VGA_CFG; - break; - case 0x24: - ret = s->reg_DIVCFG; - break; - case 0x28: /* PLL SYS STATUS */ - ret = 0x00002401; - break; - case 0x2c: /* PLL DDR STATUS */ - ret = 0x00100c00; - break; - case 0x30: /* PLL VGA STATUS */ - ret = 0x00003801; - break; - case 0x34: /* DIV STATUS */ - ret = 0x22f52015; - break; - case 0x38: /* SW RESET */ - ret = 0x0; - break; - case 0x44: /* PLL DFC DONE */ - ret = 0x7; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - - return ret; -} - -static void puv3_pm_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3PMState *s = opaque; - - switch (offset) { - case 0x0: - s->reg_PMCR = value; - break; - case 0x14: - s->reg_PCGR = value; - break; - case 0x18: - s->reg_PLL_SYS_CFG = value; - break; - case 0x1c: - s->reg_PLL_DDR_CFG = value; - break; - case 0x20: - s->reg_PLL_VGA_CFG = value; - break; - case 0x24: - case 0x38: - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); -} - -static const MemoryRegionOps puv3_pm_ops = { - .read = puv3_pm_read, - .write = puv3_pm_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_pm_realize(DeviceState *dev, Error **errp) -{ - PUV3PMState *s = PUV3_PM(dev); - - s->reg_PCGR = 0x0; - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_pm_ops, s, "puv3_pm", - PUV3_REGS_OFFSET); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); -} - -static void puv3_pm_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_pm_realize; -} - -static const TypeInfo puv3_pm_info = { - .name = TYPE_PUV3_PM, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3PMState), - .class_init = puv3_pm_class_init, -}; - -static void puv3_pm_register_type(void) -{ - type_register_static(&puv3_pm_info); -} - -type_init(puv3_pm_register_type) diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c index 27113abd6cf..7b66d58acc8 100644 --- a/hw/misc/pvpanic-isa.c +++ b/hw/misc/pvpanic-isa.c @@ -13,7 +13,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c index d629639d8f4..af8cbe28305 100644 --- a/hw/misc/pvpanic-pci.c +++ b/hw/misc/pvpanic-pci.c @@ -12,7 +12,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" diff 
--git a/hw/misc/sga.c b/hw/misc/sga.c index 4dbe6d78f9e..1d04672b013 100644 --- a/hw/misc/sga.c +++ b/hw/misc/sga.c @@ -30,6 +30,7 @@ #include "hw/loader.h" #include "qemu/module.h" #include "qom/object.h" +#include "qemu/error-report.h" #define SGABIOS_FILENAME "sgabios.bin" @@ -42,6 +43,7 @@ struct ISASGAState { static void sga_realizefn(DeviceState *dev, Error **errp) { + warn_report("-device sga is deprecated, use -machine graphics=off"); rom_add_vga(SGABIOS_FILENAME); } diff --git a/hw/misc/sifive_e_prci.c b/hw/misc/sifive_e_prci.c index 8ec4ee4b41f..a8702c6a5d4 100644 --- a/hw/misc/sifive_e_prci.c +++ b/hw/misc/sifive_e_prci.c @@ -23,7 +23,6 @@ #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" -#include "hw/hw.h" #include "hw/misc/sifive_e_prci.h" static uint64_t sifive_e_prci_read(void *opaque, hwaddr addr, unsigned int size) diff --git a/hw/misc/sifive_test.c b/hw/misc/sifive_test.c index 2deb2072cc8..56df45bfe59 100644 --- a/hw/misc/sifive_test.c +++ b/hw/misc/sifive_test.c @@ -24,7 +24,6 @@ #include "qemu/log.h" #include "qemu/module.h" #include "sysemu/runstate.h" -#include "hw/hw.h" #include "hw/misc/sifive_test.h" static uint64_t sifive_test_read(void *opaque, hwaddr addr, unsigned int size) diff --git a/hw/misc/sifive_u_otp.c b/hw/misc/sifive_u_otp.c index 18aa0bd55d8..52fdb750c0e 100644 --- a/hw/misc/sifive_u_otp.c +++ b/hw/misc/sifive_u_otp.c @@ -209,7 +209,14 @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) TYPE_SIFIVE_U_OTP, SIFIVE_U_OTP_REG_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); - dinfo = drive_get_next(IF_NONE); + dinfo = drive_get_next(IF_PFLASH); + if (!dinfo) { + dinfo = drive_get_next(IF_NONE); + if (dinfo) { + warn_report("using \"-drive if=none\" for the OTP is deprecated, " + "use \"-drive if=pflash\" instead."); + } + } if (dinfo) { int ret; uint64_t perm; @@ -235,14 +242,10 @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) if (blk_pread(s->blk, 0, s->fuse, filesize) != filesize) { error_setg(errp, "failed to read the initial flash content"); + return; } } } -} - -static void sifive_u_otp_reset(DeviceState *dev) -{ - SiFiveUOTPState *s = SIFIVE_U_OTP(dev); /* Initialize all fuses' initial value to 0xFFs */ memset(s->fuse, 0xff, sizeof(s->fuse)); @@ -259,13 +262,15 @@ static void sifive_u_otp_reset(DeviceState *dev) serial_data = s->serial; if (blk_pwrite(s->blk, index * SIFIVE_U_OTP_FUSE_WORD, &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { - error_report("write error index<%d>", index); + error_setg(errp, "failed to write index<%d>", index); + return; } serial_data = ~(s->serial); if (blk_pwrite(s->blk, (index + 1) * SIFIVE_U_OTP_FUSE_WORD, &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { - error_report("write error index<%d>", index + 1); + error_setg(errp, "failed to write index<%d>", index + 1); + return; } } @@ -279,7 +284,6 @@ static void sifive_u_otp_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, sifive_u_otp_properties); dc->realize = sifive_u_otp_realize; - dc->reset = sifive_u_otp_reset; } static const TypeInfo sifive_u_otp_info = { diff --git a/hw/misc/trace-events b/hw/misc/trace-events index d0a89eb0596..2da96d167a7 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
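# Illustration only, not part of this patch: entries in this file pair a
# trace point name and C argument list with a printf-style format string,
# for example the via1_auxmode entry added further below:
#
#   via1_auxmode(int mode) "setting auxmode to %d"
#
# The build turns each entry into a C helper, here trace_via1_auxmode(int),
# which is the call used in the mac_via.c hunk above.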
# allwinner-cpucfg.c allwinner_cpucfg_cpu_reset(uint8_t cpu_id, uint32_t reset_addr) "id %u, reset_addr 0x%" PRIu32 @@ -67,16 +67,6 @@ slavio_sysctrl_mem_readl(uint32_t ret) "Read system control 0x%08x" slavio_led_mem_writew(uint32_t val) "Write diagnostic LED 0x%04x" slavio_led_mem_readw(uint32_t ret) "Read diagnostic LED 0x%04x" -# milkymist-hpdmc.c -milkymist_hpdmc_memory_read(uint32_t addr, uint32_t value) "addr=0x%08x value=0x%08x" -milkymist_hpdmc_memory_write(uint32_t addr, uint32_t value) "addr=0x%08x value=0x%08x" - -# milkymist-pfpu.c -milkymist_pfpu_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_pfpu_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_pfpu_vectout(uint32_t a, uint32_t b, uint32_t dma_ptr) "a 0x%08x b 0x%08x dma_ptr 0x%08x" -milkymist_pfpu_pulse_irq(void) "Pulse IRQ" - # aspeed_scu.c aspeed_scu_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32 @@ -238,6 +228,7 @@ via1_rtc_cmd_pram_sect_write(int sector, int offset, int addr, int value) "secto via1_adb_send(const char *state, uint8_t data, const char *vadbint) "state %s data=0x%02x vADBInt=%s" via1_adb_receive(const char *state, uint8_t data, const char *vadbint, int status, int index, int size) "state %s data=0x%02x vADBInt=%s status=0x%x index=%d size=%d" via1_adb_poll(uint8_t data, const char *vadbint, int status, int index, int size) "data=0x%02x vADBInt=%s status=0x%x index=%d size=%d" +via1_auxmode(int mode) "setting auxmode to %d" # grlib_ahb_apb_pnp.c grlib_ahb_pnp_read(uint64_t addr, uint32_t value) "AHB PnP read addr:0x%03"PRIx64" data:0x%08x" diff --git a/hw/misc/virt_ctrl.c b/hw/misc/virt_ctrl.c index 2ea01bd7a1f..e75d1e7e17b 100644 --- a/hw/misc/virt_ctrl.c +++ b/hw/misc/virt_ctrl.c @@ -1,11 +1,10 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Virt system Controller */ #include "qemu/osdep.h" -#include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "migration/vmstate.h" diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c index 5086e6b7ed2..8b702859618 100644 --- a/hw/misc/zynq_slcr.c +++ b/hw/misc/zynq_slcr.c @@ -269,6 +269,21 @@ static uint64_t zynq_slcr_compute_clock(const uint64_t periods[], zynq_slcr_compute_clock((plls), (state)->regs[reg], \ reg ## _ ## enable_field ## _SHIFT) +static void zynq_slcr_compute_clocks_internal(ZynqSLCRState *s, uint64_t ps_clk) +{ + uint64_t io_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_IO_PLL_CTRL]); + uint64_t arm_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_ARM_PLL_CTRL]); + uint64_t ddr_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_DDR_PLL_CTRL]); + + uint64_t uart_mux[4] = {io_pll, io_pll, arm_pll, ddr_pll}; + + /* compute uartX reference clocks */ + clock_set(s->uart0_ref_clk, + ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT0)); + clock_set(s->uart1_ref_clk, + ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT1)); +} + /** * Compute and set the ouputs clocks periods. * But do not propagate them further. 
Connected clocks @@ -283,17 +298,7 @@ static void zynq_slcr_compute_clocks(ZynqSLCRState *s) ps_clk = 0; } - uint64_t io_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_IO_PLL_CTRL]); - uint64_t arm_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_ARM_PLL_CTRL]); - uint64_t ddr_pll = zynq_slcr_compute_pll(ps_clk, s->regs[R_DDR_PLL_CTRL]); - - uint64_t uart_mux[4] = {io_pll, io_pll, arm_pll, ddr_pll}; - - /* compute uartX reference clocks */ - clock_set(s->uart0_ref_clk, - ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT0)); - clock_set(s->uart1_ref_clk, - ZYNQ_COMPUTE_CLK(s, uart_mux, R_UART_CLK_CTRL, CLKACT1)); + zynq_slcr_compute_clocks_internal(s, ps_clk); } /** @@ -416,7 +421,7 @@ static void zynq_slcr_reset_hold(Object *obj) ZynqSLCRState *s = ZYNQ_SLCR(obj); /* will disable all output clocks */ - zynq_slcr_compute_clocks(s); + zynq_slcr_compute_clocks_internal(s, 0); zynq_slcr_propagate_clocks(s); } @@ -425,7 +430,7 @@ static void zynq_slcr_reset_exit(Object *obj) ZynqSLCRState *s = ZYNQ_SLCR(obj); /* will compute output clocks according to ps_clk and registers */ - zynq_slcr_compute_clocks(s); + zynq_slcr_compute_clocks_internal(s, clock_get(s->ps_clk)); zynq_slcr_propagate_clocks(s); } diff --git a/hw/moxie/Kconfig b/hw/moxie/Kconfig deleted file mode 100644 index 3793ef03724..00000000000 --- a/hw/moxie/Kconfig +++ /dev/null @@ -1,3 +0,0 @@ -config MOXIESIM - bool - select SERIAL diff --git a/hw/moxie/meson.build b/hw/moxie/meson.build deleted file mode 100644 index 05a7c2e00fc..00000000000 --- a/hw/moxie/meson.build +++ /dev/null @@ -1,4 +0,0 @@ -moxie_ss = ss.source_set() -moxie_ss.add(when: 'CONFIG_MOXIESIM', if_true: files('moxiesim.c')) - -hw_arch += {'moxie': moxie_ss} diff --git a/hw/moxie/moxiesim.c b/hw/moxie/moxiesim.c deleted file mode 100644 index f7b57fcae19..00000000000 --- a/hw/moxie/moxiesim.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * QEMU/moxiesim emulation - * - * Emulates a very simple machine model similar to the one used by the - * GDB moxie simulator. - * - * Copyright (c) 2008, 2009, 2010, 2013 Anthony Green - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "qemu/error-report.h" -#include "qapi/error.h" -#include "cpu.h" -#include "hw/sysbus.h" -#include "net/net.h" -#include "sysemu/reset.h" -#include "sysemu/sysemu.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "hw/char/serial.h" -#include "exec/address-spaces.h" -#include "elf.h" - -#define PHYS_MEM_BASE 0x80000000 -#define FIRMWARE_BASE 0x1000 -#define FIRMWARE_SIZE (128 * 0x1000) - -typedef struct { - uint64_t ram_size; - const char *kernel_filename; - const char *kernel_cmdline; - const char *initrd_filename; -} LoaderParams; - -static void load_kernel(MoxieCPU *cpu, LoaderParams *loader_params) -{ - uint64_t entry, kernel_high; - int64_t initrd_size; - long kernel_size; - ram_addr_t initrd_offset; - - kernel_size = load_elf(loader_params->kernel_filename, NULL, NULL, NULL, - &entry, NULL, &kernel_high, NULL, 1, EM_MOXIE, - 0, 0); - - if (kernel_size <= 0) { - error_report("could not load kernel '%s'", - loader_params->kernel_filename); - exit(1); - } - - /* load initrd */ - initrd_size = 0; - initrd_offset = 0; - if (loader_params->initrd_filename) { - initrd_size = get_image_size(loader_params->initrd_filename); - if (initrd_size > 0) { - initrd_offset = (kernel_high + ~TARGET_PAGE_MASK) - & TARGET_PAGE_MASK; - if (initrd_offset + initrd_size > loader_params->ram_size) { - error_report("memory too small for initial ram disk '%s'", - loader_params->initrd_filename); - exit(1); - } - initrd_size = load_image_targphys(loader_params->initrd_filename, - initrd_offset, - loader_params->ram_size); - } - if (initrd_size == (target_ulong)-1) { - error_report("could not load initial ram disk '%s'", - loader_params->initrd_filename); - exit(1); - } - } -} - -static void main_cpu_reset(void *opaque) -{ - MoxieCPU *cpu = opaque; - - cpu_reset(CPU(cpu)); -} - -static void moxiesim_init(MachineState *machine) -{ - MoxieCPU *cpu = NULL; - ram_addr_t ram_size = machine->ram_size; - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - CPUMoxieState *env; - MemoryRegion *address_space_mem = get_system_memory(); - MemoryRegion *ram = g_new(MemoryRegion, 1); - MemoryRegion *rom = g_new(MemoryRegion, 1); - hwaddr ram_base = 0x200000; - LoaderParams loader_params; - - /* Init CPUs. */ - cpu = MOXIE_CPU(cpu_create(machine->cpu_type)); - env = &cpu->env; - - qemu_register_reset(main_cpu_reset, cpu); - - /* Allocate RAM. */ - memory_region_init_ram(ram, NULL, "moxiesim.ram", ram_size, &error_fatal); - memory_region_add_subregion(address_space_mem, ram_base, ram); - - memory_region_init_ram(rom, NULL, "moxie.rom", FIRMWARE_SIZE, &error_fatal); - memory_region_add_subregion(get_system_memory(), FIRMWARE_BASE, rom); - - if (kernel_filename) { - loader_params.ram_size = ram_size; - loader_params.kernel_filename = kernel_filename; - loader_params.kernel_cmdline = kernel_cmdline; - loader_params.initrd_filename = initrd_filename; - load_kernel(cpu, &loader_params); - } - if (machine->firmware) { - if (load_image_targphys(machine->firmware, FIRMWARE_BASE, FIRMWARE_SIZE) < 0) { - error_report("Failed to load firmware '%s'", machine->firmware); - } - } - - /* A single 16450 sits at offset 0x3f8. 
*/
- if (serial_hd(0)) {
- serial_mm_init(address_space_mem, 0x3f8, 0, env->irq[4],
- 8000000/16, serial_hd(0), DEVICE_LITTLE_ENDIAN);
- }
-}
-
-static void moxiesim_machine_init(MachineClass *mc)
-{
- mc->desc = "Moxie simulator platform";
- mc->init = moxiesim_init;
- mc->is_default = true;
- mc->default_cpu_type = MOXIE_CPU_TYPE_NAME("MoxieLite");
-}
-
-DEFINE_MACHINE("moxiesim", moxiesim_machine_init)
diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c
index 42d2f99dfb1..34eea684ced 100644
--- a/hw/net/can/can_sja1000.c
+++ b/hw/net/can/can_sja1000.c
@@ -275,6 +275,10 @@ static void buff2frame_pel(const uint8_t *buff, qemu_can_frame *frame)
}
frame->can_dlc = buff[0] & 0x0f;
+ if (frame->can_dlc > 8) {
+ frame->can_dlc = 8;
+ }
+
if (buff[0] & 0x80) { /* Extended */
frame->can_id |= QEMU_CAN_EFF_FLAG;
frame->can_id |= buff[1] << 21; /* ID.28~ID.21 */
@@ -311,6 +315,10 @@ static void buff2frame_bas(const uint8_t *buff, qemu_can_frame *frame)
}
frame->can_dlc = buff[1] & 0x0f;
+ if (frame->can_dlc > 8) {
+ frame->can_dlc = 8;
+ }
+
for (i = 0; i < frame->can_dlc; i++) {
frame->data[i] = buff[2 + i];
}
diff --git a/hw/net/can/xlnx-zynqmp-can.c b/hw/net/can/xlnx-zynqmp-can.c
index affa21a5ed3..22bb8910fa8 100644
--- a/hw/net/can/xlnx-zynqmp-can.c
+++ b/hw/net/can/xlnx-zynqmp-can.c
@@ -37,7 +37,6 @@
#include "qemu/bitops.h"
#include "qemu/log.h"
#include "qemu/cutils.h"
-#include "sysemu/sysemu.h"
#include "migration/vmstate.h"
#include "hw/qdev-properties.h"
#include "net/can_emu.h"
diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c
index 533a8304d0b..45b954e46c2 100644
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@@ -28,15 +28,9 @@
#include "qemu/timer.h"
#include <zlib.h>
#include "qom/object.h"
+#include "trace.h"
-//#define DEBUG_SONIC
-
-#define SONIC_PROM_SIZE 0x1000
-
-#ifdef DEBUG_SONIC
-#define DPRINTF(fmt, ...) \
-do { printf("sonic: " fmt , ## __VA_ARGS__); } while (0)
-static const char* reg_names[] = {
+static const char *reg_names[] = {
"CR", "DCR", "RCR", "TCR", "IMR", "ISR", "UTDA", "CTDA",
"TPS", "TFC", "TSA0", "TSA1", "TFS", "URDA", "CRDA",
"CRBA0", "CRBA1", "RBWC0", "RBWC1", "EOBC", "URRA", "RSA", "REA", "RRP",
@@ -45,12 +39,6 @@ static const char* reg_names[] = {
"SR", "WT0", "WT1", "RSC", "CRCT", "FAET", "MPT", "MDT",
"0x30", "0x31", "0x32", "0x33", "0x34", "0x35", "0x36", "0x37",
"0x38", "0x39", "0x3a", "0x3b", "0x3c", "0x3d", "0x3e", "DCR2" };
-#else
-#define DPRINTF(fmt, ...) do {} while (0)
-#endif
-
-#define SONIC_ERROR(fmt, ...) 
\ -do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0) #define SONIC_CR 0x00 #define SONIC_DCR 0x01 @@ -97,6 +85,7 @@ do { printf("sonic ERROR: %s: " fmt, __func__ , ## __VA_ARGS__); } while (0) #define SONIC_MPT 0x2e #define SONIC_MDT 0x2f #define SONIC_DCR2 0x3f +#define SONIC_REG_COUNT 0x40 #define SONIC_CR_HTX 0x0001 #define SONIC_CR_TXP 0x0002 @@ -161,23 +150,19 @@ struct dp8393xState { bool big_endian; bool last_rba_is_full; qemu_irq irq; -#ifdef DEBUG_SONIC int irq_level; -#endif QEMUTimer *watchdog; int64_t wt_last_update; NICConf conf; NICState *nic; MemoryRegion mmio; - MemoryRegion prom; /* Registers */ - uint8_t cam[16][6]; - uint16_t regs[0x40]; + uint16_t cam[16][3]; + uint16_t regs[SONIC_REG_COUNT]; /* Temporaries */ uint8_t tx_buffer[0x10000]; - uint16_t data[12]; int loopback_packet; /* Memory access */ @@ -185,7 +170,8 @@ struct dp8393xState { AddressSpace as; }; -/* Accessor functions for values which are formed by +/* + * Accessor functions for values which are formed by * concatenating two 16 bit device registers. By putting these * in their own functions with a uint32_t return type we avoid the * pitfall of implicit sign extension where ((x << 16) | y) is a @@ -233,34 +219,48 @@ static uint32_t dp8393x_wt(dp8393xState *s) return s->regs[SONIC_WT1] << 16 | s->regs[SONIC_WT0]; } -static uint16_t dp8393x_get(dp8393xState *s, int width, int offset) +static uint16_t dp8393x_get(dp8393xState *s, hwaddr addr, int offset) { + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; uint16_t val; - if (s->big_endian) { - val = be16_to_cpu(s->data[offset * width + width - 1]); + if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { + addr += offset << 2; + if (s->big_endian) { + val = address_space_ldl_be(&s->as, addr, attrs, NULL); + } else { + val = address_space_ldl_le(&s->as, addr, attrs, NULL); + } } else { - val = le16_to_cpu(s->data[offset * width]); + addr += offset << 1; + if (s->big_endian) { + val = address_space_lduw_be(&s->as, addr, attrs, NULL); + } else { + val = address_space_lduw_le(&s->as, addr, attrs, NULL); + } } + return val; } -static void dp8393x_put(dp8393xState *s, int width, int offset, - uint16_t val) +static void dp8393x_put(dp8393xState *s, + hwaddr addr, int offset, uint16_t val) { - if (s->big_endian) { - if (width == 2) { - s->data[offset * 2] = 0; - s->data[offset * 2 + 1] = cpu_to_be16(val); + const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED; + + if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { + addr += offset << 2; + if (s->big_endian) { + address_space_stl_be(&s->as, addr, val, attrs, NULL); } else { - s->data[offset] = cpu_to_be16(val); + address_space_stl_le(&s->as, addr, val, attrs, NULL); } } else { - if (width == 2) { - s->data[offset * 2] = cpu_to_le16(val); - s->data[offset * 2 + 1] = 0; + addr += offset << 1; + if (s->big_endian) { + address_space_stw_be(&s->as, addr, val, attrs, NULL); } else { - s->data[offset] = cpu_to_le16(val); + address_space_stw_le(&s->as, addr, val, attrs, NULL); } } } @@ -269,16 +269,14 @@ static void dp8393x_update_irq(dp8393xState *s) { int level = (s->regs[SONIC_IMR] & s->regs[SONIC_ISR]) ? 
1 : 0; -#ifdef DEBUG_SONIC if (level != s->irq_level) { s->irq_level = level; if (level) { - DPRINTF("raise irq, isr is 0x%04x\n", s->regs[SONIC_ISR]); + trace_dp8393x_raise_irq(s->regs[SONIC_ISR]); } else { - DPRINTF("lower irq\n"); + trace_dp8393x_lower_irq(); } } -#endif qemu_set_irq(s->irq, level); } @@ -286,35 +284,29 @@ static void dp8393x_update_irq(dp8393xState *s) static void dp8393x_do_load_cam(dp8393xState *s) { int width, size; - uint16_t index = 0; + uint16_t index; width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 2 : 1; size = sizeof(uint16_t) * 4 * width; while (s->regs[SONIC_CDC] & 0x1f) { /* Fill current entry */ - address_space_read(&s->as, dp8393x_cdp(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); - s->cam[index][0] = dp8393x_get(s, width, 1) & 0xff; - s->cam[index][1] = dp8393x_get(s, width, 1) >> 8; - s->cam[index][2] = dp8393x_get(s, width, 2) & 0xff; - s->cam[index][3] = dp8393x_get(s, width, 2) >> 8; - s->cam[index][4] = dp8393x_get(s, width, 3) & 0xff; - s->cam[index][5] = dp8393x_get(s, width, 3) >> 8; - DPRINTF("load cam[%d] with %02x%02x%02x%02x%02x%02x\n", index, - s->cam[index][0], s->cam[index][1], s->cam[index][2], - s->cam[index][3], s->cam[index][4], s->cam[index][5]); + index = dp8393x_get(s, dp8393x_cdp(s), 0) & 0xf; + s->cam[index][0] = dp8393x_get(s, dp8393x_cdp(s), 1); + s->cam[index][1] = dp8393x_get(s, dp8393x_cdp(s), 2); + s->cam[index][2] = dp8393x_get(s, dp8393x_cdp(s), 3); + trace_dp8393x_load_cam(index, + s->cam[index][0] >> 8, s->cam[index][0] & 0xff, + s->cam[index][1] >> 8, s->cam[index][1] & 0xff, + s->cam[index][2] >> 8, s->cam[index][2] & 0xff); /* Move to next entry */ s->regs[SONIC_CDC]--; s->regs[SONIC_CDP] += size; - index++; } /* Read CAM enable */ - address_space_read(&s->as, dp8393x_cdp(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); - s->regs[SONIC_CE] = dp8393x_get(s, width, 0); - DPRINTF("load cam done. cam enable mask 0x%04x\n", s->regs[SONIC_CE]); + s->regs[SONIC_CE] = dp8393x_get(s, dp8393x_cdp(s), 0); + trace_dp8393x_load_cam_done(s->regs[SONIC_CE]); /* Done */ s->regs[SONIC_CR] &= ~SONIC_CR_LCAM; @@ -329,17 +321,14 @@ static void dp8393x_do_read_rra(dp8393xState *s) /* Read memory */ width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 
2 : 1; size = sizeof(uint16_t) * 4 * width; - address_space_read(&s->as, dp8393x_rrp(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); /* Update SONIC registers */ - s->regs[SONIC_CRBA0] = dp8393x_get(s, width, 0); - s->regs[SONIC_CRBA1] = dp8393x_get(s, width, 1); - s->regs[SONIC_RBWC0] = dp8393x_get(s, width, 2); - s->regs[SONIC_RBWC1] = dp8393x_get(s, width, 3); - DPRINTF("CRBA0/1: 0x%04x/0x%04x, RBWC0/1: 0x%04x/0x%04x\n", - s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1], - s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]); + s->regs[SONIC_CRBA0] = dp8393x_get(s, dp8393x_rrp(s), 0); + s->regs[SONIC_CRBA1] = dp8393x_get(s, dp8393x_rrp(s), 1); + s->regs[SONIC_RBWC0] = dp8393x_get(s, dp8393x_rrp(s), 2); + s->regs[SONIC_RBWC1] = dp8393x_get(s, dp8393x_rrp(s), 3); + trace_dp8393x_read_rra_regs(s->regs[SONIC_CRBA0], s->regs[SONIC_CRBA1], + s->regs[SONIC_RBWC0], s->regs[SONIC_RBWC1]); /* Go to next entry */ s->regs[SONIC_RRP] += size; @@ -350,8 +339,7 @@ static void dp8393x_do_read_rra(dp8393xState *s) } /* Warn the host if CRBA now has the last available resource */ - if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) - { + if (s->regs[SONIC_RRP] == s->regs[SONIC_RWP]) { s->regs[SONIC_ISR] |= SONIC_ISR_RBE; dp8393x_update_irq(s); } @@ -364,7 +352,8 @@ static void dp8393x_do_software_reset(dp8393xState *s) { timer_del(s->watchdog); - s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP | SONIC_CR_HTX); + s->regs[SONIC_CR] &= ~(SONIC_CR_LCAM | SONIC_CR_RRRA | SONIC_CR_TXP | + SONIC_CR_HTX); s->regs[SONIC_CR] |= SONIC_CR_RST | SONIC_CR_RXDIS; } @@ -433,28 +422,22 @@ static void dp8393x_do_receiver_disable(dp8393xState *s) static void dp8393x_do_transmit_packets(dp8393xState *s) { NetClientState *nc = qemu_get_queue(s->nic); - int width, size; int tx_len, len; uint16_t i; - width = (s->regs[SONIC_DCR] & SONIC_DCR_DW) ? 
2 : 1; - while (1) { /* Read memory */ - size = sizeof(uint16_t) * 6 * width; s->regs[SONIC_TTDA] = s->regs[SONIC_CTDA]; - DPRINTF("Transmit packet at %08x\n", dp8393x_ttda(s)); - address_space_read(&s->as, dp8393x_ttda(s) + sizeof(uint16_t) * width, - MEMTXATTRS_UNSPECIFIED, s->data, size); + trace_dp8393x_transmit_packet(dp8393x_ttda(s)); tx_len = 0; /* Update registers */ - s->regs[SONIC_TCR] = dp8393x_get(s, width, 0) & 0xf000; - s->regs[SONIC_TPS] = dp8393x_get(s, width, 1); - s->regs[SONIC_TFC] = dp8393x_get(s, width, 2); - s->regs[SONIC_TSA0] = dp8393x_get(s, width, 3); - s->regs[SONIC_TSA1] = dp8393x_get(s, width, 4); - s->regs[SONIC_TFS] = dp8393x_get(s, width, 5); + s->regs[SONIC_TCR] = dp8393x_get(s, dp8393x_ttda(s), 1) & 0xf000; + s->regs[SONIC_TPS] = dp8393x_get(s, dp8393x_ttda(s), 2); + s->regs[SONIC_TFC] = dp8393x_get(s, dp8393x_ttda(s), 3); + s->regs[SONIC_TSA0] = dp8393x_get(s, dp8393x_ttda(s), 4); + s->regs[SONIC_TSA1] = dp8393x_get(s, dp8393x_ttda(s), 5); + s->regs[SONIC_TFS] = dp8393x_get(s, dp8393x_ttda(s), 6); /* Handle programmable interrupt */ if (s->regs[SONIC_TCR] & SONIC_TCR_PINT) { @@ -476,27 +459,26 @@ static void dp8393x_do_transmit_packets(dp8393xState *s) i++; if (i != s->regs[SONIC_TFC]) { /* Read next fragment details */ - size = sizeof(uint16_t) * 3 * width; - address_space_read(&s->as, - dp8393x_ttda(s) - + sizeof(uint16_t) * width * (4 + 3 * i), - MEMTXATTRS_UNSPECIFIED, s->data, - size); - s->regs[SONIC_TSA0] = dp8393x_get(s, width, 0); - s->regs[SONIC_TSA1] = dp8393x_get(s, width, 1); - s->regs[SONIC_TFS] = dp8393x_get(s, width, 2); + s->regs[SONIC_TSA0] = dp8393x_get(s, dp8393x_ttda(s), + 4 + 3 * i); + s->regs[SONIC_TSA1] = dp8393x_get(s, dp8393x_ttda(s), + 5 + 3 * i); + s->regs[SONIC_TFS] = dp8393x_get(s, dp8393x_ttda(s), + 6 + 3 * i); } } /* Handle Ethernet checksum */ if (!(s->regs[SONIC_TCR] & SONIC_TCR_CRCI)) { - /* Don't append FCS there, to look like slirp packets - * which don't have one */ + /* + * Don't append FCS there, to look like slirp packets + * which don't have one + */ } else { /* Remove existing FCS */ tx_len -= 4; if (tx_len < 0) { - SONIC_ERROR("tx_len is %d\n", tx_len); + trace_dp8393x_transmit_txlen_error(tx_len); break; } } @@ -515,22 +497,12 @@ static void dp8393x_do_transmit_packets(dp8393xState *s) s->regs[SONIC_TCR] |= SONIC_TCR_PTX; /* Write status */ - dp8393x_put(s, width, 0, - s->regs[SONIC_TCR] & 0x0fff); /* status */ - size = sizeof(uint16_t) * width; - address_space_write(&s->as, dp8393x_ttda(s), - MEMTXATTRS_UNSPECIFIED, s->data, size); + dp8393x_put(s, dp8393x_ttda(s), 0, s->regs[SONIC_TCR] & 0x0fff); if (!(s->regs[SONIC_CR] & SONIC_CR_HTX)) { /* Read footer of packet */ - size = sizeof(uint16_t) * width; - address_space_read(&s->as, - dp8393x_ttda(s) - + sizeof(uint16_t) * width - * (4 + 3 * s->regs[SONIC_TFC]), - MEMTXATTRS_UNSPECIFIED, s->data, - size); - s->regs[SONIC_CTDA] = dp8393x_get(s, width, 0); + s->regs[SONIC_CTDA] = dp8393x_get(s, dp8393x_ttda(s), + 4 + 3 * s->regs[SONIC_TFC]); if (s->regs[SONIC_CTDA] & SONIC_DESC_EOL) { /* EOL detected */ break; @@ -558,26 +530,34 @@ static void dp8393x_do_command(dp8393xState *s, uint16_t command) s->regs[SONIC_CR] |= (command & SONIC_CR_MASK); - if (command & SONIC_CR_HTX) + if (command & SONIC_CR_HTX) { dp8393x_do_halt_transmission(s); - if (command & SONIC_CR_TXP) + } + if (command & SONIC_CR_TXP) { dp8393x_do_transmit_packets(s); - if (command & SONIC_CR_RXDIS) + } + if (command & SONIC_CR_RXDIS) { dp8393x_do_receiver_disable(s); - if (command & SONIC_CR_RXEN) + 
} + if (command & SONIC_CR_RXEN) { dp8393x_do_receiver_enable(s); - if (command & SONIC_CR_STP) + } + if (command & SONIC_CR_STP) { dp8393x_do_stop_timer(s); - if (command & SONIC_CR_ST) + } + if (command & SONIC_CR_ST) { dp8393x_do_start_timer(s); - if (command & SONIC_CR_RST) + } + if (command & SONIC_CR_RST) { dp8393x_do_software_reset(s); + } if (command & SONIC_CR_RRRA) { dp8393x_do_read_rra(s); s->regs[SONIC_CR] &= ~SONIC_CR_RRRA; } - if (command & SONIC_CR_LCAM) + if (command & SONIC_CR_LCAM) { dp8393x_do_load_cam(s); + } } static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size) @@ -587,110 +567,108 @@ static uint64_t dp8393x_read(void *opaque, hwaddr addr, unsigned int size) uint16_t val = 0; switch (reg) { - /* Update data before reading it */ - case SONIC_WT0: - case SONIC_WT1: - dp8393x_update_wt_regs(s); - val = s->regs[reg]; - break; - /* Accept read to some registers only when in reset mode */ - case SONIC_CAP2: - case SONIC_CAP1: - case SONIC_CAP0: - if (s->regs[SONIC_CR] & SONIC_CR_RST) { - val = s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg) + 1] << 8; - val |= s->cam[s->regs[SONIC_CEP] & 0xf][2* (SONIC_CAP0 - reg)]; - } - break; - /* All other registers have no special contrainst */ - default: - val = s->regs[reg]; + /* Update data before reading it */ + case SONIC_WT0: + case SONIC_WT1: + dp8393x_update_wt_regs(s); + val = s->regs[reg]; + break; + /* Accept read to some registers only when in reset mode */ + case SONIC_CAP2: + case SONIC_CAP1: + case SONIC_CAP0: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + val = s->cam[s->regs[SONIC_CEP] & 0xf][SONIC_CAP0 - reg]; + } + break; + /* All other registers have no special contraints */ + default: + val = s->regs[reg]; } - DPRINTF("read 0x%04x from reg %s\n", val, reg_names[reg]); + trace_dp8393x_read(reg, reg_names[reg], val, size); - return s->big_endian ? val << 16 : val; + return val; } -static void dp8393x_write(void *opaque, hwaddr addr, uint64_t data, +static void dp8393x_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) { dp8393xState *s = opaque; int reg = addr >> s->it_shift; - uint32_t val = s->big_endian ? 
data >> 16 : data; - DPRINTF("write 0x%04x to reg %s\n", (uint16_t)val, reg_names[reg]); + trace_dp8393x_write(reg, reg_names[reg], val, size); switch (reg) { - /* Command register */ - case SONIC_CR: - dp8393x_do_command(s, val); - break; - /* Prevent write to read-only registers */ - case SONIC_CAP2: - case SONIC_CAP1: - case SONIC_CAP0: - case SONIC_SR: - case SONIC_MDT: - DPRINTF("writing to reg %d invalid\n", reg); - break; - /* Accept write to some registers only when in reset mode */ - case SONIC_DCR: - if (s->regs[SONIC_CR] & SONIC_CR_RST) { - s->regs[reg] = val & 0xbfff; - } else { - DPRINTF("writing to DCR invalid\n"); - } - break; - case SONIC_DCR2: - if (s->regs[SONIC_CR] & SONIC_CR_RST) { - s->regs[reg] = val & 0xf017; - } else { - DPRINTF("writing to DCR2 invalid\n"); - } - break; - /* 12 lower bytes are Read Only */ - case SONIC_TCR: - s->regs[reg] = val & 0xf000; - break; - /* 9 lower bytes are Read Only */ - case SONIC_RCR: - s->regs[reg] = val & 0xffe0; - break; - /* Ignore most significant bit */ - case SONIC_IMR: - s->regs[reg] = val & 0x7fff; - dp8393x_update_irq(s); - break; - /* Clear bits by writing 1 to them */ - case SONIC_ISR: - val &= s->regs[reg]; - s->regs[reg] &= ~val; - if (val & SONIC_ISR_RBE) { - dp8393x_do_read_rra(s); - } - dp8393x_update_irq(s); - break; - /* The guest is required to store aligned pointers here */ - case SONIC_RSA: - case SONIC_REA: - case SONIC_RRP: - case SONIC_RWP: - if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { - s->regs[reg] = val & 0xfffc; - } else { - s->regs[reg] = val & 0xfffe; - } - break; - /* Invert written value for some registers */ - case SONIC_CRCT: - case SONIC_FAET: - case SONIC_MPT: - s->regs[reg] = val ^ 0xffff; - break; - /* All other registers have no special contrainst */ - default: - s->regs[reg] = val; + /* Command register */ + case SONIC_CR: + dp8393x_do_command(s, val); + break; + /* Prevent write to read-only registers */ + case SONIC_CAP2: + case SONIC_CAP1: + case SONIC_CAP0: + case SONIC_SR: + case SONIC_MDT: + trace_dp8393x_write_invalid(reg); + break; + /* Accept write to some registers only when in reset mode */ + case SONIC_DCR: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + s->regs[reg] = val & 0xbfff; + } else { + trace_dp8393x_write_invalid_dcr("DCR"); + } + break; + case SONIC_DCR2: + if (s->regs[SONIC_CR] & SONIC_CR_RST) { + s->regs[reg] = val & 0xf017; + } else { + trace_dp8393x_write_invalid_dcr("DCR2"); + } + break; + /* 12 lower bytes are Read Only */ + case SONIC_TCR: + s->regs[reg] = val & 0xf000; + break; + /* 9 lower bytes are Read Only */ + case SONIC_RCR: + s->regs[reg] = val & 0xffe0; + break; + /* Ignore most significant bit */ + case SONIC_IMR: + s->regs[reg] = val & 0x7fff; + dp8393x_update_irq(s); + break; + /* Clear bits by writing 1 to them */ + case SONIC_ISR: + val &= s->regs[reg]; + s->regs[reg] &= ~val; + if (val & SONIC_ISR_RBE) { + dp8393x_do_read_rra(s); + } + dp8393x_update_irq(s); + break; + /* The guest is required to store aligned pointers here */ + case SONIC_RSA: + case SONIC_REA: + case SONIC_RRP: + case SONIC_RWP: + if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { + s->regs[reg] = val & 0xfffc; + } else { + s->regs[reg] = val & 0xfffe; + } + break; + /* Invert written value for some registers */ + case SONIC_CRCT: + case SONIC_FAET: + case SONIC_MPT: + s->regs[reg] = val ^ 0xffff; + break; + /* All other registers have no special contrainst */ + default: + s->regs[reg] = val; } if (reg == SONIC_WT0 || reg == SONIC_WT1) { @@ -698,11 +676,16 @@ static void dp8393x_write(void 
*opaque, hwaddr addr, uint64_t data, } } +/* + * Since .impl.max_access_size is effectively controlled by the it_shift + * property, leave it unspecified for now to allow the memory API to + * correctly zero extend the 16-bit register values to the access size up to and + * including it_shift. + */ static const MemoryRegionOps dp8393x_ops = { .read = dp8393x_read, .write = dp8393x_write, - .impl.min_access_size = 4, - .impl.max_access_size = 4, + .impl.min_access_size = 2, .endianness = DEVICE_NATIVE_ENDIAN, }; @@ -747,17 +730,18 @@ static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf, } /* Check broadcast */ - if ((s->regs[SONIC_RCR] & SONIC_RCR_BRD) && !memcmp(buf, bcast, sizeof(bcast))) { + if ((s->regs[SONIC_RCR] & SONIC_RCR_BRD) && + !memcmp(buf, bcast, sizeof(bcast))) { return SONIC_RCR_BC; } /* Check CAM */ for (i = 0; i < 16; i++) { if (s->regs[SONIC_CE] & (1 << i)) { - /* Entry enabled */ - if (!memcmp(buf, s->cam[i], sizeof(s->cam[i]))) { - return 0; - } + /* Entry enabled */ + if (!memcmp(buf, s->cam[i], sizeof(s->cam[i]))) { + return 0; + } } } @@ -770,7 +754,7 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, dp8393xState *s = qemu_get_nic_opaque(nc); int packet_type; uint32_t available, address; - int width, rx_len, padded_len; + int rx_len, padded_len; uint32_t checksum; int size; @@ -783,15 +767,13 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, rx_len = pkt_size + sizeof(checksum); if (s->regs[SONIC_DCR] & SONIC_DCR_DW) { - width = 2; padded_len = ((rx_len - 1) | 3) + 1; } else { - width = 1; padded_len = ((rx_len - 1) | 1) + 1; } if (padded_len > dp8393x_rbwc(s) * 2) { - DPRINTF("oversize packet, pkt_size is %d\n", pkt_size); + trace_dp8393x_receive_oversize(pkt_size); s->regs[SONIC_ISR] |= SONIC_ISR_RBAE; dp8393x_update_irq(s); s->regs[SONIC_RCR] |= SONIC_RCR_LPKT; @@ -800,18 +782,14 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, packet_type = dp8393x_receive_filter(s, buf, pkt_size); if (packet_type < 0) { - DPRINTF("packet not for netcard\n"); + trace_dp8393x_receive_not_netcard(); return -1; } /* Check for EOL */ if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) { /* Are we still in resource exhaustion? 
*/ - size = sizeof(uint16_t) * 1 * width; - address = dp8393x_crda(s) + sizeof(uint16_t) * 5 * width; - address_space_read(&s->as, address, MEMTXATTRS_UNSPECIFIED, - s->data, size); - s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0); + s->regs[SONIC_LLFA] = dp8393x_get(s, dp8393x_crda(s), 5); if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) { /* Still EOL ; stop reception */ return -1; @@ -819,11 +797,7 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, /* Link has been updated by host */ /* Clear in_use */ - size = sizeof(uint16_t) * width; - address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width; - dp8393x_put(s, width, 0, 0); - address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED, - (uint8_t *)s->data, size, 1); + dp8393x_put(s, dp8393x_crda(s), 6, 0x0000); /* Move to next descriptor */ s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA]; @@ -838,7 +812,7 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, checksum = cpu_to_le32(crc32(0, buf, pkt_size)); /* Put packet into RBA */ - DPRINTF("Receive packet at %08x\n", dp8393x_crba(s)); + trace_dp8393x_receive_packet(dp8393x_crba(s)); address = dp8393x_crba(s); address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED, buf, pkt_size); @@ -852,8 +826,8 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, /* Pad short packets to keep pointers aligned */ if (rx_len < padded_len) { size = padded_len - rx_len; - address_space_rw(&s->as, address, MEMTXATTRS_UNSPECIFIED, - (uint8_t *)"\xFF\xFF\xFF", size, 1); + address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED, + "\xFF\xFF\xFF", size); address += size; } @@ -876,33 +850,21 @@ static ssize_t dp8393x_receive(NetClientState *nc, const uint8_t * buf, } /* Write status to memory */ - DPRINTF("Write status at %08x\n", dp8393x_crda(s)); - dp8393x_put(s, width, 0, s->regs[SONIC_RCR]); /* status */ - dp8393x_put(s, width, 1, rx_len); /* byte count */ - dp8393x_put(s, width, 2, s->regs[SONIC_TRBA0]); /* pkt_ptr0 */ - dp8393x_put(s, width, 3, s->regs[SONIC_TRBA1]); /* pkt_ptr1 */ - dp8393x_put(s, width, 4, s->regs[SONIC_RSC]); /* seq_no */ - size = sizeof(uint16_t) * 5 * width; - address_space_write(&s->as, dp8393x_crda(s), - MEMTXATTRS_UNSPECIFIED, - s->data, size); + trace_dp8393x_receive_write_status(dp8393x_crda(s)); + dp8393x_put(s, dp8393x_crda(s), 0, s->regs[SONIC_RCR]); /* status */ + dp8393x_put(s, dp8393x_crda(s), 1, rx_len); /* byte count */ + dp8393x_put(s, dp8393x_crda(s), 2, s->regs[SONIC_TRBA0]); /* pkt_ptr0 */ + dp8393x_put(s, dp8393x_crda(s), 3, s->regs[SONIC_TRBA1]); /* pkt_ptr1 */ + dp8393x_put(s, dp8393x_crda(s), 4, s->regs[SONIC_RSC]); /* seq_no */ /* Check link field */ - size = sizeof(uint16_t) * width; - address_space_read(&s->as, - dp8393x_crda(s) + sizeof(uint16_t) * 5 * width, - MEMTXATTRS_UNSPECIFIED, s->data, size); - s->regs[SONIC_LLFA] = dp8393x_get(s, width, 0); + s->regs[SONIC_LLFA] = dp8393x_get(s, dp8393x_crda(s), 5); if (s->regs[SONIC_LLFA] & SONIC_DESC_EOL) { /* EOL detected */ s->regs[SONIC_ISR] |= SONIC_ISR_RDE; } else { /* Clear in_use */ - size = sizeof(uint16_t) * width; - address = dp8393x_crda(s) + sizeof(uint16_t) * 6 * width; - dp8393x_put(s, width, 0, 0); - address_space_write(&s->as, address, MEMTXATTRS_UNSPECIFIED, - s->data, size); + dp8393x_put(s, dp8393x_crda(s), 6, 0x0000); /* Move to next descriptor */ s->regs[SONIC_CRDA] = s->regs[SONIC_LLFA]; @@ -938,7 +900,8 @@ static void dp8393x_reset(DeviceState *dev) s->regs[SONIC_SR] = 0x0004; /* only revision recognized by Linux/mips */ 
s->regs[SONIC_CR] = SONIC_CR_RST | SONIC_CR_STP | SONIC_CR_RXDIS; s->regs[SONIC_DCR] &= ~(SONIC_DCR_EXBUS | SONIC_DCR_LBR); - s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | SONIC_RCR_RNT); + s->regs[SONIC_RCR] &= ~(SONIC_RCR_LB0 | SONIC_RCR_LB1 | SONIC_RCR_BRD | + SONIC_RCR_RNT); s->regs[SONIC_TCR] |= SONIC_TCR_NCRS | SONIC_TCR_PTX; s->regs[SONIC_TCR] &= ~SONIC_TCR_BCM; s->regs[SONIC_IMR] = 0; @@ -968,52 +931,31 @@ static void dp8393x_instance_init(Object *obj) dp8393xState *s = DP8393X(obj); sysbus_init_mmio(sbd, &s->mmio); - sysbus_init_mmio(sbd, &s->prom); sysbus_init_irq(sbd, &s->irq); } static void dp8393x_realize(DeviceState *dev, Error **errp) { dp8393xState *s = DP8393X(dev); - int i, checksum; - uint8_t *prom; - Error *local_err = NULL; address_space_init(&s->as, s->dma_mr, "dp8393x"); memory_region_init_io(&s->mmio, OBJECT(dev), &dp8393x_ops, s, - "dp8393x-regs", 0x40 << s->it_shift); + "dp8393x-regs", SONIC_REG_COUNT << s->it_shift); s->nic = qemu_new_nic(&net_dp83932_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s); - - memory_region_init_rom(&s->prom, OBJECT(dev), "dp8393x-prom", - SONIC_PROM_SIZE, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - prom = memory_region_get_ram_ptr(&s->prom); - checksum = 0; - for (i = 0; i < 6; i++) { - prom[i] = s->conf.macaddr.a[i]; - checksum += prom[i]; - if (checksum > 0xff) { - checksum = (checksum + 1) & 0xff; - } - } - prom[7] = 0xff - checksum; } static const VMStateDescription vmstate_dp8393x = { .name = "dp8393x", - .version_id = 0, - .minimum_version_id = 0, + .version_id = 1, + .minimum_version_id = 1, .fields = (VMStateField []) { - VMSTATE_BUFFER_UNSAFE(cam, dp8393xState, 0, 16 * 6), - VMSTATE_UINT16_ARRAY(regs, dp8393xState, 0x40), + VMSTATE_UINT16_2DARRAY(cam, dp8393xState, 16, 3), + VMSTATE_UINT16_ARRAY(regs, dp8393xState, SONIC_REG_COUNT), VMSTATE_END_OF_LIST() } }; diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 4f75b44cfcb..f5bc81296d1 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -29,6 +29,7 @@ #include "hw/pci/pci.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" +#include "net/eth.h" #include "net/net.h" #include "net/checksum.h" #include "sysemu/sysemu.h" @@ -106,6 +107,7 @@ struct E1000State_st { e1000x_txd_props props; e1000x_txd_props tso_props; uint16_t tso_frames; + bool busy; } tx; struct { @@ -130,10 +132,13 @@ struct E1000State_st { #define E1000_FLAG_MIT_BIT 1 #define E1000_FLAG_MAC_BIT 2 #define E1000_FLAG_TSO_BIT 3 +#define E1000_FLAG_VET_BIT 4 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT) #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT) #define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT) #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT) +#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT) + uint32_t compat_flags; bool received_tx_tso; bool use_tso_for_migration; @@ -361,6 +366,13 @@ e1000_autoneg_timer(void *opaque) } } +static bool e1000_vet_init_need(void *opaque) +{ + E1000State *s = opaque; + + return chkflag(VET); +} + static void e1000_reset(void *opaque) { E1000State *d = opaque; @@ -386,6 +398,10 @@ static void e1000_reset(void *opaque) } e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr); + + if (e1000_vet_init_need(d)) { + d->mac_reg[VET] = ETH_P_VLAN; + } } static void @@ -748,6 +764,11 @@ start_xmit(E1000State *s) return; } + if (s->tx.busy) { + return; + } + 
s->tx.busy = true; + while (s->mac_reg[TDH] != s->mac_reg[TDT]) { base = tx_desc_base(s) + sizeof(struct e1000_tx_desc) * s->mac_reg[TDH]; @@ -774,6 +795,7 @@ start_xmit(E1000State *s) break; } } + s->tx.busy = false; set_ics(s, 0, cause); } @@ -1737,6 +1759,8 @@ static Property e1000_properties[] = { compat_flags, E1000_FLAG_MAC_BIT, true), DEFINE_PROP_BIT("migrate_tso_props", E1000State, compat_flags, E1000_FLAG_TSO_BIT, true), + DEFINE_PROP_BIT("init-vet", E1000State, + compat_flags, E1000_FLAG_VET_BIT, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index a8a77eca956..ac96f7665af 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -35,6 +35,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "net/eth.h" #include "net/net.h" #include "net/tap.h" #include "qemu/module.h" @@ -79,7 +80,7 @@ struct E1000EState { bool disable_vnet; E1000ECore core; - + bool init_vet; }; #define E1000E_MMIO_IDX 0 @@ -527,6 +528,10 @@ static void e1000e_qdev_reset(DeviceState *dev) trace_e1000e_cb_qdev_reset(); e1000e_core_reset(&s->core); + + if (s->init_vet) { + s->core.mac[VET] = ETH_P_VLAN; + } } static int e1000e_pre_save(void *opaque) @@ -666,6 +671,7 @@ static Property e1000e_properties[] = { e1000e_prop_subsys_ven, uint16_t), DEFINE_PROP_SIGNED("subsys", E1000EState, subsys, 0, e1000e_prop_subsys, uint16_t), + DEFINE_PROP_BOOL("init-vet", E1000EState, init_vet, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index b75f2ab8fc1..8ae6fb7e145 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -731,7 +731,7 @@ e1000e_process_tx_desc(E1000ECore *core, if (e1000x_vlan_enabled(core->mac) && e1000x_is_vlan_txd(txd_lower)) { net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, - le16_to_cpu(dp->upper.fields.special), core->vet); + le16_to_cpu(dp->upper.fields.special), core->mac[VET]); } if (e1000e_tx_pkt_send(core, tx, queue_index)) { e1000e_on_tx_done_update_stats(core, tx->tx_pkt); @@ -1012,7 +1012,7 @@ e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size) { uint32_t rctl = core->mac[RCTL]; - if (e1000x_is_vlan_packet(buf, core->vet) && + if (e1000x_is_vlan_packet(buf, core->mac[VET]) && e1000x_vlan_rx_filter_enabled(core->mac)) { uint16_t vid = lduw_be_p(buf + 14); uint32_t vfta = ldl_le_p((uint32_t *)(core->mac + VFTA) + @@ -1285,7 +1285,6 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, &d->special); d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); d->status = (uint8_t) le32_to_cpu(status_flags); - d->special = 0; } static inline void @@ -1686,7 +1685,7 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) } net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, - e1000x_vlan_enabled(core->mac), core->vet); + e1000x_vlan_enabled(core->mac), core->mac[VET]); e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info); e1000e_rx_ring_init(core, &rxr, rss_info.queue); @@ -2397,8 +2396,7 @@ static void e1000e_set_vet(E1000ECore *core, int index, uint32_t val) { core->mac[VET] = val & 0xffff; - core->vet = le16_to_cpu(core->mac[VET]); - trace_e1000e_vlan_vet(core->vet); + trace_e1000e_vlan_vet(core->mac[VET]); } static void diff --git a/hw/net/i82596.c b/hw/net/i82596.c index 055c3a1470c..ec21e2699a1 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -12,7 +12,6 @@ #include "qemu/timer.h" #include "net/net.h" #include "net/eth.h" -#include "sysemu/sysemu.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" diff 
--git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index f03450c0280..9c7035bc948 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -283,9 +283,8 @@ static uint32_t imx_phy_read(IMXFECState *s, int reg) uint32_t phy = reg / 32; if (phy != s->phy_num) { - qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad phy num %u\n", - TYPE_IMX_FEC, __func__, phy); - return 0; + trace_imx_phy_read_num(phy, s->phy_num); + return 0xffff; } reg %= 32; @@ -345,8 +344,7 @@ static void imx_phy_write(IMXFECState *s, int reg, uint32_t val) uint32_t phy = reg / 32; if (phy != s->phy_num) { - qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad phy num %u\n", - TYPE_IMX_FEC, __func__, phy); + trace_imx_phy_write_num(phy, s->phy_num); return; } diff --git a/hw/net/lasi_i82596.c b/hw/net/lasi_i82596.c index 820b63f3509..e37f7fabe95 100644 --- a/hw/net/lasi_i82596.c +++ b/hw/net/lasi_i82596.c @@ -18,7 +18,6 @@ #include "hw/net/lasi_82596.h" #include "hw/net/i82596.h" #include "trace.h" -#include "sysemu/sysemu.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" diff --git a/hw/net/meson.build b/hw/net/meson.build index af0749c42bb..bdf71f1f405 100644 --- a/hw/net/meson.build +++ b/hw/net/meson.build @@ -39,7 +39,6 @@ softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_emc.c')) softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_eth.c')) softmmu_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_fec.c')) -specific_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-minimac2.c')) specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_llan.c')) specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c')) diff --git a/hw/net/milkymist-minimac2.c b/hw/net/milkymist-minimac2.c deleted file mode 100644 index 5826944fd57..00000000000 --- a/hw/net/milkymist-minimac2.c +++ /dev/null @@ -1,547 +0,0 @@ -/* - * QEMU model of the Milkymist minimac2 block. - * - * Copyright (c) 2011 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * not available yet - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qom/object.h" -#include "cpu.h" /* FIXME: why does this use TARGET_PAGE_ALIGN? 
*/ -#include "hw/irq.h" -#include "hw/qdev-properties.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "net/net.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/error-report.h" - -#include - -enum { - R_SETUP = 0, - R_MDIO, - R_STATE0, - R_COUNT0, - R_STATE1, - R_COUNT1, - R_TXCOUNT, - R_MAX -}; - -enum { - SETUP_PHY_RST = (1<<0), -}; - -enum { - MDIO_DO = (1<<0), - MDIO_DI = (1<<1), - MDIO_OE = (1<<2), - MDIO_CLK = (1<<3), -}; - -enum { - STATE_EMPTY = 0, - STATE_LOADED = 1, - STATE_PENDING = 2, -}; - -enum { - MDIO_OP_WRITE = 1, - MDIO_OP_READ = 2, -}; - -enum mdio_state { - MDIO_STATE_IDLE, - MDIO_STATE_READING, - MDIO_STATE_WRITING, -}; - -enum { - R_PHY_ID1 = 2, - R_PHY_ID2 = 3, - R_PHY_MAX = 32 -}; - -#define MINIMAC2_MTU 1530 -#define MINIMAC2_BUFFER_SIZE 2048 - -struct MilkymistMinimac2MdioState { - int last_clk; - int count; - uint32_t data; - uint16_t data_out; - int state; - - uint8_t phy_addr; - uint8_t reg_addr; -}; -typedef struct MilkymistMinimac2MdioState MilkymistMinimac2MdioState; - -#define TYPE_MILKYMIST_MINIMAC2 "milkymist-minimac2" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistMinimac2State, MILKYMIST_MINIMAC2) - -struct MilkymistMinimac2State { - SysBusDevice parent_obj; - - NICState *nic; - NICConf conf; - char *phy_model; - MemoryRegion buffers; - MemoryRegion regs_region; - - qemu_irq rx_irq; - qemu_irq tx_irq; - - uint32_t regs[R_MAX]; - - MilkymistMinimac2MdioState mdio; - - uint16_t phy_regs[R_PHY_MAX]; - - uint8_t *rx0_buf; - uint8_t *rx1_buf; - uint8_t *tx_buf; -}; - -static const uint8_t preamble_sfd[] = { - 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0xd5 -}; - -static void minimac2_mdio_write_reg(MilkymistMinimac2State *s, - uint8_t phy_addr, uint8_t reg_addr, uint16_t value) -{ - trace_milkymist_minimac2_mdio_write(phy_addr, reg_addr, value); - - /* nop */ -} - -static uint16_t minimac2_mdio_read_reg(MilkymistMinimac2State *s, - uint8_t phy_addr, uint8_t reg_addr) -{ - uint16_t r = s->phy_regs[reg_addr]; - - trace_milkymist_minimac2_mdio_read(phy_addr, reg_addr, r); - - return r; -} - -static void minimac2_update_mdio(MilkymistMinimac2State *s) -{ - MilkymistMinimac2MdioState *m = &s->mdio; - - /* detect rising clk edge */ - if (m->last_clk == 0 && (s->regs[R_MDIO] & MDIO_CLK)) { - /* shift data in */ - int bit = ((s->regs[R_MDIO] & MDIO_DO) - && (s->regs[R_MDIO] & MDIO_OE)) ? 1 : 0; - m->data = (m->data << 1) | bit; - - /* check for sync */ - if (m->data == 0xffffffff) { - m->count = 32; - } - - if (m->count == 16) { - uint8_t start = (m->data >> 14) & 0x3; - uint8_t op = (m->data >> 12) & 0x3; - uint8_t ta = (m->data) & 0x3; - - if (start == 1 && op == MDIO_OP_WRITE && ta == 2) { - m->state = MDIO_STATE_WRITING; - } else if (start == 1 && op == MDIO_OP_READ && (ta & 1) == 0) { - m->state = MDIO_STATE_READING; - } else { - m->state = MDIO_STATE_IDLE; - } - - if (m->state != MDIO_STATE_IDLE) { - m->phy_addr = (m->data >> 7) & 0x1f; - m->reg_addr = (m->data >> 2) & 0x1f; - } - - if (m->state == MDIO_STATE_READING) { - m->data_out = minimac2_mdio_read_reg(s, m->phy_addr, - m->reg_addr); - } - } - - if (m->count < 16 && m->state == MDIO_STATE_READING) { - int bit = (m->data_out & 0x8000) ? 
1 : 0; - m->data_out <<= 1; - - if (bit) { - s->regs[R_MDIO] |= MDIO_DI; - } else { - s->regs[R_MDIO] &= ~MDIO_DI; - } - } - - if (m->count == 0 && m->state) { - if (m->state == MDIO_STATE_WRITING) { - uint16_t data = m->data & 0xffff; - minimac2_mdio_write_reg(s, m->phy_addr, m->reg_addr, data); - } - m->state = MDIO_STATE_IDLE; - } - m->count--; - } - - m->last_clk = (s->regs[R_MDIO] & MDIO_CLK) ? 1 : 0; -} - -static size_t assemble_frame(uint8_t *buf, size_t size, - const uint8_t *payload, size_t payload_size) -{ - uint32_t crc; - - if (size < payload_size + 12) { - qemu_log_mask(LOG_GUEST_ERROR, "milkymist_minimac2: frame too big " - "(%zd bytes)\n", payload_size); - return 0; - } - - /* prepend preamble and sfd */ - memcpy(buf, preamble_sfd, 8); - - /* now copy the payload */ - memcpy(buf + 8, payload, payload_size); - - /* pad frame if needed */ - if (payload_size < 60) { - memset(buf + payload_size + 8, 0, 60 - payload_size); - payload_size = 60; - } - - /* append fcs */ - crc = cpu_to_le32(crc32(0, buf + 8, payload_size)); - memcpy(buf + payload_size + 8, &crc, 4); - - return payload_size + 12; -} - -static void minimac2_tx(MilkymistMinimac2State *s) -{ - uint32_t txcount = s->regs[R_TXCOUNT]; - uint8_t *buf = s->tx_buf; - - if (txcount < 64) { - error_report("milkymist_minimac2: ethernet frame too small (%u < %u)", - txcount, 64); - goto err; - } - - if (txcount > MINIMAC2_MTU) { - error_report("milkymist_minimac2: MTU exceeded (%u > %u)", - txcount, MINIMAC2_MTU); - goto err; - } - - if (memcmp(buf, preamble_sfd, 8) != 0) { - error_report("milkymist_minimac2: frame doesn't contain the preamble " - "and/or the SFD (%02x %02x %02x %02x %02x %02x %02x %02x)", - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - goto err; - } - - trace_milkymist_minimac2_tx_frame(txcount - 12); - - /* send packet, skipping preamble and sfd */ - qemu_send_packet_raw(qemu_get_queue(s->nic), buf + 8, txcount - 12); - - s->regs[R_TXCOUNT] = 0; - -err: - trace_milkymist_minimac2_pulse_irq_tx(); - qemu_irq_pulse(s->tx_irq); -} - -static void update_rx_interrupt(MilkymistMinimac2State *s) -{ - if (s->regs[R_STATE0] == STATE_PENDING - || s->regs[R_STATE1] == STATE_PENDING) { - trace_milkymist_minimac2_raise_irq_rx(); - qemu_irq_raise(s->rx_irq); - } else { - trace_milkymist_minimac2_lower_irq_rx(); - qemu_irq_lower(s->rx_irq); - } -} - -static ssize_t minimac2_rx(NetClientState *nc, const uint8_t *buf, size_t size) -{ - MilkymistMinimac2State *s = qemu_get_nic_opaque(nc); - - uint32_t r_count; - uint32_t r_state; - uint8_t *rx_buf; - - size_t frame_size; - - trace_milkymist_minimac2_rx_frame(buf, size); - - /* choose appropriate slot */ - if (s->regs[R_STATE0] == STATE_LOADED) { - r_count = R_COUNT0; - r_state = R_STATE0; - rx_buf = s->rx0_buf; - } else if (s->regs[R_STATE1] == STATE_LOADED) { - r_count = R_COUNT1; - r_state = R_STATE1; - rx_buf = s->rx1_buf; - } else { - return 0; - } - - /* assemble frame */ - frame_size = assemble_frame(rx_buf, MINIMAC2_BUFFER_SIZE, buf, size); - - if (frame_size == 0) { - return size; - } - - trace_milkymist_minimac2_rx_transfer(rx_buf, frame_size); - - /* update slot */ - s->regs[r_count] = frame_size; - s->regs[r_state] = STATE_PENDING; - - update_rx_interrupt(s); - - return size; -} - -static uint64_t -minimac2_read(void *opaque, hwaddr addr, unsigned size) -{ - MilkymistMinimac2State *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_SETUP: - case R_MDIO: - case R_STATE0: - case R_COUNT0: - case R_STATE1: - case R_COUNT1: - case 
R_TXCOUNT: - r = s->regs[addr]; - break; - - default: - qemu_log_mask(LOG_GUEST_ERROR, - "milkymist_minimac2_rd%d: 0x%" HWADDR_PRIx "\n", - size, addr << 2); - break; - } - - trace_milkymist_minimac2_memory_read(addr << 2, r); - - return r; -} - -static int minimac2_can_rx(MilkymistMinimac2State *s) -{ - if (s->regs[R_STATE0] == STATE_LOADED) { - return 1; - } - if (s->regs[R_STATE1] == STATE_LOADED) { - return 1; - } - - return 0; -} - -static void -minimac2_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistMinimac2State *s = opaque; - - trace_milkymist_minimac2_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_MDIO: - { - /* MDIO_DI is read only */ - int mdio_di = (s->regs[R_MDIO] & MDIO_DI); - s->regs[R_MDIO] = value; - if (mdio_di) { - s->regs[R_MDIO] |= mdio_di; - } else { - s->regs[R_MDIO] &= ~mdio_di; - } - - minimac2_update_mdio(s); - } break; - case R_TXCOUNT: - s->regs[addr] = value; - if (value > 0) { - minimac2_tx(s); - } - break; - case R_STATE0: - case R_STATE1: - s->regs[addr] = value; - update_rx_interrupt(s); - if (minimac2_can_rx(s)) { - qemu_flush_queued_packets(qemu_get_queue(s->nic)); - } - break; - case R_SETUP: - case R_COUNT0: - case R_COUNT1: - s->regs[addr] = value; - break; - - default: - qemu_log_mask(LOG_GUEST_ERROR, - "milkymist_minimac2_wr%d: 0x%" HWADDR_PRIx - " = 0x%" PRIx64 "\n", - size, addr << 2, value); - break; - } -} - -static const MemoryRegionOps minimac2_ops = { - .read = minimac2_read, - .write = minimac2_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_minimac2_reset(DeviceState *d) -{ - MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - for (i = 0; i < R_PHY_MAX; i++) { - s->phy_regs[i] = 0; - } - - /* defaults */ - s->phy_regs[R_PHY_ID1] = 0x0022; /* Micrel KSZ8001L */ - s->phy_regs[R_PHY_ID2] = 0x161a; -} - -static NetClientInfo net_milkymist_minimac2_info = { - .type = NET_CLIENT_DRIVER_NIC, - .size = sizeof(NICState), - .receive = minimac2_rx, -}; - -static void milkymist_minimac2_realize(DeviceState *dev, Error **errp) -{ - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - MilkymistMinimac2State *s = MILKYMIST_MINIMAC2(dev); - size_t buffers_size = TARGET_PAGE_ALIGN(3 * MINIMAC2_BUFFER_SIZE); - - sysbus_init_irq(sbd, &s->rx_irq); - sysbus_init_irq(sbd, &s->tx_irq); - - memory_region_init_io(&s->regs_region, OBJECT(dev), &minimac2_ops, s, - "milkymist-minimac2", R_MAX * 4); - sysbus_init_mmio(sbd, &s->regs_region); - - /* register buffers memory */ - memory_region_init_ram_nomigrate(&s->buffers, OBJECT(dev), "milkymist-minimac2.buffers", - buffers_size, &error_fatal); - vmstate_register_ram_global(&s->buffers); - s->rx0_buf = memory_region_get_ram_ptr(&s->buffers); - s->rx1_buf = s->rx0_buf + MINIMAC2_BUFFER_SIZE; - s->tx_buf = s->rx1_buf + MINIMAC2_BUFFER_SIZE; - - sysbus_init_mmio(sbd, &s->buffers); - - qemu_macaddr_default_if_unset(&s->conf.macaddr); - s->nic = qemu_new_nic(&net_milkymist_minimac2_info, &s->conf, - object_get_typename(OBJECT(dev)), dev->id, s); - qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); -} - -static const VMStateDescription vmstate_milkymist_minimac2_mdio = { - .name = "milkymist-minimac2-mdio", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT32(last_clk, MilkymistMinimac2MdioState), - VMSTATE_INT32(count, MilkymistMinimac2MdioState), - 
VMSTATE_UINT32(data, MilkymistMinimac2MdioState), - VMSTATE_UINT16(data_out, MilkymistMinimac2MdioState), - VMSTATE_INT32(state, MilkymistMinimac2MdioState), - VMSTATE_UINT8(phy_addr, MilkymistMinimac2MdioState), - VMSTATE_UINT8(reg_addr, MilkymistMinimac2MdioState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription vmstate_milkymist_minimac2 = { - .name = "milkymist-minimac2", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistMinimac2State, R_MAX), - VMSTATE_UINT16_ARRAY(phy_regs, MilkymistMinimac2State, R_PHY_MAX), - VMSTATE_STRUCT(mdio, MilkymistMinimac2State, 0, - vmstate_milkymist_minimac2_mdio, MilkymistMinimac2MdioState), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_minimac2_properties[] = { - DEFINE_NIC_PROPERTIES(MilkymistMinimac2State, conf), - DEFINE_PROP_STRING("phy_model", MilkymistMinimac2State, phy_model), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_minimac2_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_minimac2_realize; - dc->reset = milkymist_minimac2_reset; - dc->vmsd = &vmstate_milkymist_minimac2; - device_class_set_props(dc, milkymist_minimac2_properties); -} - -static const TypeInfo milkymist_minimac2_info = { - .name = TYPE_MILKYMIST_MINIMAC2, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistMinimac2State), - .class_init = milkymist_minimac2_class_init, -}; - -static void milkymist_minimac2_register_types(void) -{ - type_register_static(&milkymist_minimac2_info); -} - -type_init(milkymist_minimac2_register_types) diff --git a/hw/net/msf2-emac.c b/hw/net/msf2-emac.c index 3e6206044f8..9278fdce0b3 100644 --- a/hw/net/msf2-emac.c +++ b/hw/net/msf2-emac.c @@ -32,7 +32,6 @@ #include "qemu-common.h" #include "qemu/log.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "hw/registerfields.h" #include "hw/net/msf2-emac.h" #include "hw/net/mii.h" diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index 1f9aa59eca2..1cb1125d9fe 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -450,11 +450,13 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) pkt->payload_len = 0; pkt->payload_frags = 0; - assert(pkt->raw); - for (i = 0; i < pkt->raw_frags; i++) { - assert(pkt->raw[i].iov_base); - pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, pkt->raw[i].iov_len, - DMA_DIRECTION_TO_DEVICE, 0); + if (pkt->max_raw_frags > 0) { + assert(pkt->raw); + for (i = 0; i < pkt->raw_frags; i++) { + assert(pkt->raw[i].iov_base); + pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, + pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0); + } } pkt->raw_frags = 0; diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index 941c9322658..412fa44d017 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -25,14 +25,9 @@ #if defined(DEBUG_ROCKER) # define DPRINTF(fmt, ...) \ do { \ - struct timeval tv; \ - char timestr[64]; \ - time_t now; \ - gettimeofday(&tv, NULL); \ - now = tv.tv_sec; \ - strftime(timestr, sizeof(timestr), "%T", localtime(&now)); \ - fprintf(stderr, "%s.%06ld ", timestr, tv.tv_usec); \ - fprintf(stderr, "ROCKER: " fmt, ## __VA_ARGS__); \ + g_autoptr(GDateTime) now = g_date_time_new_now_local(); \ + g_autofree char *nowstr = g_date_time_format(now, "%T.%f");\ + fprintf(stderr, "%s ROCKER: " fmt, nowstr, ## __VA_ARGS__);\ } while (0) #else static inline GCC_FMT_ATTR(1, 2) int DPRINTF(const char *fmt, ...) 
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 10e85a45560..a6876a936db 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -26,7 +26,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/log.h" #include "qemu/module.h" #include "net/net.h" diff --git a/hw/net/trace-events b/hw/net/trace-events index baf25ffa7e7..643338f6109 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # allwinner-sun8i-emac.c allwinner_sun8i_emac_mii_write_reg(uint32_t reg, uint32_t value) "MII write: reg=0x%" PRIx32 " value=0x%" PRIx32 @@ -19,18 +19,6 @@ mdio_bitbang(bool mdc, bool mdio, int state, uint16_t cnt, unsigned int drive) " lance_mem_readw(uint64_t addr, uint32_t ret) "addr=0x%"PRIx64"val=0x%04x" lance_mem_writew(uint64_t addr, uint32_t val) "addr=0x%"PRIx64"val=0x%04x" -# milkymist-minimac2.c -milkymist_minimac2_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_minimac2_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_minimac2_mdio_write(uint8_t phy_addr, uint8_t addr, uint16_t value) "phy_addr 0x%02x addr 0x%02x value 0x%04x" -milkymist_minimac2_mdio_read(uint8_t phy_addr, uint8_t addr, uint16_t value) "phy_addr 0x%02x addr 0x%02x value 0x%04x" -milkymist_minimac2_tx_frame(uint32_t length) "length %u" -milkymist_minimac2_rx_frame(const void *buf, uint32_t length) "buf %p length %u" -milkymist_minimac2_rx_transfer(const void *buf, uint32_t length) "buf %p length %d" -milkymist_minimac2_raise_irq_rx(void) "Raise IRQ RX" -milkymist_minimac2_lower_irq_rx(void) "Lower IRQ RX" -milkymist_minimac2_pulse_irq_tx(void) "Pulse IRQ TX" - # mipsnet.c mipsnet_send(uint32_t size) "sending len=%u" mipsnet_receive(uint32_t size) "receiving len=%u" @@ -414,7 +402,9 @@ i82596_channel_attention(void *s) "%p: Received CHANNEL ATTENTION" # imx_fec.c imx_phy_read(uint32_t val, int phy, int reg) "0x%04"PRIx32" <= phy[%d].reg[%d]" +imx_phy_read_num(int phy, int configured) "read request from unconfigured phy %d (configured %d)" imx_phy_write(uint32_t val, int phy, int reg) "0x%04"PRIx32" => phy[%d].reg[%d]" +imx_phy_write_num(int phy, int configured) "write request to unconfigured phy %d (configured %d)" imx_phy_update_link(const char *s) "%s" imx_phy_reset(void) "" imx_fec_read_bd(uint64_t addr, int flags, int len, int data) "tx_bd 0x%"PRIx64" flags 0x%04x len %d data 0x%08x" @@ -446,3 +436,20 @@ npcm7xx_emc_received_packet(uint32_t len) "Received %u byte packet" npcm7xx_emc_rx_done(uint32_t crxdsa) "RX done, CRXDSA=0x%x" npcm7xx_emc_reg_read(int emc_num, uint32_t result, const char *name, int regno) "emc%d: 0x%x = reg[%s/%d]" npcm7xx_emc_reg_write(int emc_num, const char *name, int regno, uint32_t value) "emc%d: reg[%s/%d] = 0x%x" + +# dp8398x.c +dp8393x_raise_irq(int isr) "raise irq, isr is 0x%04x" +dp8393x_lower_irq(void) "lower irq" +dp8393x_load_cam(int idx, int cam0, int cam1, int cam2, int cam3, int cam4, int cam5) "load cam[%d] with 0x%02x0x%02x0x%02x0x%02x0x%02x0x%02x" +dp8393x_load_cam_done(int cen) "load cam done. 
cam enable mask 0x%04x" +dp8393x_read_rra_regs(int crba0, int crba1, int rbwc0, int rbwc1) "CRBA0/1: 0x%04x/0x%04x, RBWC0/1: 0x%04x/0x%04x" +dp8393x_transmit_packet(int ttda) "Transmit packet at 0x%"PRIx32 +dp8393x_transmit_txlen_error(int len) "tx_len is %d" +dp8393x_read(int reg, const char *name, int val, int size) "reg=0x%x [%s] val=0x%04x size=%d" +dp8393x_write(int reg, const char *name, int val, int size) "reg=0x%x [%s] val=0x%04x size=%d" +dp8393x_write_invalid(int reg) "writing to reg %d invalid" +dp8393x_write_invalid_dcr(const char *name) "writing to %s invalid" +dp8393x_receive_oversize(int size) "oversize packet, pkt_size is %d" +dp8393x_receive_not_netcard(void) "packet not for netcard" +dp8393x_receive_packet(int crba) "Receive packet at 0x%"PRIx32 +dp8393x_receive_write_status(int crba) "Write status at 0x%"PRIx32 diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c index a7f42526304..89d71cfb8e1 100644 --- a/hw/net/vhost_net-stub.c +++ b/hw/net/vhost_net-stub.c @@ -33,13 +33,13 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { return -ENOSYS; } void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { } diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 24d555e764a..30379d2ca41 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -22,6 +22,7 @@ #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -45,6 +46,7 @@ static const int kernel_feature_bits[] = { VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, + VIRTIO_NET_F_HASH_REPORT, VHOST_INVALID_FEATURE_BIT }; @@ -71,6 +73,8 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_HASH_REPORT, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, @@ -113,7 +117,7 @@ uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) int vhost_net_get_config(struct vhost_net *net, uint8_t *config, uint32_t config_len) { - return vhost_dev_get_config(&net->dev, config, config_len); + return vhost_dev_get_config(&net->dev, config, config_len, NULL); } int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags) @@ -154,15 +158,16 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; struct vhost_net *net = g_new0(struct vhost_net, 1); uint64_t features = 0; + Error *local_err = NULL; if (!options->net_backend) { fprintf(stderr, "vhost-net requires net backend to be setup\n"); goto fail; } net->nc = options->net_backend; + net->dev.nvqs = options->nvqs; net->dev.max_queues = 1; - net->dev.nvqs = 2; net->dev.vqs = net->vqs; if (backend_kernel) { @@ -184,8 +189,10 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) } r = vhost_dev_init(&net->dev, options->opaque, - options->backend_type, options->busyloop_timeout); + options->backend_type, options->busyloop_timeout, + &local_err); if (r < 0) { + error_report_err(local_err); goto fail; } if (backend_kernel) { @@ -224,9 +231,11 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) return NULL; } -static void vhost_net_set_vq_index(struct 
vhost_net *net, int vq_index) +static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, + int vq_index_end) { net->dev.vq_index = vq_index; + net->dev.vq_index_end = vq_index_end; } static int vhost_net_start_one(struct vhost_net *net, @@ -235,9 +244,6 @@ static int vhost_net_start_one(struct vhost_net *net, struct vhost_vring_file file = { }; int r; - net->dev.nvqs = 2; - net->dev.vqs = net->vqs; - r = vhost_dev_enable_notifiers(&net->dev, dev); if (r < 0) { goto fail_notifiers; @@ -311,25 +317,37 @@ static void vhost_net_stop_one(struct vhost_net *net, } int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); + int total_notifiers = data_queue_pairs * 2 + cvq; + VirtIONet *n = VIRTIO_NET(dev); + int nvhosts = data_queue_pairs + cvq; struct vhost_net *net; - int r, e, i; + int r, e, i, index_end = data_queue_pairs * 2; NetClientState *peer; + if (cvq) { + index_end += 1; + } + if (!k->set_guest_notifiers) { error_report("binding does not support guest notifiers"); return -ENOSYS; } - for (i = 0; i < total_queues; i++) { + for (i = 0; i < nvhosts; i++) { + + if (i < data_queue_pairs) { + peer = qemu_get_peer(ncs, i); + } else { /* Control Virtqueue */ + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } - peer = qemu_get_peer(ncs, i); net = get_vhost_net(peer); - vhost_net_set_vq_index(net, i * 2); + vhost_net_set_vq_index(net, i * 2, index_end); /* Suppress the masking guest notifiers on vhost user * because vhost user doesn't interrupt masking/unmasking @@ -340,14 +358,18 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } } - r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); + r = k->set_guest_notifiers(qbus->parent, total_notifiers, true); if (r < 0) { error_report("Error binding guest notifier: %d", -r); goto err; } - for (i = 0; i < total_queues; i++) { - peer = qemu_get_peer(ncs, i); + for (i = 0; i < nvhosts; i++) { + if (i < data_queue_pairs) { + peer = qemu_get_peer(ncs, i); + } else { + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } r = vhost_net_start_one(get_vhost_net(peer), dev); if (r < 0) { @@ -371,7 +393,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, peer = qemu_get_peer(ncs , i); vhost_net_stop_one(get_vhost_net(peer), dev); } - e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false); + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); if (e < 0) { fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); fflush(stderr); @@ -381,18 +403,27 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, - int total_queues) + int data_queue_pairs, int cvq) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); + VirtIONet *n = VIRTIO_NET(dev); + NetClientState *peer; + int total_notifiers = data_queue_pairs * 2 + cvq; + int nvhosts = data_queue_pairs + cvq; int i, r; - for (i = 0; i < total_queues; i++) { - vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev); + for (i = 0; i < nvhosts; i++) { + if (i < data_queue_pairs) { + peer = qemu_get_peer(ncs, i); + } else { + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } + vhost_net_stop_one(get_vhost_net(peer), dev); } - r = k->set_guest_notifiers(qbus->parent, total_queues * 2, 
false); + r = k->set_guest_notifiers(qbus->parent, total_notifiers, false); if (r < 0) { fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r); fflush(stderr); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 760203309ba..f2014d5ea0b 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -54,7 +54,7 @@ #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 -/* for now, only allow larger queues; with virtio-1, guest can downsize */ +/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE @@ -89,7 +89,7 @@ VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) -static VirtIOFeature feature_sizes[] = { +static const VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, .end = endof(struct virtio_net_config, mac)}, {.flags = 1ULL << VIRTIO_NET_F_STATUS, @@ -131,7 +131,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) int ret = 0; memset(&netcfg, 0 , sizeof(struct virtio_net_config)); virtio_stw_p(vdev, &netcfg.status, n->status); - virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); + virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs); virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); memcpy(netcfg.mac, n->mac, ETH_ALEN); virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); @@ -243,7 +243,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) { VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); - int queues = n->multiqueue ? n->max_queues : 1; + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; + int cvq = n->max_ncs - n->max_queue_pairs; if (!get_vhost_net(nc->peer)) { return; @@ -266,7 +267,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) /* Any packets outstanding? Purge them to avoid touching rings * when vhost is running. */ - for (i = 0; i < queues; i++) { + for (i = 0; i < queue_pairs; i++) { NetClientState *qnc = qemu_get_subqueue(n->nic, i); /* Purge both directions: TX and RX. */ @@ -285,14 +286,14 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) } n->vhost_started = 1; - r = vhost_net_start(vdev, n->nic->ncs, queues); + r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq); if (r < 0) { error_report("unable to start vhost net: %d: " "falling back on userspace virtio", -r); n->vhost_started = 0; } } else { - vhost_net_stop(vdev, n->nic->ncs, queues); + vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq); n->vhost_started = 0; } } @@ -309,11 +310,11 @@ static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, } static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, - int queues, bool enable) + int queue_pairs, bool enable) { int i; - for (i = 0; i < queues; i++) { + for (i = 0; i < queue_pairs; i++) { if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && enable) { while (--i >= 0) { @@ -330,7 +331,7 @@ static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - int queues = n->multiqueue ? n->max_queues : 1; + int queue_pairs = n->multiqueue ? 
n->max_queue_pairs : 1; if (virtio_net_started(n, status)) { /* Before using the device, we tell the network backend about the @@ -339,14 +340,14 @@ static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) * virtio-net code. */ n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, - queues, true); + queue_pairs, true); } else if (virtio_net_started(n, vdev->status)) { /* After using the device, we need to reset the network backend to * the default (guest native endianness), otherwise the guest may * lose network connectivity if it is rebooted into a different * endianness. */ - virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false); + virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false); } } @@ -368,12 +369,12 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) virtio_net_vnet_endian_status(n, status); virtio_net_vhost_status(n, status); - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { NetClientState *ncs = qemu_get_subqueue(n->nic, i); bool queue_started; q = &n->vqs[i]; - if ((!n->multiqueue && i != 0) || i >= n->curr_queues) { + if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) { queue_status = 0; } else { queue_status = status; @@ -540,7 +541,7 @@ static void virtio_net_reset(VirtIODevice *vdev) n->nouni = 0; n->nobcast = 0; /* multiqueue is disabled by default */ - n->curr_queues = 1; + n->curr_queue_pairs = 1; timer_del(n->announce_timer.tm); n->announce_timer.round = 0; n->status &= ~VIRTIO_NET_S_ANNOUNCE; @@ -556,7 +557,7 @@ static void virtio_net_reset(VirtIODevice *vdev) memset(n->vlans, 0, MAX_VLAN >> 3); /* Flush any async TX */ - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { NetClientState *nc = qemu_get_subqueue(n->nic, i); if (nc->peer) { @@ -610,7 +611,7 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, sizeof(struct virtio_net_hdr); } - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { nc = qemu_get_subqueue(n->nic, i); if (peer_has_vnet_hdr(n) && @@ -655,7 +656,7 @@ static int peer_attach(VirtIONet *n, int index) return 0; } - if (n->max_queues == 1) { + if (n->max_queue_pairs == 1) { return 0; } @@ -681,7 +682,7 @@ static int peer_detach(VirtIONet *n, int index) return tap_disable(nc->peer); } -static void virtio_net_set_queues(VirtIONet *n) +static void virtio_net_set_queue_pairs(VirtIONet *n) { int i; int r; @@ -690,8 +691,8 @@ static void virtio_net_set_queues(VirtIONet *n) return; } - for (i = 0; i < n->max_queues; i++) { - if (i < n->curr_queues) { + for (i = 0; i < n->max_queue_pairs; i++) { + if (i < n->curr_queue_pairs) { r = peer_attach(n, i); assert(!r); } else { @@ -737,8 +738,9 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, return features; } - virtio_clear_feature(&features, VIRTIO_NET_F_RSS); - virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } features = vhost_net_get_features(get_vhost_net(nc->peer), features); vdev->backend_features = features; @@ -795,48 +797,34 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) typedef struct { VirtIONet *n; - char *id; -} FailoverId; + DeviceState *dev; +} FailoverDevice; /** - * Set the id of the failover primary device + * Set the failover primary device * * @opaque: FailoverId to setup * @opts: opts for device we are handling * 
@errp: returns an error if this function fails */ -static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp) +static int failover_set_primary(DeviceState *dev, void *opaque) { - FailoverId *fid = opaque; - const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); + FailoverDevice *fdev = opaque; + PCIDevice *pci_dev = (PCIDevice *) + object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE); + + if (!pci_dev) { + return 0; + } - if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) { - fid->id = g_strdup(opts->id); + if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) { + fdev->dev = dev; return 1; } return 0; } -/** - * Find the primary device id for this failover virtio-net - * - * @n: VirtIONet device - * @errp: returns an error if this function fails - */ -static char *failover_find_primary_device_id(VirtIONet *n) -{ - Error *err = NULL; - FailoverId fid; - - fid.n = n; - if (!qemu_opts_foreach(qemu_find_opts("device"), - failover_set_primary, &fid, &err)) { - return NULL; - } - return fid.id; -} - /** * Find the primary device for this failover virtio-net * @@ -845,39 +833,38 @@ static char *failover_find_primary_device_id(VirtIONet *n) */ static DeviceState *failover_find_primary_device(VirtIONet *n) { - char *id = failover_find_primary_device_id(n); - - if (!id) { - return NULL; - } + FailoverDevice fdev = { + .n = n, + }; - return qdev_find_recursive(sysbus_get_default(), id); + qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL, + NULL, NULL, &fdev); + return fdev.dev; } static void failover_add_primary(VirtIONet *n, Error **errp) { Error *err = NULL; - QemuOpts *opts; - char *id; DeviceState *dev = failover_find_primary_device(n); if (dev) { return; } - id = failover_find_primary_device_id(n); - if (!id) { + if (!n->primary_opts) { error_setg(errp, "Primary device not found"); error_append_hint(errp, "Virtio-net failover will not work. 
Make " "sure primary device has parameter" " failover_pair_id=%s\n", n->netclient_name); return; } - opts = qemu_opts_find(qemu_find_opts("device"), id); - g_assert(opts); /* cannot be NULL because id was found using opts list */ - dev = qdev_device_add(opts, &err); + + dev = qdev_device_add_from_qdict(n->primary_opts, + n->primary_opts_from_json, + &err); if (err) { - qemu_opts_del(opts); + qobject_unref(n->primary_opts); + n->primary_opts = NULL; } else { object_unref(OBJECT(dev)); } @@ -919,7 +906,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) virtio_net_apply_guest_offloads(n); } - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { NetClientState *nc = qemu_get_subqueue(n->nic, i); if (!get_vhost_net(nc->peer)) { @@ -1163,12 +1150,79 @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, } } +static void virtio_net_detach_epbf_rss(VirtIONet *n); + static void virtio_net_disable_rss(VirtIONet *n) { if (n->rss_data.enabled) { trace_virtio_net_rss_disable(); } n->rss_data.enabled = false; + + virtio_net_detach_epbf_rss(n); +} + +static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) +{ + NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0); + if (nc == NULL || nc->info->set_steering_ebpf == NULL) { + return false; + } + + return nc->info->set_steering_ebpf(nc, prog_fd); +} + +static void rss_data_to_rss_config(struct VirtioNetRssData *data, + struct EBPFRSSConfig *config) +{ + config->redirect = data->redirect; + config->populate_hash = data->populate_hash; + config->hash_types = data->hash_types; + config->indirections_len = data->indirections_len; + config->default_queue = data->default_queue; +} + +static bool virtio_net_attach_epbf_rss(VirtIONet *n) +{ + struct EBPFRSSConfig config = {}; + + if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { + return false; + } + + rss_data_to_rss_config(&n->rss_data, &config); + + if (!ebpf_rss_set_all(&n->ebpf_rss, &config, + n->rss_data.indirections_table, n->rss_data.key)) { + return false; + } + + if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) { + return false; + } + + return true; +} + +static void virtio_net_detach_epbf_rss(VirtIONet *n) +{ + virtio_net_attach_ebpf_to_backend(n->nic, -1); +} + +static bool virtio_net_load_ebpf(VirtIONet *n) +{ + if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + /* backend does't support steering ebpf */ + return false; + } + + return ebpf_rss_load(&n->ebpf_rss); +} + +static void virtio_net_unload_ebpf(VirtIONet *n) +{ + virtio_net_attach_ebpf_to_backend(n->nic, -1); + ebpf_rss_unload(&n->ebpf_rss); } static uint16_t virtio_net_handle_rss(VirtIONet *n, @@ -1179,7 +1233,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, VirtIODevice *vdev = VIRTIO_DEVICE(n); struct virtio_net_rss_config cfg; size_t s, offset = 0, size_get; - uint16_t queues, i; + uint16_t queue_pairs, i; struct { uint16_t us; uint8_t b; @@ -1221,7 +1275,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, } n->rss_data.default_queue = do_rss ? 
virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; - if (n->rss_data.default_queue >= n->max_queues) { + if (n->rss_data.default_queue >= n->max_queue_pairs) { err_msg = "Invalid default queue"; err_value = n->rss_data.default_queue; goto error; @@ -1250,14 +1304,14 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, size_get = sizeof(temp); s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); if (s != size_get) { - err_msg = "Can't get queues"; + err_msg = "Can't get queue_pairs"; err_value = (uint32_t)s; goto error; } - queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues; - if (queues == 0 || queues > n->max_queues) { - err_msg = "Invalid number of queues"; - err_value = queues; + queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs; + if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) { + err_msg = "Invalid number of queue_pairs"; + err_value = queue_pairs; goto error; } if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { @@ -1272,7 +1326,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, } if (!temp.b && !n->rss_data.hash_types) { virtio_net_disable_rss(n); - return queues; + return queue_pairs; } offset += size_get; size_get = temp.b; @@ -1283,10 +1337,29 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, goto error; } n->rss_data.enabled = true; + + if (!n->rss_data.populate_hash) { + if (!virtio_net_attach_epbf_rss(n)) { + /* EBPF must be loaded for vhost */ + if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { + warn_report("Can't load eBPF RSS for vhost"); + goto error; + } + /* fallback to software RSS */ + warn_report("Can't load eBPF RSS - fallback to software RSS"); + n->rss_data.enabled_software_rss = true; + } + } else { + /* use software RSS for hash populating */ + /* and detach eBPF if was loaded before */ + virtio_net_detach_epbf_rss(n); + n->rss_data.enabled_software_rss = true; + } + trace_virtio_net_rss_enable(n->rss_data.hash_types, n->rss_data.indirections_len, temp.b); - return queues; + return queue_pairs; error: trace_virtio_net_rss_error(err_msg, err_value); virtio_net_disable_rss(n); @@ -1297,15 +1370,15 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, struct iovec *iov, unsigned int iov_cnt) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - uint16_t queues; + uint16_t queue_pairs; virtio_net_disable_rss(n); if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { - queues = virtio_net_handle_rss(n, iov, iov_cnt, false); - return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR; + queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false); + return queue_pairs ? 
VIRTIO_NET_OK : VIRTIO_NET_ERR; } if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { - queues = virtio_net_handle_rss(n, iov, iov_cnt, true); + queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true); } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { struct virtio_net_ctrl_mq mq; size_t s; @@ -1316,24 +1389,24 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, if (s != sizeof(mq)) { return VIRTIO_NET_ERR; } - queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); + queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs); } else { return VIRTIO_NET_ERR; } - if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || - queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || - queues > n->max_queues || + if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || + queue_pairs > n->max_queue_pairs || !n->multiqueue) { return VIRTIO_NET_ERR; } - n->curr_queues = queues; - /* stop the backend before changing the number of queues to avoid handling a + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a * disabled queue */ virtio_net_set_status(vdev, vdev->status); - virtio_net_set_queues(n); + virtio_net_set_queue_pairs(n); return VIRTIO_NET_OK; } @@ -1411,7 +1484,7 @@ static bool virtio_net_can_receive(NetClientState *nc) return false; } - if (nc->queue_index >= n->curr_queues) { + if (nc->queue_index >= n->curr_queue_pairs) { return false; } @@ -1671,7 +1744,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, return -1; } - if (!no_rss && n->rss_data.enabled) { + if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { int index = virtio_net_process_rss(nc, buf, size); if (index >= 0) { NetClientState *nc2 = qemu_get_subqueue(n->nic, index); @@ -2691,11 +2764,11 @@ static void virtio_net_del_queue(VirtIONet *n, int index) virtio_del_queue(vdev, index * 2 + 1); } -static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues) +static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) { VirtIODevice *vdev = VIRTIO_DEVICE(n); int old_num_queues = virtio_get_num_queues(vdev); - int new_num_queues = new_max_queues * 2 + 1; + int new_num_queues = new_max_queue_pairs * 2 + 1; int i; assert(old_num_queues >= 3); @@ -2728,12 +2801,12 @@ static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues) static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) { - int max = multiqueue ? n->max_queues : 1; + int max = multiqueue ? 
n->max_queue_pairs : 1; n->multiqueue = multiqueue; - virtio_net_change_num_queues(n, max); + virtio_net_change_num_queue_pairs(n, max); - virtio_net_set_queues(n); + virtio_net_set_queue_pairs(n); } static int virtio_net_post_load_device(void *opaque, int version_id) @@ -2766,7 +2839,7 @@ static int virtio_net_post_load_device(void *opaque, int version_id) */ n->saved_guest_offloads = n->curr_guest_offloads; - virtio_net_set_queues(n); + virtio_net_set_queue_pairs(n); /* Find the first multicast entry in the saved MAC filter */ for (i = 0; i < n->mac_table.in_use; i++) { @@ -2779,7 +2852,7 @@ static int virtio_net_post_load_device(void *opaque, int version_id) /* nc.link_down can't be migrated, so infer link_down according * to link status bit in n->status */ link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0; - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { qemu_get_subqueue(n->nic, i)->link_down = link_down; } @@ -2797,6 +2870,19 @@ static int virtio_net_post_load_device(void *opaque, int version_id) } if (n->rss_data.enabled) { + n->rss_data.enabled_software_rss = n->rss_data.populate_hash; + if (!n->rss_data.populate_hash) { + if (!virtio_net_attach_epbf_rss(n)) { + if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { + warn_report("Can't post-load eBPF RSS for vhost"); + } else { + warn_report("Can't post-load eBPF RSS - " + "fallback to software RSS"); + n->rss_data.enabled_software_rss = true; + } + } + } + trace_virtio_net_rss_enable(n->rss_data.hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); @@ -2831,9 +2917,9 @@ static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { }, }; -static bool max_queues_gt_1(void *opaque, int version_id) +static bool max_queue_pairs_gt_1(void *opaque, int version_id) { - return VIRTIO_NET(opaque)->max_queues > 1; + return VIRTIO_NET(opaque)->max_queue_pairs > 1; } static bool has_ctrl_guest_offloads(void *opaque, int version_id) @@ -2858,13 +2944,13 @@ static bool mac_table_doesnt_fit(void *opaque, int version_id) struct VirtIONetMigTmp { VirtIONet *parent; VirtIONetQueue *vqs_1; - uint16_t curr_queues_1; + uint16_t curr_queue_pairs_1; uint8_t has_ufo; uint32_t has_vnet_hdr; }; /* The 2nd and subsequent tx_waiting flags are loaded later than - * the 1st entry in the queues and only if there's more than one + * the 1st entry in the queue_pairs and only if there's more than one * entry. We use the tmp mechanism to calculate a temporary * pointer and count and also validate the count. 
*/ @@ -2874,9 +2960,9 @@ static int virtio_net_tx_waiting_pre_save(void *opaque) struct VirtIONetMigTmp *tmp = opaque; tmp->vqs_1 = tmp->parent->vqs + 1; - tmp->curr_queues_1 = tmp->parent->curr_queues - 1; - if (tmp->parent->curr_queues == 0) { - tmp->curr_queues_1 = 0; + tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1; + if (tmp->parent->curr_queue_pairs == 0) { + tmp->curr_queue_pairs_1 = 0; } return 0; @@ -2889,9 +2975,9 @@ static int virtio_net_tx_waiting_pre_load(void *opaque) /* Reuse the pointer setup from save */ virtio_net_tx_waiting_pre_save(opaque); - if (tmp->parent->curr_queues > tmp->parent->max_queues) { - error_report("virtio-net: curr_queues %x > max_queues %x", - tmp->parent->curr_queues, tmp->parent->max_queues); + if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) { + error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x", + tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs); return -EINVAL; } @@ -2905,7 +2991,7 @@ static const VMStateDescription vmstate_virtio_net_tx_waiting = { .pre_save = virtio_net_tx_waiting_pre_save, .fields = (VMStateField[]) { VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp, - curr_queues_1, + curr_queue_pairs_1, vmstate_virtio_net_queue_tx_waiting, struct VirtIONetQueue), VMSTATE_END_OF_LIST() @@ -3047,9 +3133,9 @@ static const VMStateDescription vmstate_virtio_net_device = { VMSTATE_UINT8(nobcast, VirtIONet), VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, vmstate_virtio_net_has_ufo), - VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0, + VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0, vmstate_info_uint16_equal, uint16_t), - VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1), + VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1), VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp, vmstate_virtio_net_tx_waiting), VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet, @@ -3159,6 +3245,7 @@ static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, } hotplug_handler_plug(hotplug_ctrl, dev, &err); } + pdev->partially_hotplugged = false; out: error_propagate(errp, err); @@ -3203,7 +3290,9 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) } static bool failover_hide_primary_device(DeviceListener *listener, - QemuOpts *device_opts) + const QDict *device_opts, + bool from_json, + Error **errp) { VirtIONet *n = container_of(listener, VirtIONet, primary_listener); const char *standby_id; @@ -3211,11 +3300,42 @@ static bool failover_hide_primary_device(DeviceListener *listener, if (!device_opts) { return false; } - standby_id = qemu_opt_get(device_opts, "failover_pair_id"); + + if (!qdict_haskey(device_opts, "failover_pair_id")) { + return false; + } + + if (!qdict_haskey(device_opts, "id")) { + error_setg(errp, "Device with failover_pair_id needs to have id"); + return false; + } + + standby_id = qdict_get_str(device_opts, "failover_pair_id"); if (g_strcmp0(standby_id, n->netclient_name) != 0) { return false; } + /* + * The hide helper can be called several times for a given device. + * Check there is only one primary for a virtio-net device but + * don't duplicate the qdict several times if it's called for the same + * device. 
+ */ + if (n->primary_opts) { + const char *old, *new; + /* devices with failover_pair_id always have an id */ + old = qdict_get_str(n->primary_opts, "id"); + new = qdict_get_str(device_opts, "id"); + if (strcmp(old, new) != 0) { + error_setg(errp, "Cannot attach more than one primary device to " + "'%s': '%s' and '%s'", n->netclient_name, old, new); + return false; + } + } else { + n->primary_opts = qdict_clone_shallow(device_opts); + n->primary_opts_from_json = from_json; + } + /* failover_primary_hidden is set during feature negotiation */ return qatomic_read(&n->failover_primary_hidden); } @@ -3292,16 +3412,30 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) return; } - n->max_queues = MAX(n->nic_conf.peers.queues, 1); - if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { - error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " + n->max_ncs = MAX(n->nic_conf.peers.queues, 1); + + /* + * Figure out the datapath queue pairs since the backend could + * provide control queue via peers as well. + */ + if (n->nic_conf.peers.queues) { + for (i = 0; i < n->max_ncs; i++) { + if (n->nic_conf.peers.ncs[i]->is_datapath) { + ++n->max_queue_pairs; + } + } + } + n->max_queue_pairs = MAX(n->max_queue_pairs, 1); + + if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) { + error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), " "must be a positive integer less than %d.", - n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2); + n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2); virtio_cleanup(vdev); return; } - n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues); - n->curr_queues = 1; + n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs); + n->curr_queue_pairs = 1; n->tx_timeout = n->net_conf.txtimer; if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer") @@ -3315,7 +3449,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), n->net_conf.tx_queue_size); - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { virtio_net_add_queue(n, i); } @@ -3339,13 +3473,13 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) object_get_typename(OBJECT(dev)), dev->id, n); } - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { n->nic->ncs[i].do_not_pad = true; } peer_test_vnet_hdr(n); if (peer_has_vnet_hdr(n)) { - for (i = 0; i < n->max_queues; i++) { + for (i = 0; i < n->max_queue_pairs; i++) { qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true); } n->host_hdr_len = sizeof(struct virtio_net_hdr); @@ -3377,13 +3511,21 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->qdev = dev; net_rx_pkt_init(&n->rx_pkt, false); + + if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { + virtio_net_load_ebpf(n); + } } static void virtio_net_device_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIONet *n = VIRTIO_NET(dev); - int i, max_queues; + int i, max_queue_pairs; + + if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { + virtio_net_unload_ebpf(n); + } /* This will stop vhost backend if appropriate. 
*/ virtio_net_set_status(vdev, 0); @@ -3397,15 +3539,19 @@ static void virtio_net_device_unrealize(DeviceState *dev) g_free(n->vlans); if (n->failover) { + qobject_unref(n->primary_opts); device_listener_unregister(&n->primary_listener); + remove_migration_state_change_notifier(&n->migration_state); + } else { + assert(n->primary_opts == NULL); } - max_queues = n->multiqueue ? n->max_queues : 1; - for (i = 0; i < max_queues; i++) { + max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; + for (i = 0; i < max_queue_pairs; i++) { virtio_net_del_queue(n, i); } /* delete also control vq */ - virtio_del_queue(vdev, max_queues * 2); + virtio_del_queue(vdev, max_queue_pairs * 2); qemu_announce_timer_del(&n->announce_timer, false); g_free(n->vqs); qemu_del_nic(n->nic); @@ -3427,6 +3573,8 @@ static void virtio_net_instance_init(Object *obj) device_add_bootindex_property(obj, &n->nic_conf.bootindex, "bootindex", "/ethernet-phy@0", DEVICE(n)); + + ebpf_rss_init(&n->ebpf_rss); } static int virtio_net_pre_save(void *opaque) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index eff299f6290..f65af4e9ef2 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -23,6 +23,7 @@ #include "net/checksum.h" #include "sysemu/sysemu.h" #include "qemu/bswap.h" +#include "qemu/log.h" #include "qemu/module.h" #include "hw/pci/msix.h" #include "hw/pci/msi.h" @@ -1093,8 +1094,12 @@ vmxnet3_io_bar0_write(void *opaque, hwaddr addr, int tx_queue_idx = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD, VMXNET3_REG_ALIGN); - assert(tx_queue_idx <= s->txq_num); - vmxnet3_process_tx_queue(s, tx_queue_idx); + if (tx_queue_idx <= s->txq_num) { + vmxnet3_process_tx_queue(s, tx_queue_idx); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Illegal TX queue %d/%d\n", + tx_queue_idx, s->txq_num); + } return; } @@ -1376,7 +1381,7 @@ static void vmxnet3_validate_interrupts(VMXNET3State *s) } } -static void vmxnet3_validate_queues(VMXNET3State *s) +static bool vmxnet3_validate_queues(VMXNET3State *s) { /* * txq_num and rxq_num are total number of queues @@ -1385,12 +1390,18 @@ static void vmxnet3_validate_queues(VMXNET3State *s) */ if (s->txq_num > VMXNET3_DEVICE_MAX_TX_QUEUES) { - hw_error("Bad TX queues number: %d\n", s->txq_num); + qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad TX queues number: %d\n", + s->txq_num); + return false; } if (s->rxq_num > VMXNET3_DEVICE_MAX_RX_QUEUES) { - hw_error("Bad RX queues number: %d\n", s->rxq_num); + qemu_log_mask(LOG_GUEST_ERROR, "vmxnet3: Bad RX queues number: %d\n", + s->rxq_num); + return false; } + + return true; } static void vmxnet3_activate_device(VMXNET3State *s) @@ -1414,12 +1425,23 @@ static void vmxnet3_activate_device(VMXNET3State *s) return; } + s->txq_num = + VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues); + s->rxq_num = + VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues); + + VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num); + if (!vmxnet3_validate_queues(s)) { + return; + } + vmxnet3_adjust_by_guest_type(s); vmxnet3_update_features(s); vmxnet3_update_pm_state(s); vmxnet3_setup_rx_filtering(s); /* Cache fields from shared memory */ s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); + assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); VMW_CFPRN("MTU is %u", s->mtu); s->max_rx_frags = @@ -1440,14 +1462,6 @@ static void vmxnet3_activate_device(VMXNET3State *s) VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.intrConf.autoMask); VMW_CFPRN("Automatic interrupt masking is %d", 
(int)s->auto_int_masking); - s->txq_num = - VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numTxQueues); - s->rxq_num = - VMXNET3_READ_DRV_SHARED8(d, s->drv_shmem, devRead.misc.numRxQueues); - - VMW_CFPRN("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num); - vmxnet3_validate_queues(s); - qdescr_table_pa = VMXNET3_READ_DRV_SHARED64(d, s->drv_shmem, devRead.misc.queueDescPA); VMW_CFPRN("TX queues descriptors table is at 0x%" PRIx64, qdescr_table_pa); @@ -1473,6 +1487,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Read rings memory locations for TX queues */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); + if (size > VMXNET3_TX_RING_MAX_SIZE) { + size = VMXNET3_TX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, sizeof(struct Vmxnet3_TxDesc), false); @@ -1483,6 +1500,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* TXC ring */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); + if (size > VMXNET3_TC_RING_MAX_SIZE) { + size = VMXNET3_TC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_TxCompDesc), true); VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); @@ -1524,6 +1544,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RX rings */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); + if (size > VMXNET3_RX_RING_MAX_SIZE) { + size = VMXNET3_RX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, sizeof(struct Vmxnet3_RxDesc), false); VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", @@ -1533,6 +1556,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RXC ring */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); + if (size > VMXNET3_RC_RING_MAX_SIZE) { + size = VMXNET3_RC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_RxCompDesc), true); VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); @@ -2399,7 +2425,9 @@ static int vmxnet3_post_load(void *opaque, int version_id) } } - vmxnet3_validate_queues(s); + if (!vmxnet3_validate_queues(s)) { + return -1; + } vmxnet3_validate_interrupts(s); return 0; diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c index 00859a7d508..0ab6ae91aa1 100644 --- a/hw/net/xgmac.c +++ b/hw/net/xgmac.c @@ -29,7 +29,6 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" #include "net/net.h" #include "qom/object.h" diff --git a/hw/nios2/10m50_devboard.c b/hw/nios2/10m50_devboard.c index a14fc31e86b..3d1205b8bd7 100644 --- a/hw/nios2/10m50_devboard.c +++ b/hw/nios2/10m50_devboard.c @@ -24,7 +24,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/sysbus.h" #include "hw/char/serial.h" diff --git a/hw/nios2/boot.c b/hw/nios2/boot.c index d9969ac1482..5b3e4efed5b 100644 --- a/hw/nios2/boot.c +++ b/hw/nios2/boot.c @@ -32,13 +32,11 @@ #include "qemu/units.h" #include "qemu-common.h" #include "qemu/datadir.h" -#include "cpu.h" #include "qemu/option.h" #include "qemu/config-file.h" #include "qemu/error-report.h" #include "sysemu/device_tree.h" #include "sysemu/reset.h" -#include 
"sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" diff --git a/hw/nios2/generic_nommu.c b/hw/nios2/generic_nommu.c index 19899e2c1ef..fbc18dbd04c 100644 --- a/hw/nios2/generic_nommu.c +++ b/hw/nios2/generic_nommu.c @@ -29,9 +29,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "cpu.h" -#include "hw/sysbus.h" #include "hw/char/serial.h" #include "hw/boards.h" #include "exec/memory.h" diff --git a/hw/nubus/mac-nubus-bridge.c b/hw/nubus/mac-nubus-bridge.c index 7c329300b82..a0da5a8b2fa 100644 --- a/hw/nubus/mac-nubus-bridge.c +++ b/hw/nubus/mac-nubus-bridge.c @@ -1,5 +1,7 @@ /* - * Copyright (c) 2013-2018 Laurent Vivier + * QEMU Macintosh Nubus + * + * Copyright (c) 2013-2018 Laurent Vivier * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -13,13 +15,29 @@ static void mac_nubus_bridge_init(Object *obj) { - MacNubusState *s = MAC_NUBUS_BRIDGE(obj); + MacNubusBridge *s = MAC_NUBUS_BRIDGE(obj); + NubusBridge *nb = NUBUS_BRIDGE(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - - s->bus = NUBUS_BUS(qbus_create(TYPE_NUBUS_BUS, DEVICE(s), NULL)); - - sysbus_init_mmio(sbd, &s->bus->super_slot_io); - sysbus_init_mmio(sbd, &s->bus->slot_io); + NubusBus *bus = &nb->bus; + + /* Macintosh only has slots 0x9 to 0xe available */ + bus->slot_available_mask = MAKE_64BIT_MASK(MAC_NUBUS_FIRST_SLOT, + MAC_NUBUS_SLOT_NB); + + /* Aliases for slots 0x9 to 0xe */ + memory_region_init_alias(&s->super_slot_alias, obj, "super-slot-alias", + &bus->nubus_mr, + MAC_NUBUS_FIRST_SLOT * NUBUS_SUPER_SLOT_SIZE, + MAC_NUBUS_SLOT_NB * NUBUS_SUPER_SLOT_SIZE); + + memory_region_init_alias(&s->slot_alias, obj, "slot-alias", + &bus->nubus_mr, + NUBUS_SLOT_BASE + + MAC_NUBUS_FIRST_SLOT * NUBUS_SLOT_SIZE, + MAC_NUBUS_SLOT_NB * NUBUS_SLOT_SIZE); + + sysbus_init_mmio(sbd, &s->super_slot_alias); + sysbus_init_mmio(sbd, &s->slot_alias); } static void mac_nubus_bridge_class_init(ObjectClass *klass, void *data) @@ -33,7 +51,7 @@ static const TypeInfo mac_nubus_bridge_info = { .name = TYPE_MAC_NUBUS_BRIDGE, .parent = TYPE_NUBUS_BRIDGE, .instance_init = mac_nubus_bridge_init, - .instance_size = sizeof(MacNubusState), + .instance_size = sizeof(MacNubusBridge), .class_init = mac_nubus_bridge_class_init, }; diff --git a/hw/nubus/nubus-bridge.c b/hw/nubus/nubus-bridge.c index cd8c6a91eb5..a42c86080f2 100644 --- a/hw/nubus/nubus-bridge.c +++ b/hw/nubus/nubus-bridge.c @@ -1,5 +1,5 @@ /* - * QEMU Macintosh Nubus + * QEMU Nubus * * Copyright (c) 2013-2018 Laurent Vivier * @@ -12,17 +12,36 @@ #include "hw/sysbus.h" #include "hw/nubus/nubus.h" + +static void nubus_bridge_init(Object *obj) +{ + NubusBridge *s = NUBUS_BRIDGE(obj); + NubusBus *bus = &s->bus; + + qbus_init(bus, sizeof(s->bus), TYPE_NUBUS_BUS, DEVICE(s), NULL); + + qdev_init_gpio_out(DEVICE(s), bus->irqs, NUBUS_IRQS); +} + +static Property nubus_bridge_properties[] = { + DEFINE_PROP_UINT16("slot-available-mask", NubusBridge, + bus.slot_available_mask, 0xffff), + DEFINE_PROP_END_OF_LIST() +}; + static void nubus_bridge_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->fw_name = "nubus"; + device_class_set_props(dc, nubus_bridge_properties); } static const TypeInfo nubus_bridge_info = { .name = TYPE_NUBUS_BRIDGE, .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(SysBusDevice), + .instance_init = nubus_bridge_init, + .instance_size = sizeof(NubusBridge), .class_init = nubus_bridge_class_init, }; diff 
--git a/hw/nubus/nubus-bus.c b/hw/nubus/nubus-bus.c index 942a6d5342d..07c279bde5c 100644 --- a/hw/nubus/nubus-bus.c +++ b/hw/nubus/nubus-bus.c @@ -8,10 +8,18 @@ * */ +/* + * References: + * Nubus Specification (TI) + * http://www.bitsavers.org/pdf/ti/nubus/2242825-0001_NuBus_Spec1983.pdf + * + * Designing Cards and Drivers for the Macintosh Family (Apple) + */ + #include "qemu/osdep.h" #include "hw/nubus/nubus.h" -#include "hw/sysbus.h" #include "qapi/error.h" +#include "trace.h" static NubusBus *nubus_find(void) @@ -20,72 +28,138 @@ static NubusBus *nubus_find(void) return NUBUS_BUS(object_resolve_path_type("", TYPE_NUBUS_BUS, NULL)); } -static void nubus_slot_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) +static MemTxResult nubus_slot_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size, MemTxAttrs attrs) { - /* read only */ + trace_nubus_slot_write(addr, val, size); + return MEMTX_DECODE_ERROR; } - -static uint64_t nubus_slot_read(void *opaque, hwaddr addr, - unsigned int size) +static MemTxResult nubus_slot_read(void *opaque, hwaddr addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) { - return 0; + trace_nubus_slot_read(addr, size); + return MEMTX_DECODE_ERROR; } static const MemoryRegionOps nubus_slot_ops = { - .read = nubus_slot_read, - .write = nubus_slot_write, + .read_with_attrs = nubus_slot_read, + .write_with_attrs = nubus_slot_write, .endianness = DEVICE_BIG_ENDIAN, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 4, }, }; -static void nubus_super_slot_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) +static MemTxResult nubus_super_slot_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) { - /* read only */ + trace_nubus_super_slot_write(addr, val, size); + return MEMTX_DECODE_ERROR; } -static uint64_t nubus_super_slot_read(void *opaque, hwaddr addr, - unsigned int size) +static MemTxResult nubus_super_slot_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) { - return 0; + trace_nubus_super_slot_read(addr, size); + return MEMTX_DECODE_ERROR; } static const MemoryRegionOps nubus_super_slot_ops = { - .read = nubus_super_slot_read, - .write = nubus_super_slot_write, + .read_with_attrs = nubus_super_slot_read, + .write_with_attrs = nubus_super_slot_write, .endianness = DEVICE_BIG_ENDIAN, .valid = { .min_access_size = 1, - .max_access_size = 1, + .max_access_size = 4, }, }; +static void nubus_unrealize(BusState *bus) +{ + NubusBus *nubus = NUBUS_BUS(bus); + + address_space_destroy(&nubus->nubus_as); +} + static void nubus_realize(BusState *bus, Error **errp) { + NubusBus *nubus = NUBUS_BUS(bus); + if (!nubus_find()) { error_setg(errp, "at most one %s device is permitted", TYPE_NUBUS_BUS); return; } + + address_space_init(&nubus->nubus_as, &nubus->nubus_mr, "nubus"); } static void nubus_init(Object *obj) { NubusBus *nubus = NUBUS_BUS(obj); + memory_region_init(&nubus->nubus_mr, obj, "nubus", 0x100000000); + memory_region_init_io(&nubus->super_slot_io, obj, &nubus_super_slot_ops, nubus, "nubus-super-slots", - NUBUS_SUPER_SLOT_NB * NUBUS_SUPER_SLOT_SIZE); + (NUBUS_SUPER_SLOT_NB + 1) * NUBUS_SUPER_SLOT_SIZE); + memory_region_add_subregion(&nubus->nubus_mr, 0x0, &nubus->super_slot_io); memory_region_init_io(&nubus->slot_io, obj, &nubus_slot_ops, nubus, "nubus-slots", NUBUS_SLOT_NB * NUBUS_SLOT_SIZE); + memory_region_add_subregion(&nubus->nubus_mr, + (NUBUS_SUPER_SLOT_NB + 1) * + NUBUS_SUPER_SLOT_SIZE, &nubus->slot_io); + + 
nubus->slot_available_mask = MAKE_64BIT_MASK(NUBUS_FIRST_SLOT, + NUBUS_SLOT_NB); +} + +static char *nubus_get_dev_path(DeviceState *dev) +{ + NubusDevice *nd = NUBUS_DEVICE(dev); + BusState *bus = qdev_get_parent_bus(dev); + char *p = qdev_get_dev_path(bus->parent); + + if (p) { + char *ret = g_strdup_printf("%s/%s/%02x", p, bus->name, nd->slot); + g_free(p); + return ret; + } else { + return g_strdup_printf("%s/%02x", bus->name, nd->slot); + } +} + +static bool nubus_check_address(BusState *bus, DeviceState *dev, Error **errp) +{ + NubusDevice *nd = NUBUS_DEVICE(dev); + NubusBus *nubus = NUBUS_BUS(bus); + + if (nd->slot == -1) { + /* No slot specified, find first available free slot */ + int s = ctz32(nubus->slot_available_mask); + if (s != 32) { + nd->slot = s; + } else { + error_setg(errp, "Cannot register nubus card, no free slot " + "available"); + return false; + } + } else { + /* Slot specified, make sure the slot is available */ + if (!(nubus->slot_available_mask & BIT(nd->slot))) { + error_setg(errp, "Cannot register nubus card, slot %d is " + "unavailable or already occupied", nd->slot); + return false; + } + } - nubus->current_slot = NUBUS_FIRST_SLOT; + nubus->slot_available_mask &= ~BIT(nd->slot); + return true; } static void nubus_class_init(ObjectClass *oc, void *data) @@ -93,6 +167,9 @@ static void nubus_class_init(ObjectClass *oc, void *data) BusClass *bc = BUS_CLASS(oc); bc->realize = nubus_realize; + bc->unrealize = nubus_unrealize; + bc->check_address = nubus_check_address; + bc->get_dev_path = nubus_get_dev_path; } static const TypeInfo nubus_bus_info = { diff --git a/hw/nubus/nubus-device.c b/hw/nubus/nubus-device.c index ffe78a88231..0f1852f671e 100644 --- a/hw/nubus/nubus-device.c +++ b/hw/nubus/nubus-device.c @@ -9,194 +9,99 @@ */ #include "qemu/osdep.h" +#include "qemu/datadir.h" +#include "hw/irq.h" +#include "hw/loader.h" #include "hw/nubus/nubus.h" #include "qapi/error.h" +#include "qemu/error-report.h" -/* The Format Block Structure */ - -#define FBLOCK_DIRECTORY_OFFSET 0 -#define FBLOCK_LENGTH 4 -#define FBLOCK_CRC 8 -#define FBLOCK_REVISION_LEVEL 12 -#define FBLOCK_FORMAT 13 -#define FBLOCK_TEST_PATTERN 14 -#define FBLOCK_RESERVED 18 -#define FBLOCK_BYTE_LANES 19 - -#define FBLOCK_SIZE 20 -#define FBLOCK_PATTERN_VAL 0x5a932bc7 - -static uint64_t nubus_fblock_read(void *opaque, hwaddr addr, unsigned int size) -{ - NubusDevice *dev = opaque; - uint64_t val; - -#define BYTE(v, b) (((v) >> (24 - 8 * (b))) & 0xff) - switch (addr) { - case FBLOCK_BYTE_LANES: - val = dev->byte_lanes; - val |= (val ^ 0xf) << 4; - break; - case FBLOCK_RESERVED: - val = 0x00; - break; - case FBLOCK_TEST_PATTERN...FBLOCK_TEST_PATTERN + 3: - val = BYTE(FBLOCK_PATTERN_VAL, addr - FBLOCK_TEST_PATTERN); - break; - case FBLOCK_FORMAT: - val = dev->rom_format; - break; - case FBLOCK_REVISION_LEVEL: - val = dev->rom_rev; - break; - case FBLOCK_CRC...FBLOCK_CRC + 3: - val = BYTE(dev->rom_crc, addr - FBLOCK_CRC); - break; - case FBLOCK_LENGTH...FBLOCK_LENGTH + 3: - val = BYTE(dev->rom_length, addr - FBLOCK_LENGTH); - break; - case FBLOCK_DIRECTORY_OFFSET...FBLOCK_DIRECTORY_OFFSET + 3: - val = BYTE(dev->directory_offset, addr - FBLOCK_DIRECTORY_OFFSET); - break; - default: - val = 0; - break; - } - return val; -} - -static void nubus_fblock_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) -{ - /* read only */ -} - -static const MemoryRegionOps nubus_format_block_ops = { - .read = nubus_fblock_read, - .write = nubus_fblock_write, - .endianness = DEVICE_BIG_ENDIAN, - .valid = { - 
.min_access_size = 1, - .max_access_size = 1, - } -}; - -static void nubus_register_format_block(NubusDevice *dev) -{ - char *fblock_name; - - fblock_name = g_strdup_printf("nubus-slot-%d-format-block", - dev->slot_nb); - - hwaddr fblock_offset = memory_region_size(&dev->slot_mem) - FBLOCK_SIZE; - memory_region_init_io(&dev->fblock_io, NULL, &nubus_format_block_ops, - dev, fblock_name, FBLOCK_SIZE); - memory_region_add_subregion(&dev->slot_mem, fblock_offset, - &dev->fblock_io); - - g_free(fblock_name); -} - -static void mac_nubus_rom_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int size) -{ - /* read only */ -} - -static uint64_t mac_nubus_rom_read(void *opaque, hwaddr addr, - unsigned int size) +void nubus_set_irq(NubusDevice *nd, int level) { - NubusDevice *dev = opaque; + NubusBus *nubus = NUBUS_BUS(qdev_get_parent_bus(DEVICE(nd))); - return dev->rom[addr]; -} - -static const MemoryRegionOps mac_nubus_rom_ops = { - .read = mac_nubus_rom_read, - .write = mac_nubus_rom_write, - .endianness = DEVICE_BIG_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - - -void nubus_register_rom(NubusDevice *dev, const uint8_t *rom, uint32_t size, - int revision, int format, uint8_t byte_lanes) -{ - hwaddr rom_offset; - char *rom_name; - - /* FIXME : really compute CRC */ - dev->rom_length = 0; - dev->rom_crc = 0; - - dev->rom_rev = revision; - dev->rom_format = format; - - dev->byte_lanes = byte_lanes; - dev->directory_offset = -size; - - /* ROM */ - - dev->rom = rom; - rom_name = g_strdup_printf("nubus-slot-%d-rom", dev->slot_nb); - memory_region_init_io(&dev->rom_io, NULL, &mac_nubus_rom_ops, - dev, rom_name, size); - memory_region_set_readonly(&dev->rom_io, true); - - rom_offset = memory_region_size(&dev->slot_mem) - FBLOCK_SIZE + - dev->directory_offset; - memory_region_add_subregion(&dev->slot_mem, rom_offset, &dev->rom_io); - - g_free(rom_name); + qemu_set_irq(nubus->irqs[nd->slot], level); } static void nubus_device_realize(DeviceState *dev, Error **errp) { NubusBus *nubus = NUBUS_BUS(qdev_get_parent_bus(dev)); NubusDevice *nd = NUBUS_DEVICE(dev); - char *name; + char *name, *path; hwaddr slot_offset; + int64_t size; + int ret; - if (nubus->current_slot < NUBUS_FIRST_SLOT || - nubus->current_slot > NUBUS_LAST_SLOT) { - error_setg(errp, "Cannot register nubus card, not enough slots"); - return; - } + /* Super */ + slot_offset = nd->slot * NUBUS_SUPER_SLOT_SIZE; - nd->slot_nb = nubus->current_slot++; - name = g_strdup_printf("nubus-slot-%d", nd->slot_nb); + name = g_strdup_printf("nubus-super-slot-%x", nd->slot); + memory_region_init(&nd->super_slot_mem, OBJECT(dev), name, + NUBUS_SUPER_SLOT_SIZE); + memory_region_add_subregion(&nubus->super_slot_io, slot_offset, + &nd->super_slot_mem); + g_free(name); - if (nd->slot_nb < NUBUS_FIRST_SLOT) { - /* Super */ - slot_offset = (nd->slot_nb - 6) * NUBUS_SUPER_SLOT_SIZE; + /* Normal */ + slot_offset = nd->slot * NUBUS_SLOT_SIZE; - memory_region_init(&nd->slot_mem, OBJECT(dev), name, - NUBUS_SUPER_SLOT_SIZE); - memory_region_add_subregion(&nubus->super_slot_io, slot_offset, - &nd->slot_mem); - } else { - /* Normal */ - slot_offset = nd->slot_nb * NUBUS_SLOT_SIZE; + name = g_strdup_printf("nubus-slot-%x", nd->slot); + memory_region_init(&nd->slot_mem, OBJECT(dev), name, NUBUS_SLOT_SIZE); + memory_region_add_subregion(&nubus->slot_io, slot_offset, + &nd->slot_mem); + g_free(name); - memory_region_init(&nd->slot_mem, OBJECT(dev), name, NUBUS_SLOT_SIZE); - memory_region_add_subregion(&nubus->slot_io, slot_offset, - 
&nd->slot_mem); + /* Declaration ROM */ + if (nd->romfile != NULL) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, nd->romfile); + if (path == NULL) { + path = g_strdup(nd->romfile); + } + + size = get_image_size(path); + if (size < 0) { + error_setg(errp, "failed to find romfile \"%s\"", nd->romfile); + g_free(path); + return; + } else if (size == 0) { + error_setg(errp, "romfile \"%s\" is empty", nd->romfile); + g_free(path); + return; + } else if (size > NUBUS_DECL_ROM_MAX_SIZE) { + error_setg(errp, "romfile \"%s\" too large (maximum size 128K)", + nd->romfile); + g_free(path); + return; + } + + name = g_strdup_printf("nubus-slot-%x-declaration-rom", nd->slot); + memory_region_init_rom(&nd->decl_rom, OBJECT(dev), name, size, + &error_abort); + ret = load_image_mr(path, &nd->decl_rom); + g_free(path); + if (ret < 0) { + error_setg(errp, "could not load romfile \"%s\"", nd->romfile); + return; + } + memory_region_add_subregion(&nd->slot_mem, NUBUS_SLOT_SIZE - size, + &nd->decl_rom); } - - g_free(name); - nubus_register_format_block(nd); } +static Property nubus_device_properties[] = { + DEFINE_PROP_INT32("slot", NubusDevice, slot, -1), + DEFINE_PROP_STRING("romfile", NubusDevice, romfile), + DEFINE_PROP_END_OF_LIST() +}; + static void nubus_device_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = nubus_device_realize; dc->bus_type = TYPE_NUBUS_BUS; + device_class_set_props(dc, nubus_device_properties); } static const TypeInfo nubus_device_type_info = { diff --git a/hw/nubus/trace-events b/hw/nubus/trace-events new file mode 100644 index 00000000000..e31833d694a --- /dev/null +++ b/hw/nubus/trace-events @@ -0,0 +1,7 @@ +# See docs/devel/tracing.txt for syntax documentation. + +# nubus-bus.c +nubus_slot_read(uint64_t addr, int size) "reading unassigned addr 0x%"PRIx64 " size %d" +nubus_slot_write(uint64_t addr, uint64_t val, int size) "writing unassigned addr 0x%"PRIx64 " value 0x%"PRIx64 " size %d" +nubus_super_slot_read(uint64_t addr, int size) "reading unassigned addr 0x%"PRIx64 " size %d" +nubus_super_slot_write(uint64_t addr, uint64_t val, int size) "writing unassigned addr 0x%"PRIx64 " value 0x%"PRIx64 " size %d" diff --git a/hw/nubus/trace.h b/hw/nubus/trace.h new file mode 100644 index 00000000000..3749420da17 --- /dev/null +++ b/hw/nubus/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_nubus.h" diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig new file mode 100644 index 00000000000..8ac90942e55 --- /dev/null +++ b/hw/nvme/Kconfig @@ -0,0 +1,4 @@ +config NVME_PCI + bool + default y if PCI_DEVICES + depends on PCI diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c new file mode 100644 index 00000000000..5f573c417b3 --- /dev/null +++ b/hw/nvme/ctrl.c @@ -0,0 +1,6731 @@ +/* + * QEMU NVM Express Controller + * + * Copyright (c) 2012, Intel Corporation + * + * Written by Keith Busch + * + * This code is licensed under the GNU GPL v2 or later. + */ + +/** + * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e + * + * https://nvmexpress.org/developers/nvme-specification/ + * + * + * Notes on coding style + * --------------------- + * While QEMU coding style prefers lowercase hexadecimals in constants, the + * NVMe subsystem use thes format from the NVMe specifications in the comments + * (i.e. 'h' suffix instead of '0x' prefix). + * + * Usage + * ----- + * See docs/system/nvme.rst for extensive documentation. 
+ *
+ * Add options:
+ *      -drive file=,if=none,id=
+ *      -device nvme-subsys,id=,nqn=
+ *      -device nvme,serial=,id=, \
+ *              cmb_size_mb=, \
+ *              [pmrdev=,] \
+ *              max_ioqpairs=, \
+ *              aerl=,aer_max_queued=, \
+ *              mdts=,vsl=, \
+ *              zoned.zasl=, \
+ *              zoned.auto_transition=, \
+ *              subsys=
+ *      -device nvme-ns,drive=,bus=,nsid=,\
+ *              zoned=, \
+ *              subsys=,detached=
+ *
+ * Note that cmb_size_mb denotes the size of the CMB in MB. The CMB is assumed
+ * to be at offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By
+ * default, the device will use the "v1.4 CMB scheme" - use the `legacy-cmb`
+ * parameter to always enable the CMBLOC and CMBSZ registers (v1.3 behavior).
+ *
+ * Enabling pmr emulation can be achieved by pointing to a memory-backend-file.
+ * For example:
+ *      -object memory-backend-file,id=,share=on,mem-path=, \
+ *              size= .... -device nvme,...,pmrdev=
+ *
+ * The PMR will use BAR 4/5 exclusively.
+ *
+ * To place controller(s) and namespace(s) into a subsystem, provide the
+ * nvme-subsys device as above.
+ *
+ * nvme subsystem device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * - `nqn`
+ *   This parameter provides the `` part of the string
+ *   `nqn.2019-08.org.qemu:` which will be reported in the SUBNQN field
+ *   of subsystem controllers. Note that `` should be unique per
+ *   subsystem, but this is not enforced by QEMU. If not specified, it will
+ *   default to the value of the `id` parameter (``).
+ *
+ * nvme device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ * - `subsys`
+ *   Specifying this parameter attaches the controller to the subsystem and
+ *   the SUBNQN field in the controller will report the NQN of the subsystem
+ *   device. This also enables multi controller capability represented in
+ *   Identify Controller data structure in CMIC (Controller Multi-path I/O and
+ *   Namespace Sharing Capabilities).
+ *
+ * - `aerl`
+ *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
+ *   of concurrently outstanding Asynchronous Event Request commands supported
+ *   by the controller. This is a 0's based value.
+ *
+ * - `aer_max_queued`
+ *   This is the maximum number of events that the device will enqueue for
+ *   completion when there are no outstanding AERs. When the maximum number of
+ *   enqueued events is reached, subsequent events will be dropped.
+ *
+ * - `mdts`
+ *   Indicates the maximum data transfer size for a command that transfers data
+ *   between host-accessible memory and the controller. The value is specified
+ *   as a power of two (2^n) and is in units of the minimum memory page size
+ *   (CAP.MPSMIN). The default value is 7 (i.e. 512 KiB).
+ *
+ * - `vsl`
+ *   Indicates the maximum data size limit for the Verify command. Like `mdts`,
+ *   this value is specified as a power of two (2^n) and is in units of the
+ *   minimum memory page size (CAP.MPSMIN). The default value is 7 (i.e. 512
+ *   KiB).
+ *
+ * - `zoned.zasl`
+ *   Indicates the maximum data transfer size for the Zone Append command. Like
+ *   `mdts`, the value is specified as a power of two (2^n) and is in units of
+ *   the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
+ *   defaulting to the value of `mdts`).
+ *
+ * - `zoned.auto_transition`
+ *   Indicates if zones in zone state implicitly opened can be automatically
+ *   transitioned to zone state closed for resource management purposes.
+ *   Defaults to 'on'.
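The power-of-two encoding used by `mdts`, `vsl` and `zoned.zasl` can be made concrete with a minimal sketch. It assumes a 4 KiB minimum memory page size (CAP.MPSMIN = 0, an assumption, not something fixed by the patch) and mirrors the `len > n->page_size << mdts` comparison that nvme_check_mdts() performs further down in this file:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        size_t page_size = 4096;             /* assumed CAP.MPSMIN of 0 (4 KiB pages) */
        unsigned int mdts = 7;               /* the documented default */
        size_t max_xfer = page_size << mdts; /* 2^7 pages = 512 KiB */

        printf("maximum transfer size: %zu KiB\n", max_xfer / 1024);
        return 0;
    }

A transfer larger than this limit is rejected, which is what nvme_check_mdts() expresses by returning NVME_INVALID_FIELD | NVME_DNR.
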
+ * + * nvme namespace device parameters + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * - `shared` + * When the parent nvme device (as defined explicitly by the 'bus' parameter + * or implicitly by the most recently defined NvmeBus) is linked to an + * nvme-subsys device, the namespace will be attached to all controllers in + * the subsystem. If set to 'off' (the default), the namespace will remain a + * private namespace and may only be attached to a single controller at a + * time. + * + * - `detached` + * This parameter is only valid together with the `subsys` parameter. If left + * at the default value (`false/off`), the namespace will be attached to all + * controllers in the NVMe subsystem at boot-up. If set to `true/on`, the + * namespace will be available in the subsystem but not attached to any + * controllers. + * + * Setting `zoned` to true selects Zoned Command Set at the namespace. + * In this case, the following namespace properties are available to configure + * zoned operation: + * zoned.zone_size= + * The number may be followed by K, M, G as in kilo-, mega- or giga-. + * + * zoned.zone_capacity= + * The value 0 (default) forces zone capacity to be the same as zone + * size. The value of this property may not exceed zone size. + * + * zoned.descr_ext_size= + * This value needs to be specified in 64B units. If it is zero, + * namespace(s) will not support zone descriptor extensions. + * + * zoned.max_active= + * The default value means there is no limit to the number of + * concurrently active zones. + * + * zoned.max_open= + * The default value means there is no limit to the number of + * concurrently open zones. + * + * zoned.cross_read= + * Setting this property to true enables Read Across Zone Boundaries. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" +#include "sysemu/hostmem.h" +#include "hw/pci/msix.h" +#include "migration/vmstate.h" + +#include "nvme.h" +#include "trace.h" + +#define NVME_MAX_IOQPAIRS 0xffff +#define NVME_DB_SIZE 4 +#define NVME_SPEC_VER 0x00010400 +#define NVME_CMB_BIR 2 +#define NVME_PMR_BIR 4 +#define NVME_TEMPERATURE 0x143 +#define NVME_TEMPERATURE_WARNING 0x157 +#define NVME_TEMPERATURE_CRITICAL 0x175 +#define NVME_NUM_FW_SLOTS 1 +#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) + +#define NVME_GUEST_ERR(trace, fmt, ...) 
\ + do { \ + (trace_##trace)(__VA_ARGS__); \ + qemu_log_mask(LOG_GUEST_ERROR, #trace \ + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ + } while (0) + +static const bool nvme_feature_support[NVME_FID_MAX] = { + [NVME_ARBITRATION] = true, + [NVME_POWER_MANAGEMENT] = true, + [NVME_TEMPERATURE_THRESHOLD] = true, + [NVME_ERROR_RECOVERY] = true, + [NVME_VOLATILE_WRITE_CACHE] = true, + [NVME_NUMBER_OF_QUEUES] = true, + [NVME_INTERRUPT_COALESCING] = true, + [NVME_INTERRUPT_VECTOR_CONF] = true, + [NVME_WRITE_ATOMICITY] = true, + [NVME_ASYNCHRONOUS_EVENT_CONF] = true, + [NVME_TIMESTAMP] = true, + [NVME_COMMAND_SET_PROFILE] = true, +}; + +static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { + [NVME_TEMPERATURE_THRESHOLD] = NVME_FEAT_CAP_CHANGE, + [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, + [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE, + [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, + [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, + [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, + [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, +}; + +static const uint32_t nvme_cse_acs[256] = { + [NVME_ADM_CMD_DELETE_SQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_CREATE_SQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_GET_LOG_PAGE] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_DELETE_CQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_CREATE_CQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_IDENTIFY] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_ABORT] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_SET_FEATURES] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, + [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, +}; + +static const uint32_t nvme_cse_iocs_none[256]; + +static const uint32_t nvme_cse_iocs_nvm[256] = { + [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, +}; + +static const uint32_t nvme_cse_iocs_zoned[256] = { + [NVME_CMD_FLUSH] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE_ZEROES] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_READ] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_CMD_ZONE_MGMT_RECV] = NVME_CMD_EFF_CSUPP, +}; + +static void nvme_process_sq(void *opaque); + +static uint16_t nvme_sqid(NvmeRequest *req) +{ + return le16_to_cpu(req->sq->sqid); +} + +static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state) +{ + if (QTAILQ_IN_USE(zone, entry)) { + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_CLOSED: + 
QTAILQ_REMOVE(&ns->closed_zones, zone, entry); + break; + case NVME_ZONE_STATE_FULL: + QTAILQ_REMOVE(&ns->full_zones, zone, entry); + default: + ; + } + } + + nvme_set_zone_state(zone, state); + + switch (state) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + QTAILQ_INSERT_TAIL(&ns->exp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + QTAILQ_INSERT_TAIL(&ns->imp_open_zones, zone, entry); + break; + case NVME_ZONE_STATE_CLOSED: + QTAILQ_INSERT_TAIL(&ns->closed_zones, zone, entry); + break; + case NVME_ZONE_STATE_FULL: + QTAILQ_INSERT_TAIL(&ns->full_zones, zone, entry); + case NVME_ZONE_STATE_READ_ONLY: + break; + default: + zone->d.za = 0; + } +} + +/* + * Check if we can open a zone without exceeding open/active limits. + * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5). + */ +static int nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) +{ + if (ns->params.max_active_zones != 0 && + ns->nr_active_zones + act > ns->params.max_active_zones) { + trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones); + return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR; + } + if (ns->params.max_open_zones != 0 && + ns->nr_open_zones + opn > ns->params.max_open_zones) { + trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones); + return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) +{ + hwaddr hi, lo; + + if (!n->cmb.cmse) { + return false; + } + + lo = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; + hi = lo + int128_get64(n->cmb.mem.size); + + return addr >= lo && addr < hi; +} + +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr) +{ + hwaddr base = n->params.legacy_cmb ? n->cmb.mem.addr : n->cmb.cba; + return &n->cmb.buf[addr - base]; +} + +static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr) +{ + hwaddr hi; + + if (!n->pmr.cmse) { + return false; + } + + hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size); + + return addr >= n->pmr.cba && addr < hi; +} + +static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr) +{ + return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba); +} + +static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + hwaddr hi = addr + size - 1; + if (hi < addr) { + return 1; + } + + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { + memcpy(buf, nvme_addr_to_cmb(n, addr), size); + return 0; + } + + if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { + memcpy(buf, nvme_addr_to_pmr(n, addr), size); + return 0; + } + + return pci_dma_read(&n->parent_obj, addr, buf, size); +} + +static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) +{ + hwaddr hi = addr + size - 1; + if (hi < addr) { + return 1; + } + + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr) && nvme_addr_is_cmb(n, hi)) { + memcpy(nvme_addr_to_cmb(n, addr), buf, size); + return 0; + } + + if (nvme_addr_is_pmr(n, addr) && nvme_addr_is_pmr(n, hi)) { + memcpy(nvme_addr_to_pmr(n, addr), buf, size); + return 0; + } + + return pci_dma_write(&n->parent_obj, addr, buf, size); +} + +static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) +{ + return nsid && + (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES); +} + +static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) +{ + return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 
0 : -1; +} + +static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) +{ + return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1; +} + +static void nvme_inc_cq_tail(NvmeCQueue *cq) +{ + cq->tail++; + if (cq->tail >= cq->size) { + cq->tail = 0; + cq->phase = !cq->phase; + } +} + +static void nvme_inc_sq_head(NvmeSQueue *sq) +{ + sq->head = (sq->head + 1) % sq->size; +} + +static uint8_t nvme_cq_full(NvmeCQueue *cq) +{ + return (cq->tail + 1) % cq->size == cq->head; +} + +static uint8_t nvme_sq_empty(NvmeSQueue *sq) +{ + return sq->head == sq->tail; +} + +static void nvme_irq_check(NvmeCtrl *n) +{ + uint32_t intms = ldl_le_p(&n->bar.intms); + + if (msix_enabled(&(n->parent_obj))) { + return; + } + if (~intms & n->irq_status) { + pci_irq_assert(&n->parent_obj); + } else { + pci_irq_deassert(&n->parent_obj); + } +} + +static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) +{ + if (cq->irq_enabled) { + if (msix_enabled(&(n->parent_obj))) { + trace_pci_nvme_irq_msix(cq->vector); + msix_notify(&(n->parent_obj), cq->vector); + } else { + trace_pci_nvme_irq_pin(); + assert(cq->vector < 32); + n->irq_status |= 1 << cq->vector; + nvme_irq_check(n); + } + } else { + trace_pci_nvme_irq_masked(); + } +} + +static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) +{ + if (cq->irq_enabled) { + if (msix_enabled(&(n->parent_obj))) { + return; + } else { + assert(cq->vector < 32); + if (!n->cq_pending) { + n->irq_status &= ~(1 << cq->vector); + } + nvme_irq_check(n); + } + } +} + +static void nvme_req_clear(NvmeRequest *req) +{ + req->ns = NULL; + req->opaque = NULL; + req->aiocb = NULL; + memset(&req->cqe, 0x0, sizeof(req->cqe)); + req->status = NVME_SUCCESS; +} + +static inline void nvme_sg_init(NvmeCtrl *n, NvmeSg *sg, bool dma) +{ + if (dma) { + pci_dma_sglist_init(&sg->qsg, &n->parent_obj, 0); + sg->flags = NVME_SG_DMA; + } else { + qemu_iovec_init(&sg->iov, 0); + } + + sg->flags |= NVME_SG_ALLOC; +} + +static inline void nvme_sg_unmap(NvmeSg *sg) +{ + if (!(sg->flags & NVME_SG_ALLOC)) { + return; + } + + if (sg->flags & NVME_SG_DMA) { + qemu_sglist_destroy(&sg->qsg); + } else { + qemu_iovec_destroy(&sg->iov); + } + + memset(sg, 0x0, sizeof(*sg)); +} + +/* + * When metadata is transfered as extended LBAs, the DPTR mapped into `sg` + * holds both data and metadata. This function splits the data and metadata + * into two separate QSG/IOVs. + */ +static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, + NvmeSg *mdata) +{ + NvmeSg *dst = data; + uint32_t trans_len, count = ns->lbasz; + uint64_t offset = 0; + bool dma = sg->flags & NVME_SG_DMA; + size_t sge_len; + size_t sg_len = dma ? sg->qsg.size : sg->iov.size; + int sg_idx = 0; + + assert(sg->flags & NVME_SG_ALLOC); + + while (sg_len) { + sge_len = dma ? sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; + + trans_len = MIN(sg_len, count); + trans_len = MIN(trans_len, sge_len - offset); + + if (dst) { + if (dma) { + qemu_sglist_add(&dst->qsg, sg->qsg.sg[sg_idx].base + offset, + trans_len); + } else { + qemu_iovec_add(&dst->iov, + sg->iov.iov[sg_idx].iov_base + offset, + trans_len); + } + } + + sg_len -= trans_len; + count -= trans_len; + offset += trans_len; + + if (count == 0) { + dst = (dst == data) ? mdata : data; + count = (dst == data) ? 
ns->lbasz : ns->lbaf.ms; + } + + if (sge_len == offset) { + offset = 0; + sg_idx++; + } + } +} + +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, + size_t len) +{ + if (!len) { + return NVME_SUCCESS; + } + + trace_pci_nvme_map_addr_cmb(addr, len); + + if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) { + return NVME_DATA_TRAS_ERROR; + } + + qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len); + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, + size_t len) +{ + if (!len) { + return NVME_SUCCESS; + } + + if (!nvme_addr_is_pmr(n, addr) || !nvme_addr_is_pmr(n, addr + len - 1)) { + return NVME_DATA_TRAS_ERROR; + } + + qemu_iovec_add(iov, nvme_addr_to_pmr(n, addr), len); + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_addr(NvmeCtrl *n, NvmeSg *sg, hwaddr addr, size_t len) +{ + bool cmb = false, pmr = false; + + if (!len) { + return NVME_SUCCESS; + } + + trace_pci_nvme_map_addr(addr, len); + + if (nvme_addr_is_cmb(n, addr)) { + cmb = true; + } else if (nvme_addr_is_pmr(n, addr)) { + pmr = true; + } + + if (cmb || pmr) { + if (sg->flags & NVME_SG_DMA) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + if (sg->iov.niov + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + + if (cmb) { + return nvme_map_addr_cmb(n, &sg->iov, addr, len); + } else { + return nvme_map_addr_pmr(n, &sg->iov, addr, len); + } + } + + if (!(sg->flags & NVME_SG_DMA)) { + return NVME_INVALID_USE_OF_CMB | NVME_DNR; + } + + if (sg->qsg.nsg + 1 > IOV_MAX) { + goto max_mappings_exceeded; + } + + qemu_sglist_add(&sg->qsg, addr, len); + + return NVME_SUCCESS; + +max_mappings_exceeded: + NVME_GUEST_ERR(pci_nvme_ub_too_many_mappings, + "number of mappings exceed 1024"); + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; +} + +static inline bool nvme_addr_is_dma(NvmeCtrl *n, hwaddr addr) +{ + return !(nvme_addr_is_cmb(n, addr) || nvme_addr_is_pmr(n, addr)); +} + +static uint16_t nvme_map_prp(NvmeCtrl *n, NvmeSg *sg, uint64_t prp1, + uint64_t prp2, uint32_t len) +{ + hwaddr trans_len = n->page_size - (prp1 % n->page_size); + trans_len = MIN(len, trans_len); + int num_prps = (len >> n->page_bits) + 1; + uint16_t status; + int ret; + + trace_pci_nvme_map_prp(trans_len, len, prp1, prp2, num_prps); + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, prp1)); + + status = nvme_map_addr(n, sg, prp1, trans_len); + if (status) { + goto unmap; + } + + len -= trans_len; + if (len) { + if (len > n->page_size) { + uint64_t prp_list[n->max_prp_ents]; + uint32_t nents, prp_trans; + int i = 0; + + /* + * The first PRP list entry, pointed to by PRP2 may contain offset. + * Hence, we need to calculate the number of entries in based on + * that offset. 
+ */ + nents = (n->page_size - (prp2 & (n->page_size - 1))) >> 3; + prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); + ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans); + if (ret) { + trace_pci_nvme_err_addr_read(prp2); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + while (len != 0) { + uint64_t prp_ent = le64_to_cpu(prp_list[i]); + + if (i == nents - 1 && len > n->page_size) { + if (unlikely(prp_ent & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_PRP_OFFSET | NVME_DNR; + goto unmap; + } + + i = 0; + nents = (len + n->page_size - 1) >> n->page_bits; + nents = MIN(nents, n->max_prp_ents); + prp_trans = nents * sizeof(uint64_t); + ret = nvme_addr_read(n, prp_ent, (void *)prp_list, + prp_trans); + if (ret) { + trace_pci_nvme_err_addr_read(prp_ent); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + prp_ent = le64_to_cpu(prp_list[i]); + } + + if (unlikely(prp_ent & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); + status = NVME_INVALID_PRP_OFFSET | NVME_DNR; + goto unmap; + } + + trans_len = MIN(len, n->page_size); + status = nvme_map_addr(n, sg, prp_ent, trans_len); + if (status) { + goto unmap; + } + + len -= trans_len; + i++; + } + } else { + if (unlikely(prp2 & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_prp2_align(prp2); + status = NVME_INVALID_PRP_OFFSET | NVME_DNR; + goto unmap; + } + status = nvme_map_addr(n, sg, prp2, len); + if (status) { + goto unmap; + } + } + } + + return NVME_SUCCESS; + +unmap: + nvme_sg_unmap(sg); + return status; +} + +/* + * Map 'nsgld' data descriptors from 'segment'. The function will subtract the + * number of bytes mapped in len. + */ +static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, + NvmeSglDescriptor *segment, uint64_t nsgld, + size_t *len, NvmeCmd *cmd) +{ + dma_addr_t addr, trans_len; + uint32_t dlen; + uint16_t status; + + for (int i = 0; i < nsgld; i++) { + uint8_t type = NVME_SGL_TYPE(segment[i].type); + + switch (type) { + case NVME_SGL_DESCR_TYPE_BIT_BUCKET: + if (cmd->opcode == NVME_CMD_WRITE) { + continue; + } + case NVME_SGL_DESCR_TYPE_DATA_BLOCK: + break; + case NVME_SGL_DESCR_TYPE_SEGMENT: + case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: + return NVME_INVALID_NUM_SGL_DESCRS | NVME_DNR; + default: + return NVME_SGL_DESCR_TYPE_INVALID | NVME_DNR; + } + + dlen = le32_to_cpu(segment[i].len); + + if (!dlen) { + continue; + } + + if (*len == 0) { + /* + * All data has been mapped, but the SGL contains additional + * segments and/or descriptors. The controller might accept + * ignoring the rest of the SGL. + */ + uint32_t sgls = le32_to_cpu(n->id_ctrl.sgls); + if (sgls & NVME_CTRL_SGLS_EXCESS_LENGTH) { + break; + } + + trace_pci_nvme_err_invalid_sgl_excess_length(dlen); + return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + } + + trans_len = MIN(*len, dlen); + + if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) { + goto next; + } + + addr = le64_to_cpu(segment[i].addr); + + if (UINT64_MAX - addr < dlen) { + return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + } + + status = nvme_map_addr(n, sg, addr, trans_len); + if (status) { + return status; + } + +next: + *len -= trans_len; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, + size_t len, NvmeCmd *cmd) +{ + /* + * Read the segment in chunks of 256 descriptors (one 4k page) to avoid + * dynamically allocating a potentially huge SGL. 
The spec allows the SGL + * to be larger (as in number of bytes required to describe the SGL + * descriptors and segment chain) than the command transfer size, so it is + * not bounded by MDTS. + */ + const int SEG_CHUNK_SIZE = 256; + + NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; + uint64_t nsgld; + uint32_t seg_len; + uint16_t status; + hwaddr addr; + int ret; + + sgld = &sgl; + addr = le64_to_cpu(sgl.addr); + + trace_pci_nvme_map_sgl(NVME_SGL_TYPE(sgl.type), len); + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, addr)); + + /* + * If the entire transfer can be described with a single data block it can + * be mapped directly. + */ + if (NVME_SGL_TYPE(sgl.type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) { + status = nvme_map_sgl_data(n, sg, sgld, 1, &len, cmd); + if (status) { + goto unmap; + } + + goto out; + } + + for (;;) { + switch (NVME_SGL_TYPE(sgld->type)) { + case NVME_SGL_DESCR_TYPE_SEGMENT: + case NVME_SGL_DESCR_TYPE_LAST_SEGMENT: + break; + default: + return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; + } + + seg_len = le32_to_cpu(sgld->len); + + /* check the length of the (Last) Segment descriptor */ + if ((!seg_len || seg_len & 0xf) && + (NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) { + return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; + } + + if (UINT64_MAX - addr < seg_len) { + return NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + } + + nsgld = seg_len / sizeof(NvmeSglDescriptor); + + while (nsgld > SEG_CHUNK_SIZE) { + if (nvme_addr_read(n, addr, segment, sizeof(segment))) { + trace_pci_nvme_err_addr_read(addr); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + + status = nvme_map_sgl_data(n, sg, segment, SEG_CHUNK_SIZE, + &len, cmd); + if (status) { + goto unmap; + } + + nsgld -= SEG_CHUNK_SIZE; + addr += SEG_CHUNK_SIZE * sizeof(NvmeSglDescriptor); + } + + ret = nvme_addr_read(n, addr, segment, nsgld * + sizeof(NvmeSglDescriptor)); + if (ret) { + trace_pci_nvme_err_addr_read(addr); + status = NVME_DATA_TRAS_ERROR; + goto unmap; + } + + last_sgld = &segment[nsgld - 1]; + + /* + * If the segment ends with a Data Block or Bit Bucket Descriptor Type, + * then we are done. + */ + switch (NVME_SGL_TYPE(last_sgld->type)) { + case NVME_SGL_DESCR_TYPE_DATA_BLOCK: + case NVME_SGL_DESCR_TYPE_BIT_BUCKET: + status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd); + if (status) { + goto unmap; + } + + goto out; + + default: + break; + } + + /* + * If the last descriptor was not a Data Block or Bit Bucket, then the + * current segment must not be a Last Segment. + */ + if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) { + status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; + goto unmap; + } + + sgld = last_sgld; + addr = le64_to_cpu(sgld->addr); + + /* + * Do not map the last descriptor; it will be a Segment or Last Segment + * descriptor and is handled by the next iteration. 
+ */ + status = nvme_map_sgl_data(n, sg, segment, nsgld - 1, &len, cmd); + if (status) { + goto unmap; + } + } + +out: + /* if there is any residual left in len, the SGL was too short */ + if (len) { + status = NVME_DATA_SGL_LEN_INVALID | NVME_DNR; + goto unmap; + } + + return NVME_SUCCESS; + +unmap: + nvme_sg_unmap(sg); + return status; +} + +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) +{ + uint64_t prp1, prp2; + + switch (NVME_CMD_FLAGS_PSDT(cmd->flags)) { + case NVME_PSDT_PRP: + prp1 = le64_to_cpu(cmd->dptr.prp1); + prp2 = le64_to_cpu(cmd->dptr.prp2); + + return nvme_map_prp(n, sg, prp1, prp2, len); + case NVME_PSDT_SGL_MPTR_CONTIGUOUS: + case NVME_PSDT_SGL_MPTR_SGL: + return nvme_map_sgl(n, sg, cmd->dptr.sgl, len, cmd); + default: + return NVME_INVALID_FIELD; + } +} + +static uint16_t nvme_map_mptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd) +{ + int psdt = NVME_CMD_FLAGS_PSDT(cmd->flags); + hwaddr mptr = le64_to_cpu(cmd->mptr); + uint16_t status; + + if (psdt == NVME_PSDT_SGL_MPTR_SGL) { + NvmeSglDescriptor sgl; + + if (nvme_addr_read(n, mptr, &sgl, sizeof(sgl))) { + return NVME_DATA_TRAS_ERROR; + } + + status = nvme_map_sgl(n, sg, sgl, len, cmd); + if (status && (status & 0x7ff) == NVME_DATA_SGL_LEN_INVALID) { + status = NVME_MD_SGL_LEN_INVALID | NVME_DNR; + } + + return status; + } + + nvme_sg_init(n, sg, nvme_addr_is_dma(n, mptr)); + status = nvme_map_addr(n, sg, mptr, len); + if (status) { + nvme_sg_unmap(sg); + } + + return status; +} + +static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); + bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); + size_t len = nvme_l2b(ns, nlb); + uint16_t status; + + if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + NvmeSg sg; + + len += nvme_m2b(ns, nlb); + + status = nvme_map_dptr(n, &sg, len, &req->cmd); + if (status) { + return status; + } + + nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); + nvme_sg_split(&sg, ns, &req->sg, NULL); + nvme_sg_unmap(&sg); + + return NVME_SUCCESS; + } + + return nvme_map_dptr(n, &req->sg, len, &req->cmd); +} + +static uint16_t nvme_map_mdata(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + size_t len = nvme_m2b(ns, nlb); + uint16_t status; + + if (nvme_ns_ext(ns)) { + NvmeSg sg; + + len += nvme_l2b(ns, nlb); + + status = nvme_map_dptr(n, &sg, len, &req->cmd); + if (status) { + return status; + } + + nvme_sg_init(n, &req->sg, sg.flags & NVME_SG_DMA); + nvme_sg_split(&sg, ns, NULL, &req->sg); + nvme_sg_unmap(&sg); + + return NVME_SUCCESS; + } + + return nvme_map_mptr(n, &req->sg, len, &req->cmd); +} + +static uint16_t nvme_tx_interleaved(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, + uint32_t len, uint32_t bytes, + int32_t skip_bytes, int64_t offset, + NvmeTxDirection dir) +{ + hwaddr addr; + uint32_t trans_len, count = bytes; + bool dma = sg->flags & NVME_SG_DMA; + int64_t sge_len; + int sg_idx = 0; + int ret; + + assert(sg->flags & NVME_SG_ALLOC); + + while (len) { + sge_len = dma ? 
sg->qsg.sg[sg_idx].len : sg->iov.iov[sg_idx].iov_len; + + if (sge_len - offset < 0) { + offset -= sge_len; + sg_idx++; + continue; + } + + if (sge_len == offset) { + offset = 0; + sg_idx++; + continue; + } + + trans_len = MIN(len, count); + trans_len = MIN(trans_len, sge_len - offset); + + if (dma) { + addr = sg->qsg.sg[sg_idx].base + offset; + } else { + addr = (hwaddr)(uintptr_t)sg->iov.iov[sg_idx].iov_base + offset; + } + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + ret = nvme_addr_read(n, addr, ptr, trans_len); + } else { + ret = nvme_addr_write(n, addr, ptr, trans_len); + } + + if (ret) { + return NVME_DATA_TRAS_ERROR; + } + + ptr += trans_len; + len -= trans_len; + count -= trans_len; + offset += trans_len; + + if (count == 0) { + count = bytes; + offset += skip_bytes; + } + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_tx(NvmeCtrl *n, NvmeSg *sg, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir) +{ + assert(sg->flags & NVME_SG_ALLOC); + + if (sg->flags & NVME_SG_DMA) { + uint64_t residual; + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + residual = dma_buf_write(ptr, len, &sg->qsg); + } else { + residual = dma_buf_read(ptr, len, &sg->qsg); + } + + if (unlikely(residual)) { + trace_pci_nvme_err_invalid_dma(); + return NVME_INVALID_FIELD | NVME_DNR; + } + } else { + size_t bytes; + + if (dir == NVME_TX_DIRECTION_TO_DEVICE) { + bytes = qemu_iovec_to_buf(&sg->iov, 0, ptr, len); + } else { + bytes = qemu_iovec_from_buf(&sg->iov, 0, ptr, len); + } + + if (unlikely(bytes != len)) { + trace_pci_nvme_err_invalid_dma(); + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + return NVME_SUCCESS; +} + +static inline uint16_t nvme_c2h(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeRequest *req) +{ + uint16_t status; + + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_FROM_DEVICE); +} + +static inline uint16_t nvme_h2c(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeRequest *req) +{ + uint16_t status; + + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, NVME_TX_DIRECTION_TO_DEVICE); +} + +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + bool pi = !!NVME_ID_NS_DPS_TYPE(ns->id_ns.dps); + bool pract = !!(le16_to_cpu(rw->control) & NVME_RW_PRINFO_PRACT); + + if (nvme_ns_ext(ns) && !(pi && pract && ns->lbaf.ms == 8)) { + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, + ns->lbaf.ms, 0, dir); + } + + return nvme_tx(n, &req->sg, ptr, len, dir); +} + +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + uint16_t status; + + if (nvme_ns_ext(ns)) { + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms, + ns->lbasz, ns->lbasz, dir); + } + + nvme_sg_unmap(&req->sg); + + status = nvme_map_mptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + return nvme_tx(n, &req->sg, ptr, len, dir); +} + +static inline void nvme_blk_read(BlockBackend *blk, int64_t offset, + BlockCompletionFunc *cb, NvmeRequest *req) +{ + assert(req->sg.flags & NVME_SG_ALLOC); + + if (req->sg.flags & NVME_SG_DMA) { + req->aiocb = dma_blk_read(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, + cb, req); + } else { + req->aiocb = blk_aio_preadv(blk, offset, &req->sg.iov, 0, cb, req); + } +} + 
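
The interleaved transfer helpers above (nvme_sg_split() and nvme_tx_interleaved()) both operate on the "extended LBA" layout, in which each logical block carries its metadata directly after the data in the same buffer. A minimal sketch of that layout arithmetic follows; the 512-byte block with 8 bytes of metadata is only an illustrative choice, the real values come from ns->lbasz and ns->lbaf.ms:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        size_t lbasz = 512; /* data bytes per logical block (illustrative ns->lbasz) */
        size_t ms = 8;      /* metadata bytes per block (illustrative ns->lbaf.ms) */

        for (size_t blk = 0; blk < 3; blk++) {
            size_t data_off = blk * (lbasz + ms); /* start of this block's data */
            size_t mdata_off = data_off + lbasz;  /* its metadata follows directly */
            printf("block %zu: data @ %zu, metadata @ %zu\n",
                   blk, data_off, mdata_off);
        }
        return 0;
    }

Splitting such a buffer into separate data and metadata scatter/gather lists is what nvme_sg_split() does, while nvme_tx_interleaved() walks the same stride when copying between the guest buffer and a flat bounce buffer.
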
+static inline void nvme_blk_write(BlockBackend *blk, int64_t offset, + BlockCompletionFunc *cb, NvmeRequest *req) +{ + assert(req->sg.flags & NVME_SG_ALLOC); + + if (req->sg.flags & NVME_SG_DMA) { + req->aiocb = dma_blk_write(blk, &req->sg.qsg, offset, BDRV_SECTOR_SIZE, + cb, req); + } else { + req->aiocb = blk_aio_pwritev(blk, offset, &req->sg.iov, 0, cb, req); + } +} + +static void nvme_post_cqes(void *opaque) +{ + NvmeCQueue *cq = opaque; + NvmeCtrl *n = cq->ctrl; + NvmeRequest *req, *next; + bool pending = cq->head != cq->tail; + int ret; + + QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { + NvmeSQueue *sq; + hwaddr addr; + + if (nvme_cq_full(cq)) { + break; + } + + sq = req->sq; + req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); + req->cqe.sq_id = cpu_to_le16(sq->sqid); + req->cqe.sq_head = cpu_to_le16(sq->head); + addr = cq->dma_addr + cq->tail * n->cqe_size; + ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, + sizeof(req->cqe)); + if (ret) { + trace_pci_nvme_err_addr_write(addr); + trace_pci_nvme_err_cfs(); + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); + break; + } + QTAILQ_REMOVE(&cq->req_list, req, entry); + nvme_inc_cq_tail(cq); + nvme_sg_unmap(&req->sg); + QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); + } + if (cq->tail != cq->head) { + if (cq->irq_enabled && !pending) { + n->cq_pending++; + } + + nvme_irq_assert(n, cq); + } +} + +static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) +{ + assert(cq->cqid == req->sq->cqid); + trace_pci_nvme_enqueue_req_completion(nvme_cid(req), cq->cqid, + le32_to_cpu(req->cqe.result), + le32_to_cpu(req->cqe.dw1), + req->status); + + if (req->status) { + trace_pci_nvme_err_req_status(nvme_cid(req), nvme_nsid(req->ns), + req->status, req->cmd.opcode); + } + + QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); + QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + +static void nvme_process_aers(void *opaque) +{ + NvmeCtrl *n = opaque; + NvmeAsyncEvent *event, *next; + + trace_pci_nvme_process_aers(n->aer_queued); + + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { + NvmeRequest *req; + NvmeAerResult *result; + + /* can't post cqe if there is nothing to complete */ + if (!n->outstanding_aers) { + trace_pci_nvme_no_outstanding_aers(); + break; + } + + /* ignore if masked (cqe posted, but event not cleared) */ + if (n->aer_mask & (1 << event->result.event_type)) { + trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask); + continue; + } + + QTAILQ_REMOVE(&n->aer_queue, event, entry); + n->aer_queued--; + + n->aer_mask |= 1 << event->result.event_type; + n->outstanding_aers--; + + req = n->aer_reqs[n->outstanding_aers]; + + result = (NvmeAerResult *) &req->cqe.result; + result->event_type = event->result.event_type; + result->event_info = event->result.event_info; + result->log_page = event->result.log_page; + g_free(event); + + trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info, + result->log_page); + + nvme_enqueue_req_completion(&n->admin_cq, req); + } +} + +static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, + uint8_t event_info, uint8_t log_page) +{ + NvmeAsyncEvent *event; + + trace_pci_nvme_enqueue_event(event_type, event_info, log_page); + + if (n->aer_queued == n->params.aer_max_queued) { + trace_pci_nvme_enqueue_event_noqueue(n->aer_queued); + return; + } + + event = g_new(NvmeAsyncEvent, 1); + event->result = (NvmeAerResult) { + .event_type = event_type, + .event_info = 
event_info, + .log_page = log_page, + }; + + QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry); + n->aer_queued++; + + nvme_process_aers(n); +} + +static void nvme_smart_event(NvmeCtrl *n, uint8_t event) +{ + uint8_t aer_info; + + /* Ref SPEC */ + if (!(NVME_AEC_SMART(n->features.async_config) & event)) { + return; + } + + switch (event) { + case NVME_SMART_SPARE: + aer_info = NVME_AER_INFO_SMART_SPARE_THRESH; + break; + case NVME_SMART_TEMPERATURE: + aer_info = NVME_AER_INFO_SMART_TEMP_THRESH; + break; + case NVME_SMART_RELIABILITY: + case NVME_SMART_MEDIA_READ_ONLY: + case NVME_SMART_FAILED_VOLATILE_MEDIA: + case NVME_SMART_PMR_UNRELIABLE: + aer_info = NVME_AER_INFO_SMART_RELIABILITY; + break; + default: + return; + } + + nvme_enqueue_event(n, NVME_AER_TYPE_SMART, aer_info, NVME_LOG_SMART_INFO); +} + +static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) +{ + n->aer_mask &= ~(1 << event_type); + if (!QTAILQ_EMPTY(&n->aer_queue)) { + nvme_process_aers(n); + } +} + +static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len) +{ + uint8_t mdts = n->params.mdts; + + if (mdts && len > n->page_size << mdts) { + trace_pci_nvme_err_mdts(len); + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); + + if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze); + return NVME_LBA_RANGE | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static int nvme_block_status_all(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb, int flags) +{ + BlockDriverState *bs = blk_bs(ns->blkconf.blk); + + int64_t pnum = 0, bytes = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + int ret; + + /* + * `pnum` holds the number of bytes after offset that shares the same + * allocation status as the byte at offset. If `pnum` is different from + * `bytes`, we should check the allocation status of the next range and + * continue this until all bytes have been checked. + */ + do { + bytes -= pnum; + + ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); + if (ret < 0) { + return ret; + } + + + trace_pci_nvme_block_status(offset, bytes, pnum, ret, + !!(ret & BDRV_BLOCK_ZERO)); + + if (!(ret & flags)) { + return 1; + } + + offset += pnum; + } while (pnum != bytes); + + return 0; +} + +static uint16_t nvme_check_dulbe(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + int ret; + Error *err = NULL; + + ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_DATA); + if (ret) { + if (ret < 0) { + error_setg_errno(&err, -ret, "unable to get block status"); + error_report_err(err); + + return NVME_INTERNAL_DEV_ERROR; + } + + return NVME_DULB; + } + + return NVME_SUCCESS; +} + +static void nvme_aio_err(NvmeRequest *req, int ret) +{ + uint16_t status = NVME_SUCCESS; + Error *local_err = NULL; + + switch (req->cmd.opcode) { + case NVME_CMD_READ: + status = NVME_UNRECOVERED_READ; + break; + case NVME_CMD_FLUSH: + case NVME_CMD_WRITE: + case NVME_CMD_WRITE_ZEROES: + case NVME_CMD_ZONE_APPEND: + status = NVME_WRITE_FAULT; + break; + default: + status = NVME_INTERNAL_DEV_ERROR; + break; + } + + trace_pci_nvme_err_aio(nvme_cid(req), strerror(-ret), status); + + error_setg_errno(&local_err, -ret, "aio failed"); + error_report_err(local_err); + + /* + * Set the command status code to the first encountered error but allow a + * subsequent Internal Device Error to trump it. 
+ */ + if (req->status && status != NVME_INTERNAL_DEV_ERROR) { + return; + } + + req->status = status; +} + +static inline uint32_t nvme_zone_idx(NvmeNamespace *ns, uint64_t slba) +{ + return ns->zone_size_log2 > 0 ? slba >> ns->zone_size_log2 : + slba / ns->zone_size; +} + +static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba) +{ + uint32_t zone_idx = nvme_zone_idx(ns, slba); + + if (zone_idx >= ns->num_zones) { + return NULL; + } + + return &ns->zone_array[zone_idx]; +} + +static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone) +{ + uint64_t zslba = zone->d.zslba; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_CLOSED: + return NVME_SUCCESS; + case NVME_ZONE_STATE_FULL: + trace_pci_nvme_err_zone_is_full(zslba); + return NVME_ZONE_FULL; + case NVME_ZONE_STATE_OFFLINE: + trace_pci_nvme_err_zone_is_offline(zslba); + return NVME_ZONE_OFFLINE; + case NVME_ZONE_STATE_READ_ONLY: + trace_pci_nvme_err_zone_is_read_only(zslba); + return NVME_ZONE_READ_ONLY; + default: + assert(false); + } + + return NVME_INTERNAL_DEV_ERROR; +} + +static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone, + uint64_t slba, uint32_t nlb) +{ + uint64_t zcap = nvme_zone_wr_boundary(zone); + uint16_t status; + + status = nvme_check_zone_state_for_write(zone); + if (status) { + return status; + } + + if (unlikely(slba != zone->w_ptr)) { + trace_pci_nvme_err_write_not_at_wp(slba, zone->d.zslba, zone->w_ptr); + return NVME_ZONE_INVALID_WRITE; + } + + if (unlikely((slba + nlb) > zcap)) { + trace_pci_nvme_err_zone_boundary(slba, nlb, zcap); + return NVME_ZONE_BOUNDARY_ERROR; + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_check_zone_state_for_read(NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_FULL: + case NVME_ZONE_STATE_CLOSED: + case NVME_ZONE_STATE_READ_ONLY: + return NVME_SUCCESS; + case NVME_ZONE_STATE_OFFLINE: + trace_pci_nvme_err_zone_is_offline(zone->d.zslba); + return NVME_ZONE_OFFLINE; + default: + assert(false); + } + + return NVME_INTERNAL_DEV_ERROR; +} + +static uint16_t nvme_check_zone_read(NvmeNamespace *ns, uint64_t slba, + uint32_t nlb) +{ + NvmeZone *zone; + uint64_t bndry, end; + uint16_t status; + + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + bndry = nvme_zone_rd_boundary(ns, zone); + end = slba + nlb; + + status = nvme_check_zone_state_for_read(zone); + if (status) { + ; + } else if (unlikely(end > bndry)) { + if (!ns->params.cross_zone_read) { + status = NVME_ZONE_BOUNDARY_ERROR; + } else { + /* + * Read across zone boundary - check that all subsequent + * zones that are being read have an appropriate state. 
+ */ + do { + zone++; + status = nvme_check_zone_state_for_read(zone); + if (status) { + break; + } + } while (end > nvme_zone_rd_boundary(ns, zone)); + } + } + + return status; +} + +static uint16_t nvme_zrm_finish(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_FULL: + return NVME_SUCCESS; + + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + nvme_aor_dec_open(ns); + /* fallthrough */ + case NVME_ZONE_STATE_CLOSED: + nvme_aor_dec_active(ns); + /* fallthrough */ + case NVME_ZONE_STATE_EMPTY: + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_FULL); + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + nvme_aor_dec_open(ns); + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); + /* fall through */ + case NVME_ZONE_STATE_CLOSED: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone) +{ + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + nvme_aor_dec_open(ns); + /* fallthrough */ + case NVME_ZONE_STATE_CLOSED: + nvme_aor_dec_active(ns); + /* fallthrough */ + case NVME_ZONE_STATE_FULL: + zone->w_ptr = zone->d.zslba; + zone->d.wp = zone->w_ptr; + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY); + /* fallthrough */ + case NVME_ZONE_STATE_EMPTY: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) +{ + NvmeZone *zone; + + if (ns->params.max_open_zones && + ns->nr_open_zones == ns->params.max_open_zones) { + zone = QTAILQ_FIRST(&ns->imp_open_zones); + if (zone) { + /* + * Automatically close this implicitly open zone. 
+ */ + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + nvme_zrm_close(ns, zone); + } + } +} + +enum { + NVME_ZRM_AUTO = 1 << 0, +}; + +static uint16_t nvme_zrm_open_flags(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone, int flags) +{ + int act = 0; + uint16_t status; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + act = 1; + + /* fallthrough */ + + case NVME_ZONE_STATE_CLOSED: + if (n->params.auto_transition_zones) { + nvme_zrm_auto_transition_zone(ns); + } + status = nvme_aor_check(ns, act, 1); + if (status) { + return status; + } + + if (act) { + nvme_aor_inc_active(ns); + } + + nvme_aor_inc_open(ns); + + if (flags & NVME_ZRM_AUTO) { + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); + return NVME_SUCCESS; + } + + /* fallthrough */ + + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + if (flags & NVME_ZRM_AUTO) { + return NVME_SUCCESS; + } + + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EXPLICITLY_OPEN); + + /* fallthrough */ + + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + return NVME_SUCCESS; + + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static inline uint16_t nvme_zrm_auto(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone) +{ + return nvme_zrm_open_flags(n, ns, zone, NVME_ZRM_AUTO); +} + +static inline uint16_t nvme_zrm_open(NvmeCtrl *n, NvmeNamespace *ns, + NvmeZone *zone) +{ + return nvme_zrm_open_flags(n, ns, zone, 0); +} + +static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, + uint32_t nlb) +{ + zone->d.wp += nlb; + + if (zone->d.wp == nvme_zone_wr_boundary(zone)) { + nvme_zrm_finish(ns, zone); + } +} + +static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeZone *zone; + uint64_t slba; + uint32_t nlb; + + slba = le64_to_cpu(rw->slba); + nlb = le16_to_cpu(rw->nlb) + 1; + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + nvme_advance_zone_wp(ns, zone, nlb); +} + +static inline bool nvme_is_write(NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + + return rw->opcode == NVME_CMD_WRITE || + rw->opcode == NVME_CMD_ZONE_APPEND || + rw->opcode == NVME_CMD_WRITE_ZEROES; +} + +static AioContext *nvme_get_aio_context(BlockAIOCB *acb) +{ + return qemu_get_aio_context(); +} + +static void nvme_misc_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + + trace_pci_nvme_misc_cb(nvme_cid(req)); + + if (ret) { + nvme_aio_err(req, ret); + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +void nvme_rw_complete_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + trace_pci_nvme_rw_complete_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + } else { + block_acct_done(stats, acct); + } + + if (ns->params.zoned && nvme_is_write(req)) { + nvme_finalize_zoned_write(ns, req); + } + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static void nvme_rw_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + if (ns->lbaf.ms) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint64_t offset = nvme_moff(ns, slba); + + if (req->cmd.opcode == 
NVME_CMD_WRITE_ZEROES) { + size_t mlen = nvme_m2b(ns, nlb); + + req->aiocb = blk_aio_pwrite_zeroes(blk, offset, mlen, + BDRV_REQ_MAY_UNMAP, + nvme_rw_complete_cb, req); + return; + } + + if (nvme_ns_ext(ns) || req->cmd.mptr) { + uint16_t status; + + nvme_sg_unmap(&req->sg); + status = nvme_map_mdata(nvme_ctrl(req), nlb, req); + if (status) { + ret = -EFAULT; + goto out; + } + + if (req->cmd.opcode == NVME_CMD_READ) { + return nvme_blk_read(blk, offset, nvme_rw_complete_cb, req); + } + + return nvme_blk_write(blk, offset, nvme_rw_complete_cb, req); + } + } + +out: + nvme_rw_complete_cb(req, ret); +} + +static void nvme_verify_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_verify_cb(nvme_cid(req), prinfo, apptag, appmask, reftag); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + block_acct_done(stats, acct); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, + ctx->mdata.iov.size, slba); + if (status) { + req->status = status; + goto out; + } + + req->status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + prinfo, slba, apptag, appmask, &reftag); + } + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + + +static void nvme_verify_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = nvme_moff(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_verify_cb, ctx); + return; + +out: + nvme_verify_cb(ctx, ret); +} + +struct nvme_compare_ctx { + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } mdata; +}; + +static void nvme_compare_mdata_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeNamespace *ns = req->ns; + NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + struct nvme_compare_ctx *ctx = req->opaque; + g_autofree uint8_t *buf = NULL; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + uint16_t status = NVME_SUCCESS; + + 
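    /*
     * This callback completes the metadata phase of a Compare command: the
     * metadata just read from the backing device is in ctx->mdata, the host's
     * copy is bounced into a scratch buffer via nvme_bounce_mdata(), and the
     * two copies are compared. Namespaces formatted with protection
     * information are additionally verified with nvme_dif_check(); a
     * comparison mismatch completes the command with NVME_CMP_FAILURE.
     */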
trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + buf = g_malloc(ctx->mdata.iov.size); + + status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t *bufp; + uint8_t *mbufp = ctx->mdata.bounce; + uint8_t *end = mbufp + ctx->mdata.iov.size; + int16_t pil = 0; + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, + slba, apptag, appmask, &reftag); + if (status) { + req->status = status; + goto out; + } + + /* + * When formatted with protection information, do not compare the DIF + * tuple. + */ + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) { + if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + } + + goto out; + } + + if (memcmp(buf, ctx->mdata.bounce, ctx->mdata.iov.size)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + + block_acct_done(stats, acct); + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +static void nvme_compare_data_cb(void *opaque, int ret) +{ + NvmeRequest *req = opaque; + NvmeCtrl *n = nvme_ctrl(req); + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); + + struct nvme_compare_ctx *ctx = req->opaque; + g_autofree uint8_t *buf = NULL; + uint16_t status; + + trace_pci_nvme_compare_data_cb(nvme_cid(req)); + + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + + buf = g_malloc(ctx->data.iov.size); + + status = nvme_bounce_data(n, buf, ctx->data.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (memcmp(buf, ctx->data.bounce, ctx->data.iov.size)) { + req->status = NVME_CMP_FAILURE; + goto out; + } + + if (ns->lbaf.ms) { + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = nvme_moff(ns, slba); + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_compare_mdata_cb, req); + return; + } + + block_acct_done(stats, acct); + +out: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + g_free(ctx); + + nvme_enqueue_req_completion(nvme_cq(req), req); +} + +typedef struct NvmeDSMAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeDsmRange *range; + unsigned int nr; + unsigned int idx; +} NvmeDSMAIOCB; + +static void nvme_dsm_cancel(BlockAIOCB *aiocb) +{ + NvmeDSMAIOCB *iocb = container_of(aiocb, NvmeDSMAIOCB, common); + + /* break nvme_dsm_cb loop */ + iocb->idx = iocb->nr; + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } else { + /* + * We only reach this if nvme_dsm_cancel() has already 
been called or + * the command ran to completion and nvme_dsm_bh is scheduled to run. + */ + assert(iocb->idx == iocb->nr); + } +} + +static const AIOCBInfo nvme_dsm_aiocb_info = { + .aiocb_size = sizeof(NvmeDSMAIOCB), + .cancel_async = nvme_dsm_cancel, +}; + +static void nvme_dsm_bh(void *opaque) +{ + NvmeDSMAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); +} + +static void nvme_dsm_cb(void *opaque, int ret); + +static void nvme_dsm_md_cb(void *opaque, int ret) +{ + NvmeDSMAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeDsmRange *range; + uint64_t slba; + uint32_t nlb; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + if (!ns->lbaf.ms) { + nvme_dsm_cb(iocb, 0); + return; + } + + range = &iocb->range[iocb->idx - 1]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb); + + /* + * Check that all block were discarded (zeroed); otherwise we do not zero + * the metadata. + */ + + ret = nvme_block_status_all(ns, slba, nlb, BDRV_BLOCK_ZERO); + if (ret) { + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + nvme_dsm_cb(iocb, 0); + } + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba), + nvme_m2b(ns, nlb), BDRV_REQ_MAY_UNMAP, + nvme_dsm_cb, iocb); + return; + +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static void nvme_dsm_cb(void *opaque, int ret) +{ + NvmeDSMAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + NvmeNamespace *ns = req->ns; + NvmeDsmRange *range; + uint64_t slba; + uint32_t nlb; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + +next: + if (iocb->idx == iocb->nr) { + goto done; + } + + range = &iocb->range[iocb->idx++]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb); + + trace_pci_nvme_dsm_deallocate(slba, nlb); + + if (nlb > n->dmrsl) { + trace_pci_nvme_dsm_single_range_limit_exceeded(nlb, n->dmrsl); + goto next; + } + + if (nvme_check_bounds(ns, slba, nlb)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, + ns->id_ns.nsze); + goto next; + } + + iocb->aiocb = blk_aio_pdiscard(ns->blkconf.blk, nvme_l2b(ns, slba), + nvme_l2b(ns, nlb), + nvme_dsm_md_cb, iocb); + return; + +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeDsmCmd *dsm = (NvmeDsmCmd *) &req->cmd; + uint32_t attr = le32_to_cpu(dsm->attributes); + uint32_t nr = (le32_to_cpu(dsm->nr) & 0xff) + 1; + uint16_t status = NVME_SUCCESS; + + trace_pci_nvme_dsm(nr, attr); + + if (attr & NVME_DSMGMT_AD) { + NvmeDSMAIOCB *iocb = blk_aio_get(&nvme_dsm_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_dsm_bh, iocb); + iocb->ret = 0; + iocb->range = g_new(NvmeDsmRange, nr); + iocb->nr = nr; + iocb->idx = 0; + + status = nvme_h2c(n, (uint8_t *)iocb->range, sizeof(NvmeDsmRange) * nr, + req); + if (status) { + return status; + } + + req->aiocb = &iocb->common; + nvme_dsm_cb(iocb, 0); + + return NVME_NO_COMPLETE; + } + + return status; +} + +static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t len = nvme_l2b(ns, nlb); + int64_t offset = nvme_l2b(ns, slba); + uint8_t prinfo = 
NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint32_t reftag = le32_to_cpu(rw->reftag); + NvmeBounceContext *ctx = NULL; + uint16_t status; + + trace_pci_nvme_verify(nvme_cid(req), nvme_nsid(ns), slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + return status; + } + + if (prinfo & NVME_PRINFO_PRACT) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + } + + if (len > n->page_size << n->params.vsl) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + return status; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + return status; + } + } + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_verify_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; +} + +typedef struct NvmeCopyAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeCopySourceRange *ranges; + int nr; + int idx; + + uint8_t *bounce; + QEMUIOVector iov; + struct { + BlockAcctCookie read; + BlockAcctCookie write; + } acct; + + uint32_t reftag; + uint64_t slba; + + NvmeZone *zone; +} NvmeCopyAIOCB; + +static void nvme_copy_cancel(BlockAIOCB *aiocb) +{ + NvmeCopyAIOCB *iocb = container_of(aiocb, NvmeCopyAIOCB, common); + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } +} + +static const AIOCBInfo nvme_copy_aiocb_info = { + .aiocb_size = sizeof(NvmeCopyAIOCB), + .cancel_async = nvme_copy_cancel, +}; + +static void nvme_copy_bh(void *opaque) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + BlockAcctStats *stats = blk_get_stats(ns->blkconf.blk); + + if (iocb->idx != iocb->nr) { + req->cqe.result = cpu_to_le32(iocb->idx); + } + + qemu_iovec_destroy(&iocb->iov); + g_free(iocb->bounce); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + if (iocb->ret < 0) { + block_acct_failed(stats, &iocb->acct.read); + block_acct_failed(stats, &iocb->acct.write); + } else { + block_acct_done(stats, &iocb->acct.read); + block_acct_done(stats, &iocb->acct.write); + } + + iocb->common.cb(iocb->common.opaque, iocb->ret); + qemu_aio_unref(iocb); +} + +static void nvme_copy_cb(void *opaque, int ret); + +static void nvme_copy_out_completed_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range = &iocb->ranges[iocb->idx]; + uint32_t nlb = le32_to_cpu(range->nlb) + 1; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (ns->params.zoned) { + nvme_advance_zone_wp(ns, iocb->zone, nlb); + } + + iocb->idx++; + iocb->slba += nlb; +out: + nvme_copy_cb(iocb, iocb->ret); +} + +static void nvme_copy_out_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint32_t nlb; + size_t mlen; + uint8_t *mbounce; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (!ns->lbaf.ms) { + nvme_copy_out_completed_cb(iocb, 
0); + return; + } + + range = &iocb->ranges[iocb->idx]; + nlb = le32_to_cpu(range->nlb) + 1; + + mlen = nvme_m2b(ns, nlb); + mbounce = iocb->bounce + nvme_l2b(ns, nlb); + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, mbounce, mlen); + + iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_moff(ns, iocb->slba), + &iocb->iov, 0, nvme_copy_out_completed_cb, + iocb); + + return; + +out: + nvme_copy_cb(iocb, ret); +} + +static void nvme_copy_in_completed_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint32_t nlb; + size_t len; + uint16_t status; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + range = &iocb->ranges[iocb->idx]; + nlb = le32_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + + trace_pci_nvme_copy_out(iocb->slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + uint16_t apptag = le16_to_cpu(range->apptag); + uint16_t appmask = le16_to_cpu(range->appmask); + uint32_t reftag = le32_to_cpu(range->reftag); + + uint64_t slba = le64_to_cpu(range->slba); + size_t mlen = nvme_m2b(ns, nlb); + uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); + + status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor, + slba, apptag, appmask, &reftag); + if (status) { + goto invalid; + } + + apptag = le16_to_cpu(copy->apptag); + appmask = le16_to_cpu(copy->appmask); + + if (prinfow & NVME_PRINFO_PRACT) { + status = nvme_check_prinfo(ns, prinfow, iocb->slba, iocb->reftag); + if (status) { + goto invalid; + } + + nvme_dif_pract_generate_dif(ns, iocb->bounce, len, mbounce, mlen, + apptag, &iocb->reftag); + } else { + status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, + prinfow, iocb->slba, apptag, appmask, + &iocb->reftag); + if (status) { + goto invalid; + } + } + } + + status = nvme_check_bounds(ns, iocb->slba, nlb); + if (status) { + goto invalid; + } + + if (ns->params.zoned) { + status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb); + if (status) { + goto invalid; + } + + iocb->zone->w_ptr += nlb; + } + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce, len); + + iocb->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, iocb->slba), + &iocb->iov, 0, nvme_copy_out_cb, iocb); + + return; + +invalid: + req->status = status; + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } + + return; + +out: + nvme_copy_cb(iocb, ret); +} + +static void nvme_copy_in_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (!ns->lbaf.ms) { + nvme_copy_in_completed_cb(iocb, 0); + return; + } + + range = &iocb->ranges[iocb->idx]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb) + 1; + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce + nvme_l2b(ns, nlb), + nvme_m2b(ns, nlb)); + + iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_moff(ns, slba), + &iocb->iov, 0, nvme_copy_in_completed_cb, + iocb); + return; + +out: + nvme_copy_cb(iocb, iocb->ret); +} + +static void nvme_copy_cb(void *opaque, int ret) +{ + NvmeCopyAIOCB *iocb = opaque; 
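+    /*
+     * Per-range copy loop: read the current source range into the bounce
+     * buffer (nvme_copy_in_cb), verify and write it out
+     * (nvme_copy_in_completed_cb/nvme_copy_out_cb) and re-enter here for the
+     * next range; once iocb->idx reaches iocb->nr the bottom half completes
+     * the command.
+     */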
+ NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + NvmeCopySourceRange *range; + uint64_t slba; + uint32_t nlb; + size_t len; + uint16_t status; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } else if (iocb->ret < 0) { + goto done; + } + + if (iocb->idx == iocb->nr) { + goto done; + } + + range = &iocb->ranges[iocb->idx]; + slba = le64_to_cpu(range->slba); + nlb = le32_to_cpu(range->nlb) + 1; + len = nvme_l2b(ns, nlb); + + trace_pci_nvme_copy_source_range(slba, nlb); + + if (nlb > le16_to_cpu(ns->id_ns.mssrl)) { + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto invalid; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + goto invalid; + } + } + + if (ns->params.zoned) { + status = nvme_check_zone_read(ns, slba, nlb); + if (status) { + goto invalid; + } + } + + qemu_iovec_reset(&iocb->iov); + qemu_iovec_add(&iocb->iov, iocb->bounce, len); + + iocb->aiocb = blk_aio_preadv(ns->blkconf.blk, nvme_l2b(ns, slba), + &iocb->iov, 0, nvme_copy_in_cb, iocb); + return; + +invalid: + req->status = status; +done: + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } +} + + +static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = req->ns; + NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; + NvmeCopyAIOCB *iocb = blk_aio_get(&nvme_copy_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + uint16_t nr = copy->nr + 1; + uint8_t format = copy->control[0] & 0xf; + uint16_t prinfor = ((copy->control[0] >> 4) & 0xf); + uint16_t prinfow = ((copy->control[2] >> 2) & 0xf); + + uint16_t status; + + trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format); + + iocb->ranges = NULL; + iocb->zone = NULL; + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && + ((prinfor & NVME_PRINFO_PRACT) != (prinfow & NVME_PRINFO_PRACT))) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + + if (!(n->id_ctrl.ocfs & (1 << format))) { + trace_pci_nvme_err_copy_invalid_format(format); + status = NVME_INVALID_FIELD | NVME_DNR; + goto invalid; + } + + if (nr > ns->id_ns.msrc + 1) { + status = NVME_CMD_SIZE_LIMIT | NVME_DNR; + goto invalid; + } + + iocb->ranges = g_new(NvmeCopySourceRange, nr); + + status = nvme_h2c(n, (uint8_t *)iocb->ranges, + sizeof(NvmeCopySourceRange) * nr, req); + if (status) { + goto invalid; + } + + iocb->slba = le64_to_cpu(copy->sdlba); + + if (ns->params.zoned) { + iocb->zone = nvme_get_zone_by_slba(ns, iocb->slba); + if (!iocb->zone) { + status = NVME_LBA_RANGE | NVME_DNR; + goto invalid; + } + + status = nvme_zrm_auto(n, ns, iocb->zone); + if (status) { + goto invalid; + } + } + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_copy_bh, iocb); + iocb->ret = 0; + iocb->nr = nr; + iocb->idx = 0; + iocb->reftag = le32_to_cpu(copy->reftag); + iocb->bounce = g_malloc_n(le16_to_cpu(ns->id_ns.mssrl), + ns->lbasz + ns->lbaf.ms); + + qemu_iovec_init(&iocb->iov, 1); + + block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.read, 0, + BLOCK_ACCT_READ); + block_acct_start(blk_get_stats(ns->blkconf.blk), &iocb->acct.write, 0, + BLOCK_ACCT_WRITE); + + req->aiocb = &iocb->common; + nvme_copy_cb(iocb, 0); + + return NVME_NO_COMPLETE; + +invalid: + g_free(iocb->ranges); + qemu_aio_unref(iocb); + return status; +} + +static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + uint64_t slba 
= le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + size_t data_len = nvme_l2b(ns, nlb); + size_t len = data_len; + int64_t offset = nvme_l2b(ns, slba); + struct nvme_compare_ctx *ctx = NULL; + uint16_t status; + + trace_pci_nvme_compare(nvme_cid(req), nvme_nsid(ns), slba, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && (prinfo & NVME_PRINFO_PRACT)) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + if (nvme_ns_ext(ns)) { + len += nvme_m2b(ns, nlb); + } + + status = nvme_check_mdts(n, len); + if (status) { + return status; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + return status; + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + return status; + } + } + + status = nvme_map_dptr(n, &req->sg, len, &req->cmd); + if (status) { + return status; + } + + ctx = g_new(struct nvme_compare_ctx, 1); + ctx->data.bounce = g_malloc(data_len); + + req->opaque = ctx; + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, data_len); + + block_acct_start(blk_get_stats(blk), &req->acct, data_len, + BLOCK_ACCT_READ); + req->aiocb = blk_aio_preadv(blk, offset, &ctx->data.iov, 0, + nvme_compare_data_cb, req); + + return NVME_NO_COMPLETE; +} + +typedef struct NvmeFlushAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; +} NvmeFlushAIOCB; + +static void nvme_flush_cancel(BlockAIOCB *acb) +{ + NvmeFlushAIOCB *iocb = container_of(acb, NvmeFlushAIOCB, common); + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + } +} + +static const AIOCBInfo nvme_flush_aiocb_info = { + .aiocb_size = sizeof(NvmeFlushAIOCB), + .cancel_async = nvme_flush_cancel, + .get_aio_context = nvme_get_aio_context, +}; + +static void nvme_flush_ns_cb(void *opaque, int ret) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeNamespace *ns = iocb->ns; + + if (ret < 0) { + iocb->ret = ret; + goto out; + } else if (iocb->ret < 0) { + goto out; + } + + if (ns) { + trace_pci_nvme_flush_ns(iocb->nsid); + + iocb->ns = NULL; + iocb->aiocb = blk_aio_flush(ns->blkconf.blk, nvme_flush_ns_cb, iocb); + return; + } + +out: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static void nvme_flush_bh(void *opaque) +{ + NvmeFlushAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + int i; + + if (iocb->ret < 0) { + goto done; + } + + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } + } + } + + if (!iocb->ns) { + goto done; + } + + nvme_flush_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); + + return; +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFlushAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_flush_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_flush_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; + } + + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + 
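+            /* the NSID is valid, but no namespace is attached to this controller */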
status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + + iocb->nsid = nsid; + } + + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; + +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + + return status; +} + +static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint64_t data_size = nvme_l2b(ns, nlb); + uint64_t mapped_size = data_size; + uint64_t data_offset; + BlockBackend *blk = ns->blkconf.blk; + uint16_t status; + + if (nvme_ns_ext(ns)) { + mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = prinfo & NVME_PRINFO_PRACT; + + if (pract && ns->lbaf.ms == 8) { + mapped_size = data_size; + } + } + } + + trace_pci_nvme_read(nvme_cid(req), nvme_nsid(ns), nlb, mapped_size, slba); + + status = nvme_check_mdts(n, mapped_size); + if (status) { + goto invalid; + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } + + if (ns->params.zoned) { + status = nvme_check_zone_read(ns, slba, nlb); + if (status) { + trace_pci_nvme_err_zone_read_not_ok(slba, nlb, status); + goto invalid; + } + } + + if (NVME_ERR_REC_DULBE(ns->features.err_rec)) { + status = nvme_check_dulbe(ns, slba, nlb); + if (status) { + goto invalid; + } + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + + status = nvme_map_data(n, nlb, req); + if (status) { + goto invalid; + } + + data_offset = nvme_l2b(ns, slba); + + block_acct_start(blk_get_stats(blk), &req->acct, data_size, + BLOCK_ACCT_READ); + nvme_blk_read(blk, data_offset, nvme_rw_cb, req); + return NVME_NO_COMPLETE; + +invalid: + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_READ); + return status | NVME_DNR; +} + +static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, + bool wrz) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + uint64_t slba = le64_to_cpu(rw->slba); + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; + uint16_t ctrl = le16_to_cpu(rw->control); + uint8_t prinfo = NVME_RW_PRINFO(ctrl); + uint64_t data_size = nvme_l2b(ns, nlb); + uint64_t mapped_size = data_size; + uint64_t data_offset; + NvmeZone *zone; + NvmeZonedResult *res = (NvmeZonedResult *)&req->cqe; + BlockBackend *blk = ns->blkconf.blk; + uint16_t status; + + if (nvme_ns_ext(ns)) { + mapped_size += nvme_m2b(ns, nlb); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + bool pract = prinfo & NVME_PRINFO_PRACT; + + if (pract && ns->lbaf.ms == 8) { + mapped_size -= nvme_m2b(ns, nlb); + } + } + } + + trace_pci_nvme_write(nvme_cid(req), nvme_io_opc_str(rw->opcode), + nvme_nsid(ns), nlb, mapped_size, slba); + + if (!wrz) { + status = nvme_check_mdts(n, mapped_size); + if (status) { + goto invalid; + } + } + + status = nvme_check_bounds(ns, slba, nlb); + if (status) { + goto invalid; + } + + if (ns->params.zoned) { + zone = nvme_get_zone_by_slba(ns, slba); + assert(zone); + + if (append) { + bool piremap = !!(ctrl & NVME_RW_PIREMAP); + + if (unlikely(slba != zone->d.zslba)) { + trace_pci_nvme_err_append_not_at_start(slba, zone->d.zslba); + status = NVME_INVALID_FIELD; + goto invalid; + } + + if (n->params.zasl && + data_size > (uint64_t)n->page_size << n->params.zasl) { + trace_pci_nvme_err_zasl(data_size); + return NVME_INVALID_FIELD | NVME_DNR; + } + + 
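+            /*
+             * Zone Append: substitute the zone start LBA given by the host
+             * with the current write pointer and return the LBA actually
+             * written in the completion queue entry; for protection
+             * information types 1 and 2 the reference tag is remapped by the
+             * same offset when PIREMAP is set.
+             */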
slba = zone->w_ptr; + rw->slba = cpu_to_le64(slba); + res->slba = cpu_to_le64(slba); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + if (!piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + /* fallthrough */ + + case NVME_ID_NS_DPS_TYPE_2: + if (piremap) { + uint32_t reftag = le32_to_cpu(rw->reftag); + rw->reftag = cpu_to_le32(reftag + (slba - zone->d.zslba)); + } + + break; + + case NVME_ID_NS_DPS_TYPE_3: + if (piremap) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + break; + } + } + + status = nvme_check_zone_write(ns, zone, slba, nlb); + if (status) { + goto invalid; + } + + status = nvme_zrm_auto(n, ns, zone); + if (status) { + goto invalid; + } + + zone->w_ptr += nlb; + } + + data_offset = nvme_l2b(ns, slba); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + return nvme_dif_rw(n, req); + } + + if (!wrz) { + status = nvme_map_data(n, nlb, req); + if (status) { + goto invalid; + } + + block_acct_start(blk_get_stats(blk), &req->acct, data_size, + BLOCK_ACCT_WRITE); + nvme_blk_write(blk, data_offset, nvme_rw_cb, req); + } else { + req->aiocb = blk_aio_pwrite_zeroes(blk, data_offset, data_size, + BDRV_REQ_MAY_UNMAP, nvme_rw_cb, + req); + } + + return NVME_NO_COMPLETE; + +invalid: + block_acct_invalid(blk_get_stats(blk), BLOCK_ACCT_WRITE); + return status | NVME_DNR; +} + +static inline uint16_t nvme_write(NvmeCtrl *n, NvmeRequest *req) +{ + return nvme_do_write(n, req, false, false); +} + +static inline uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req) +{ + return nvme_do_write(n, req, false, true); +} + +static inline uint16_t nvme_zone_append(NvmeCtrl *n, NvmeRequest *req) +{ + return nvme_do_write(n, req, true, false); +} + +static uint16_t nvme_get_mgmt_zone_slba_idx(NvmeNamespace *ns, NvmeCmd *c, + uint64_t *slba, uint32_t *zone_idx) +{ + uint32_t dw10 = le32_to_cpu(c->cdw10); + uint32_t dw11 = le32_to_cpu(c->cdw11); + + if (!ns->params.zoned) { + trace_pci_nvme_err_invalid_opc(c->opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + *slba = ((uint64_t)dw11) << 32 | dw10; + if (unlikely(*slba >= ns->id_ns.nsze)) { + trace_pci_nvme_err_invalid_lba_range(*slba, 0, ns->id_ns.nsze); + *slba = 0; + return NVME_LBA_RANGE | NVME_DNR; + } + + *zone_idx = nvme_zone_idx(ns, *slba); + assert(*zone_idx < ns->num_zones); + + return NVME_SUCCESS; +} + +typedef uint16_t (*op_handler_t)(NvmeNamespace *, NvmeZone *, NvmeZoneState, + NvmeRequest *); + +enum NvmeZoneProcessingMask { + NVME_PROC_CURRENT_ZONE = 0, + NVME_PROC_OPENED_ZONES = 1 << 0, + NVME_PROC_CLOSED_ZONES = 1 << 1, + NVME_PROC_READ_ONLY_ZONES = 1 << 2, + NVME_PROC_FULL_ZONES = 1 << 3, +}; + +static uint16_t nvme_open_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + return nvme_zrm_open(nvme_ctrl(req), ns, zone); +} + +static uint16_t nvme_close_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + return nvme_zrm_close(ns, zone); +} + +static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + return nvme_zrm_finish(ns, zone); +} + +static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState state, NvmeRequest *req) +{ + switch (state) { + case NVME_ZONE_STATE_READ_ONLY: + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_OFFLINE); + /* fall through */ + case NVME_ZONE_STATE_OFFLINE: + return NVME_SUCCESS; + default: + return NVME_ZONE_INVAL_TRANSITION; + } +} + +static uint16_t nvme_set_zd_ext(NvmeNamespace *ns, NvmeZone *zone) +{ 
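+    /*
+     * Setting a zone descriptor extension on an Empty zone transitions it to
+     * Closed; this consumes an active resource, hence the nvme_aor_check()
+     * before the state change.
+     */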
+ uint16_t status; + uint8_t state = nvme_get_zone_state(zone); + + if (state == NVME_ZONE_STATE_EMPTY) { + status = nvme_aor_check(ns, 1, 0); + if (status) { + return status; + } + nvme_aor_inc_active(ns); + zone->d.za |= NVME_ZA_ZD_EXT_VALID; + nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_CLOSED); + return NVME_SUCCESS; + } + + return NVME_ZONE_INVAL_TRANSITION; +} + +static uint16_t nvme_bulk_proc_zone(NvmeNamespace *ns, NvmeZone *zone, + enum NvmeZoneProcessingMask proc_mask, + op_handler_t op_hndlr, NvmeRequest *req) +{ + uint16_t status = NVME_SUCCESS; + NvmeZoneState zs = nvme_get_zone_state(zone); + bool proc_zone; + + switch (zs) { + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + proc_zone = proc_mask & NVME_PROC_OPENED_ZONES; + break; + case NVME_ZONE_STATE_CLOSED: + proc_zone = proc_mask & NVME_PROC_CLOSED_ZONES; + break; + case NVME_ZONE_STATE_READ_ONLY: + proc_zone = proc_mask & NVME_PROC_READ_ONLY_ZONES; + break; + case NVME_ZONE_STATE_FULL: + proc_zone = proc_mask & NVME_PROC_FULL_ZONES; + break; + default: + proc_zone = false; + } + + if (proc_zone) { + status = op_hndlr(ns, zone, zs, req); + } + + return status; +} + +static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone, + enum NvmeZoneProcessingMask proc_mask, + op_handler_t op_hndlr, NvmeRequest *req) +{ + NvmeZone *next; + uint16_t status = NVME_SUCCESS; + int i; + + if (!proc_mask) { + status = op_hndlr(ns, zone, nvme_get_zone_state(zone), req); + } else { + if (proc_mask & NVME_PROC_CLOSED_ZONES) { + QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + if (proc_mask & NVME_PROC_OPENED_ZONES) { + QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + + QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + if (proc_mask & NVME_PROC_FULL_ZONES) { + QTAILQ_FOREACH_SAFE(zone, &ns->full_zones, entry, next) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + + if (proc_mask & NVME_PROC_READ_ONLY_ZONES) { + for (i = 0; i < ns->num_zones; i++, zone++) { + status = nvme_bulk_proc_zone(ns, zone, proc_mask, op_hndlr, + req); + if (status && status != NVME_NO_COMPLETE) { + goto out; + } + } + } + } + +out: + return status; +} + +typedef struct NvmeZoneResetAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + NvmeRequest *req; + QEMUBH *bh; + int ret; + + bool all; + int idx; + NvmeZone *zone; +} NvmeZoneResetAIOCB; + +static void nvme_zone_reset_cancel(BlockAIOCB *aiocb) +{ + NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common); + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + + iocb->idx = ns->num_zones; + + iocb->ret = -ECANCELED; + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + iocb->aiocb = NULL; + } +} + +static const AIOCBInfo nvme_zone_reset_aiocb_info = { + .aiocb_size = sizeof(NvmeZoneResetAIOCB), + .cancel_async = nvme_zone_reset_cancel, +}; + +static void nvme_zone_reset_bh(void *opaque) +{ + NvmeZoneResetAIOCB *iocb = opaque; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + 
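+    /* the reset chain has finished; release the bottom half and drop the AIOCB reference */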
qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); +} + +static void nvme_zone_reset_cb(void *opaque, int ret); + +static void nvme_zone_reset_epilogue_cb(void *opaque, int ret) +{ + NvmeZoneResetAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + int64_t moff; + int count; + + if (ret < 0) { + nvme_zone_reset_cb(iocb, ret); + return; + } + + if (!ns->lbaf.ms) { + nvme_zone_reset_cb(iocb, 0); + return; + } + + moff = nvme_moff(ns, iocb->zone->d.zslba); + count = nvme_m2b(ns, ns->zone_size); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count, + BDRV_REQ_MAY_UNMAP, + nvme_zone_reset_cb, iocb); + return; +} + +static void nvme_zone_reset_cb(void *opaque, int ret) +{ + NvmeZoneResetAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = req->ns; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + if (iocb->zone) { + nvme_zrm_reset(ns, iocb->zone); + + if (!iocb->all) { + goto done; + } + } + + while (iocb->idx < ns->num_zones) { + NvmeZone *zone = &ns->zone_array[iocb->idx++]; + + switch (nvme_get_zone_state(zone)) { + case NVME_ZONE_STATE_EMPTY: + if (!iocb->all) { + goto done; + } + + continue; + + case NVME_ZONE_STATE_EXPLICITLY_OPEN: + case NVME_ZONE_STATE_IMPLICITLY_OPEN: + case NVME_ZONE_STATE_CLOSED: + case NVME_ZONE_STATE_FULL: + iocb->zone = zone; + break; + + default: + continue; + } + + trace_pci_nvme_zns_zone_reset(zone->d.zslba); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, + nvme_l2b(ns, zone->d.zslba), + nvme_l2b(ns, ns->zone_size), + BDRV_REQ_MAY_UNMAP, + nvme_zone_reset_epilogue_cb, + iocb); + return; + } + +done: + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } +} + +static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = (NvmeCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + NvmeZone *zone; + NvmeZoneResetAIOCB *iocb; + uint8_t *zd_ext; + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint64_t slba = 0; + uint32_t zone_idx = 0; + uint16_t status; + uint8_t action; + bool all; + enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE; + + action = dw13 & 0xff; + all = !!(dw13 & 0x100); + + req->status = NVME_SUCCESS; + + if (!all) { + status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); + if (status) { + return status; + } + } + + zone = &ns->zone_array[zone_idx]; + if (slba != zone->d.zslba) { + trace_pci_nvme_err_unaligned_zone_cmd(action, slba, zone->d.zslba); + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (action) { + + case NVME_ZONE_ACTION_OPEN: + if (all) { + proc_mask = NVME_PROC_CLOSED_ZONES; + } + trace_pci_nvme_open_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_open_zone, req); + break; + + case NVME_ZONE_ACTION_CLOSE: + if (all) { + proc_mask = NVME_PROC_OPENED_ZONES; + } + trace_pci_nvme_close_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_close_zone, req); + break; + + case NVME_ZONE_ACTION_FINISH: + if (all) { + proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES; + } + trace_pci_nvme_finish_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_finish_zone, req); + break; + + case NVME_ZONE_ACTION_RESET: + trace_pci_nvme_reset_zone(slba, zone_idx, all); + + iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk, + nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb); + iocb->ret = 0; + iocb->all = all; + iocb->idx = zone_idx; + 
iocb->zone = NULL; + + req->aiocb = &iocb->common; + nvme_zone_reset_cb(iocb, 0); + + return NVME_NO_COMPLETE; + + case NVME_ZONE_ACTION_OFFLINE: + if (all) { + proc_mask = NVME_PROC_READ_ONLY_ZONES; + } + trace_pci_nvme_offline_zone(slba, zone_idx, all); + status = nvme_do_zone_op(ns, zone, proc_mask, nvme_offline_zone, req); + break; + + case NVME_ZONE_ACTION_SET_ZD_EXT: + trace_pci_nvme_set_descriptor_extension(slba, zone_idx); + if (all || !ns->params.zd_extension_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + zd_ext = nvme_get_zd_extension(ns, zone_idx); + status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req); + if (status) { + trace_pci_nvme_err_zd_extension_map_error(zone_idx); + return status; + } + + status = nvme_set_zd_ext(ns, zone); + if (status == NVME_SUCCESS) { + trace_pci_nvme_zd_extension_set(zone_idx); + return status; + } + break; + + default: + trace_pci_nvme_err_invalid_mgmt_action(action); + status = NVME_INVALID_FIELD; + } + + if (status == NVME_ZONE_INVAL_TRANSITION) { + trace_pci_nvme_err_invalid_zone_state_transition(action, slba, + zone->d.za); + } + if (status) { + status |= NVME_DNR; + } + + return status; +} + +static bool nvme_zone_matches_filter(uint32_t zafs, NvmeZone *zl) +{ + NvmeZoneState zs = nvme_get_zone_state(zl); + + switch (zafs) { + case NVME_ZONE_REPORT_ALL: + return true; + case NVME_ZONE_REPORT_EMPTY: + return zs == NVME_ZONE_STATE_EMPTY; + case NVME_ZONE_REPORT_IMPLICITLY_OPEN: + return zs == NVME_ZONE_STATE_IMPLICITLY_OPEN; + case NVME_ZONE_REPORT_EXPLICITLY_OPEN: + return zs == NVME_ZONE_STATE_EXPLICITLY_OPEN; + case NVME_ZONE_REPORT_CLOSED: + return zs == NVME_ZONE_STATE_CLOSED; + case NVME_ZONE_REPORT_FULL: + return zs == NVME_ZONE_STATE_FULL; + case NVME_ZONE_REPORT_READ_ONLY: + return zs == NVME_ZONE_STATE_READ_ONLY; + case NVME_ZONE_REPORT_OFFLINE: + return zs == NVME_ZONE_STATE_OFFLINE; + default: + return false; + } +} + +static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = (NvmeCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + /* cdw12 is zero-based number of dwords to return. 
Convert to bytes */ + uint32_t data_size = (le32_to_cpu(cmd->cdw12) + 1) << 2; + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint32_t zone_idx, zra, zrasf, partial; + uint64_t max_zones, nr_zones = 0; + uint16_t status; + uint64_t slba; + NvmeZoneDescr *z; + NvmeZone *zone; + NvmeZoneReportHeader *header; + void *buf, *buf_p; + size_t zone_entry_sz; + int i; + + req->status = NVME_SUCCESS; + + status = nvme_get_mgmt_zone_slba_idx(ns, cmd, &slba, &zone_idx); + if (status) { + return status; + } + + zra = dw13 & 0xff; + if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + zrasf = (dw13 >> 8) & 0xff; + if (zrasf > NVME_ZONE_REPORT_OFFLINE) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (data_size < sizeof(NvmeZoneReportHeader)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + status = nvme_check_mdts(n, data_size); + if (status) { + return status; + } + + partial = (dw13 >> 16) & 0x01; + + zone_entry_sz = sizeof(NvmeZoneDescr); + if (zra == NVME_ZONE_REPORT_EXTENDED) { + zone_entry_sz += ns->params.zd_extension_size; + } + + max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz; + buf = g_malloc0(data_size); + + zone = &ns->zone_array[zone_idx]; + for (i = zone_idx; i < ns->num_zones; i++) { + if (partial && nr_zones >= max_zones) { + break; + } + if (nvme_zone_matches_filter(zrasf, zone++)) { + nr_zones++; + } + } + header = (NvmeZoneReportHeader *)buf; + header->nr_zones = cpu_to_le64(nr_zones); + + buf_p = buf + sizeof(NvmeZoneReportHeader); + for (; zone_idx < ns->num_zones && max_zones > 0; zone_idx++) { + zone = &ns->zone_array[zone_idx]; + if (nvme_zone_matches_filter(zrasf, zone)) { + z = (NvmeZoneDescr *)buf_p; + buf_p += sizeof(NvmeZoneDescr); + + z->zt = zone->d.zt; + z->zs = zone->d.zs; + z->zcap = cpu_to_le64(zone->d.zcap); + z->zslba = cpu_to_le64(zone->d.zslba); + z->za = zone->d.za; + + if (nvme_wp_is_valid(zone)) { + z->wp = cpu_to_le64(zone->d.wp); + } else { + z->wp = cpu_to_le64(~0ULL); + } + + if (zra == NVME_ZONE_REPORT_EXTENDED) { + if (zone->d.za & NVME_ZA_ZD_EXT_VALID) { + memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx), + ns->params.zd_extension_size); + } + buf_p += ns->params.zd_extension_size; + } + + max_zones--; + } + } + + status = nvme_c2h(n, (uint8_t *)buf, data_size, req); + + g_free(buf); + + return status; +} + +static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + + trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), + req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); + + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + /* + * In the base NVM command set, Flush may apply to all namespaces + * (indicated by NSID being set to FFFFFFFFh). But if that feature is used + * along with TP 4056 (Namespace Types), it may be pretty screwed up. + * + * If NSID is indeed set to FFFFFFFFh, we simply cannot associate the + * opcode with a specific command since we cannot determine a unique I/O + * command set. Opcode 0h could have any other meaning than something + * equivalent to flushing and say it DOES have completely different + * semantics in some other command set - does an NSID of FFFFFFFFh then + * mean "for all namespaces, apply whatever command set specific command + * that uses the 0h opcode?" 
Or does it mean "for all namespaces, apply + * whatever command that uses the 0h opcode if, and only if, it allows NSID + * to be FFFFFFFFh"? + * + * Anyway (and luckily), for now, we do not care about this since the + * device only supports namespace types that includes the NVM Flush command + * (NVM and Zoned), so always do an NVM Flush. + */ + if (req->cmd.opcode == NVME_CMD_FLUSH) { + return nvme_flush(n, req); + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + if (ns->status) { + return ns->status; + } + + if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) { + return NVME_INVALID_FIELD; + } + + req->ns = ns; + + switch (req->cmd.opcode) { + case NVME_CMD_WRITE_ZEROES: + return nvme_write_zeroes(n, req); + case NVME_CMD_ZONE_APPEND: + return nvme_zone_append(n, req); + case NVME_CMD_WRITE: + return nvme_write(n, req); + case NVME_CMD_READ: + return nvme_read(n, req); + case NVME_CMD_COMPARE: + return nvme_compare(n, req); + case NVME_CMD_DSM: + return nvme_dsm(n, req); + case NVME_CMD_VERIFY: + return nvme_verify(n, req); + case NVME_CMD_COPY: + return nvme_copy(n, req); + case NVME_CMD_ZONE_MGMT_SEND: + return nvme_zone_mgmt_send(n, req); + case NVME_CMD_ZONE_MGMT_RECV: + return nvme_zone_mgmt_recv(n, req); + default: + assert(false); + } + + return NVME_INVALID_OPCODE | NVME_DNR; +} + +static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) +{ + n->sq[sq->sqid] = NULL; + timer_free(sq->timer); + g_free(sq->io_req); + if (sq->sqid) { + g_free(sq); + } +} + +static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; + NvmeRequest *r, *next; + NvmeSQueue *sq; + NvmeCQueue *cq; + uint16_t qid = le16_to_cpu(c->qid); + + if (unlikely(!qid || nvme_check_sqid(n, qid))) { + trace_pci_nvme_err_invalid_del_sq(qid); + return NVME_INVALID_QID | NVME_DNR; + } + + trace_pci_nvme_del_sq(qid); + + sq = n->sq[qid]; + while (!QTAILQ_EMPTY(&sq->out_req_list)) { + r = QTAILQ_FIRST(&sq->out_req_list); + assert(r->aiocb); + blk_aio_cancel(r->aiocb); + } + + assert(QTAILQ_EMPTY(&sq->out_req_list)); + + if (!nvme_check_cqid(n, sq->cqid)) { + cq = n->cq[sq->cqid]; + QTAILQ_REMOVE(&cq->sq_list, sq, entry); + + nvme_post_cqes(cq); + QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) { + if (r->sq == sq) { + QTAILQ_REMOVE(&cq->req_list, r, entry); + QTAILQ_INSERT_TAIL(&sq->req_list, r, entry); + } + } + } + + nvme_free_sq(sq, n); + return NVME_SUCCESS; +} + +static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + uint16_t sqid, uint16_t cqid, uint16_t size) +{ + int i; + NvmeCQueue *cq; + + sq->ctrl = n; + sq->dma_addr = dma_addr; + sq->sqid = sqid; + sq->size = size; + sq->cqid = cqid; + sq->head = sq->tail = 0; + sq->io_req = g_new0(NvmeRequest, sq->size); + + QTAILQ_INIT(&sq->req_list); + QTAILQ_INIT(&sq->out_req_list); + for (i = 0; i < sq->size; i++) { + sq->io_req[i].sq = sq; + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); + + assert(n->cq[cqid]); + cq = n->cq[cqid]; + QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); + n->sq[sqid] = sq; +} + +static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeSQueue *sq; + NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd; + + uint16_t cqid = le16_to_cpu(c->cqid); + uint16_t sqid = le16_to_cpu(c->sqid); + uint16_t 
qsize = le16_to_cpu(c->qsize); + uint16_t qflags = le16_to_cpu(c->sq_flags); + uint64_t prp1 = le64_to_cpu(c->prp1); + + trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); + + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { + trace_pci_nvme_err_invalid_create_sq_cqid(cqid); + return NVME_INVALID_CQID | NVME_DNR; + } + if (unlikely(!sqid || sqid > n->params.max_ioqpairs || + n->sq[sqid] != NULL)) { + trace_pci_nvme_err_invalid_create_sq_sqid(sqid); + return NVME_INVALID_QID | NVME_DNR; + } + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { + trace_pci_nvme_err_invalid_create_sq_size(qsize); + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (unlikely(prp1 & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_create_sq_addr(prp1); + return NVME_INVALID_PRP_OFFSET | NVME_DNR; + } + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { + trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); + return NVME_INVALID_FIELD | NVME_DNR; + } + sq = g_malloc0(sizeof(*sq)); + nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1); + return NVME_SUCCESS; +} + +struct nvme_stats { + uint64_t units_read; + uint64_t units_written; + uint64_t read_commands; + uint64_t write_commands; +}; + +static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) +{ + BlockAcctStats *s = blk_get_stats(ns->blkconf.blk); + + stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; + stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; + stats->read_commands += s->nr_ops[BLOCK_ACCT_READ]; + stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; +} + +static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + struct nvme_stats stats = { 0 }; + NvmeSmartLog smart = { 0 }; + uint32_t trans_len; + NvmeNamespace *ns; + time_t current_ms; + + if (off >= sizeof(smart)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (nsid != 0xffffffff) { + ns = nvme_ns(n, nsid); + if (!ns) { + return NVME_INVALID_NSID | NVME_DNR; + } + nvme_set_blk_stats(ns, &stats); + } else { + int i; + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + nvme_set_blk_stats(ns, &stats); + } + } + + trans_len = MIN(sizeof(smart) - off, buf_len); + smart.critical_warning = n->smart_critical_warning; + + smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, + 1000)); + smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written, + 1000)); + smart.host_read_commands[0] = cpu_to_le64(stats.read_commands); + smart.host_write_commands[0] = cpu_to_le64(stats.write_commands); + + smart.temperature = cpu_to_le16(n->temperature); + + if ((n->temperature >= n->features.temp_thresh_hi) || + (n->temperature <= n->features.temp_thresh_low)) { + smart.critical_warning |= NVME_SMART_TEMPERATURE; + } + + current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + smart.power_on_hours[0] = + cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req); +} + +static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + uint32_t trans_len; + NvmeFwSlotInfoLog fw_log = { + .afi = 0x1, + }; + + if (off >= sizeof(fw_log)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' '); + trans_len 
= MIN(sizeof(fw_log) - off, buf_len); + + return nvme_c2h(n, (uint8_t *) &fw_log + off, trans_len, req); +} + +static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + NvmeErrorLog errlog; + + if (off >= sizeof(errlog)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_ERROR); + } + + memset(&errlog, 0x0, sizeof(errlog)); + trans_len = MIN(sizeof(errlog) - off, buf_len); + + return nvme_c2h(n, (uint8_t *)&errlog, trans_len, req); +} + +static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t nslist[1024]; + uint32_t trans_len; + int i = 0; + uint32_t nsid; + + if (off >= sizeof(nslist)) { + trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist)); + return NVME_INVALID_FIELD | NVME_DNR; + } + + memset(nslist, 0x0, sizeof(nslist)); + trans_len = MIN(sizeof(nslist) - off, buf_len); + + while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != + NVME_CHANGED_NSID_SIZE) { + /* + * If more than 1024 namespaces, the first entry in the log page should + * be set to FFFFFFFFh and the others to 0 as spec. + */ + if (i == ARRAY_SIZE(nslist)) { + memset(nslist, 0x0, sizeof(nslist)); + nslist[0] = 0xffffffff; + break; + } + + nslist[i++] = nsid; + clear_bit(nsid, n->changed_nsids); + } + + /* + * Remove all the remaining list entries in case returns directly due to + * more than 1024 namespaces. + */ + if (nslist[0] == 0xffffffff) { + bitmap_zero(n->changed_nsids, NVME_CHANGED_NSID_SIZE); + } + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_NOTICE); + } + + return nvme_c2h(n, ((uint8_t *)nslist) + off, trans_len, req); +} + +static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + NvmeEffectsLog log = {}; + const uint32_t *src_iocs = NULL; + uint32_t trans_len; + + if (off >= sizeof(log)) { + trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(log)); + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (NVME_CC_CSS(ldl_le_p(&n->bar.cc))) { + case NVME_CC_CSS_NVM: + src_iocs = nvme_cse_iocs_nvm; + /* fall through */ + case NVME_CC_CSS_ADMIN_ONLY: + break; + case NVME_CC_CSS_CSI: + switch (csi) { + case NVME_CSI_NVM: + src_iocs = nvme_cse_iocs_nvm; + break; + case NVME_CSI_ZONED: + src_iocs = nvme_cse_iocs_zoned; + break; + } + } + + memcpy(log.acs, nvme_cse_acs, sizeof(nvme_cse_acs)); + + if (src_iocs) { + memcpy(log.iocs, src_iocs, sizeof(log.iocs)); + } + + trans_len = MIN(sizeof(log) - off, buf_len); + + return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); +} + +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw12 = le32_to_cpu(cmd->cdw12); + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint8_t lid = dw10 & 0xff; + uint8_t lsp = (dw10 >> 8) & 0xf; + uint8_t rae = (dw10 >> 15) & 0x1; + uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24; + uint32_t numdl, numdu; + uint64_t off, lpol, lpou; + size_t len; + uint16_t status; + + numdl = (dw10 >> 16); + numdu = (dw11 & 0xffff); + lpol = dw12; + lpou = dw13; + + len = (((numdu << 16) | numdl) + 1) << 2; + off = (lpou << 32ULL) | lpol; + + if (off & 0x3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off); + + status = nvme_check_mdts(n, len); + if (status) { + return 
status; + } + + switch (lid) { + case NVME_LOG_ERROR_INFO: + return nvme_error_info(n, rae, len, off, req); + case NVME_LOG_SMART_INFO: + return nvme_smart_info(n, rae, len, off, req); + case NVME_LOG_FW_SLOT_INFO: + return nvme_fw_log_info(n, len, off, req); + case NVME_LOG_CHANGED_NSLIST: + return nvme_changed_nslist(n, rae, len, off, req); + case NVME_LOG_CMD_EFFECTS: + return nvme_cmd_effects(n, csi, len, off, req); + default: + trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); + return NVME_INVALID_FIELD | NVME_DNR; + } +} + +static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) +{ + n->cq[cq->cqid] = NULL; + timer_free(cq->timer); + if (msix_enabled(&n->parent_obj)) { + msix_vector_unuse(&n->parent_obj, cq->vector); + } + if (cq->cqid) { + g_free(cq); + } +} + +static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd; + NvmeCQueue *cq; + uint16_t qid = le16_to_cpu(c->qid); + + if (unlikely(!qid || nvme_check_cqid(n, qid))) { + trace_pci_nvme_err_invalid_del_cq_cqid(qid); + return NVME_INVALID_CQID | NVME_DNR; + } + + cq = n->cq[qid]; + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { + trace_pci_nvme_err_invalid_del_cq_notempty(qid); + return NVME_INVALID_QUEUE_DEL; + } + + if (cq->irq_enabled && cq->tail != cq->head) { + n->cq_pending--; + } + + nvme_irq_deassert(n, cq); + trace_pci_nvme_del_cq(qid); + nvme_free_cq(cq, n); + return NVME_SUCCESS; +} + +static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + uint16_t cqid, uint16_t vector, uint16_t size, + uint16_t irq_enabled) +{ + int ret; + + if (msix_enabled(&n->parent_obj)) { + ret = msix_vector_use(&n->parent_obj, vector); + assert(ret == 0); + } + cq->ctrl = n; + cq->cqid = cqid; + cq->size = size; + cq->dma_addr = dma_addr; + cq->phase = 1; + cq->irq_enabled = irq_enabled; + cq->vector = vector; + cq->head = cq->tail = 0; + QTAILQ_INIT(&cq->req_list); + QTAILQ_INIT(&cq->sq_list); + n->cq[cqid] = cq; + cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); +} + +static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCQueue *cq; + NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd; + uint16_t cqid = le16_to_cpu(c->cqid); + uint16_t vector = le16_to_cpu(c->irq_vector); + uint16_t qsize = le16_to_cpu(c->qsize); + uint16_t qflags = le16_to_cpu(c->cq_flags); + uint64_t prp1 = le64_to_cpu(c->prp1); + + trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, + NVME_CQ_FLAGS_IEN(qflags) != 0); + + if (unlikely(!cqid || cqid > n->params.max_ioqpairs || + n->cq[cqid] != NULL)) { + trace_pci_nvme_err_invalid_create_cq_cqid(cqid); + return NVME_INVALID_QID | NVME_DNR; + } + if (unlikely(!qsize || qsize > NVME_CAP_MQES(ldq_le_p(&n->bar.cap)))) { + trace_pci_nvme_err_invalid_create_cq_size(qsize); + return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; + } + if (unlikely(prp1 & (n->page_size - 1))) { + trace_pci_nvme_err_invalid_create_cq_addr(prp1); + return NVME_INVALID_PRP_OFFSET | NVME_DNR; + } + if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } + if (unlikely(vector >= n->params.msix_qsize)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { + trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); + return NVME_INVALID_FIELD | NVME_DNR; + } + + cq = g_malloc0(sizeof(*cq)); + nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1, + 
NVME_CQ_FLAGS_IEN(qflags)); + + /* + * It is only required to set qs_created when creating a completion queue; + * creating a submission queue without a matching completion queue will + * fail. + */ + n->qs_created = true; + return NVME_SUCCESS; +} + +static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req) +{ + uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; + + return nvme_c2h(n, id, sizeof(id), req); +} + +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_identify_ctrl(); + + return nvme_c2h(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), req); +} + +static uint16_t nvme_identify_ctrl_csi(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint8_t id[NVME_IDENTIFY_DATA_SIZE] = {}; + NvmeIdCtrlNvm *id_nvm = (NvmeIdCtrlNvm *)&id; + + trace_pci_nvme_identify_ctrl_csi(c->csi); + + switch (c->csi) { + case NVME_CSI_NVM: + id_nvm->vsl = n->params.vsl; + id_nvm->dmrsl = cpu_to_le32(n->dmrsl); + break; + + case NVME_CSI_ZONED: + ((NvmeIdCtrlZoned *)&id)->zasl = n->params.zasl; + break; + + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + return nvme_c2h(n, id, sizeof(id), req); +} + +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + + trace_pci_nvme_identify_ns(nsid); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return nvme_rpt_empty_id_struct(n, req); + } + } else { + return nvme_rpt_empty_id_struct(n, req); + } + } + + if (active || ns->csi == NVME_CSI_NVM) { + return nvme_c2h(n, (uint8_t *)&ns->id_ns, sizeof(NvmeIdNs), req); + } + + return NVME_INVALID_CMD_SET | NVME_DNR; +} + +static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, + bool attached) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + uint16_t min_id = le16_to_cpu(c->ctrlid); + uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; + uint16_t *ids = &list[1]; + NvmeNamespace *ns; + NvmeCtrl *ctrl; + int cntlid, nr_ids = 0; + + trace_pci_nvme_identify_ctrl_list(c->cns, min_id); + + if (!n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (attached) { + if (nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) { + ctrl = nvme_subsys_ctrl(n->subsys, cntlid); + if (!ctrl) { + continue; + } + + if (attached && !nvme_ns(ctrl, nsid)) { + continue; + } + + ids[nr_ids++] = cntlid; + } + + list[0] = nr_ids; + + return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); +} + +static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, + bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + + trace_pci_nvme_identify_ns_csi(nsid, c->csi); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return nvme_rpt_empty_id_struct(n, req); + } + } else { + return nvme_rpt_empty_id_struct(n, req); + } + } + + if (c->csi == NVME_CSI_NVM) { + 
return nvme_rpt_empty_id_struct(n, req); + } else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) { + return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned), + req); + } + + return NVME_INVALID_FIELD | NVME_DNR; +} + +static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, + bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t min_nsid = le32_to_cpu(c->nsid); + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + static const int data_len = sizeof(list); + uint32_t *list_ptr = (uint32_t *)list; + int i, j = 0; + + trace_pci_nvme_identify_nslist(min_nsid); + + /* + * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFFEh are invalid values + * since the Active Namespace ID List should return namespaces with ids + * *higher* than the NSID specified in the command. This is also specified + * in the spec (NVM Express v1.3d, Section 5.15.4). + */ + if (min_nsid >= NVME_NSID_BROADCAST - 1) { + return NVME_INVALID_NSID | NVME_DNR; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, i); + if (!ns) { + continue; + } + } else { + continue; + } + } + if (ns->params.nsid <= min_nsid) { + continue; + } + list_ptr[j++] = cpu_to_le32(ns->params.nsid); + if (j == data_len / sizeof(uint32_t)) { + break; + } + } + + return nvme_c2h(n, list, data_len, req); +} + +static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, + bool active) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t min_nsid = le32_to_cpu(c->nsid); + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + static const int data_len = sizeof(list); + uint32_t *list_ptr = (uint32_t *)list; + int i, j = 0; + + trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); + + /* + * Same as in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFFEh are invalid. + */ + if (min_nsid >= NVME_NSID_BROADCAST - 1) { + return NVME_INVALID_NSID | NVME_DNR; + } + + if (c->csi != NVME_CSI_NVM && c->csi != NVME_CSI_ZONED) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + if (!active) { + ns = nvme_subsys_ns(n->subsys, i); + if (!ns) { + continue; + } + } else { + continue; + } + } + if (ns->params.nsid <= min_nsid || c->csi != ns->csi) { + continue; + } + list_ptr[j++] = cpu_to_le32(ns->params.nsid); + if (j == data_len / sizeof(uint32_t)) { + break; + } + } + + return nvme_c2h(n, list, data_len, req); +} + +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint32_t nsid = le32_to_cpu(c->nsid); + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + uint8_t *pos = list; + struct { + NvmeIdNsDescr hdr; + uint8_t v[NVME_NIDL_UUID]; + } QEMU_PACKED uuid = {}; + struct { + NvmeIdNsDescr hdr; + uint64_t v; + } QEMU_PACKED eui64 = {}; + struct { + NvmeIdNsDescr hdr; + uint8_t v; + } QEMU_PACKED csi = {}; + + trace_pci_nvme_identify_ns_descr_list(nsid); + + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* + * If the EUI-64 field is 0 and the NGUID field is 0, the namespace must + * provide a valid Namespace UUID in the Namespace Identification Descriptor + * data structure. QEMU does not yet support setting NGUID. 
+ */ + uuid.hdr.nidt = NVME_NIDT_UUID; + uuid.hdr.nidl = NVME_NIDL_UUID; + memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); + memcpy(pos, &uuid, sizeof(uuid)); + pos += sizeof(uuid); + + if (ns->params.eui64) { + eui64.hdr.nidt = NVME_NIDT_EUI64; + eui64.hdr.nidl = NVME_NIDL_EUI64; + eui64.v = cpu_to_be64(ns->params.eui64); + memcpy(pos, &eui64, sizeof(eui64)); + pos += sizeof(eui64); + } + + csi.hdr.nidt = NVME_NIDT_CSI; + csi.hdr.nidl = NVME_NIDL_CSI; + csi.v = ns->csi; + memcpy(pos, &csi, sizeof(csi)); + pos += sizeof(csi); + + return nvme_c2h(n, list, sizeof(list), req); +} + +static uint16_t nvme_identify_cmd_set(NvmeCtrl *n, NvmeRequest *req) +{ + uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {}; + static const int data_len = sizeof(list); + + trace_pci_nvme_identify_cmd_set(); + + NVME_SET_CSI(*list, NVME_CSI_NVM); + NVME_SET_CSI(*list, NVME_CSI_ZONED); + + return nvme_c2h(n, list, data_len, req); +} + +static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + + trace_pci_nvme_identify(nvme_cid(req), c->cns, le16_to_cpu(c->ctrlid), + c->csi); + + switch (c->cns) { + case NVME_ID_CNS_NS: + return nvme_identify_ns(n, req, true); + case NVME_ID_CNS_NS_PRESENT: + return nvme_identify_ns(n, req, false); + case NVME_ID_CNS_NS_ATTACHED_CTRL_LIST: + return nvme_identify_ctrl_list(n, req, true); + case NVME_ID_CNS_CTRL_LIST: + return nvme_identify_ctrl_list(n, req, false); + case NVME_ID_CNS_CS_NS: + return nvme_identify_ns_csi(n, req, true); + case NVME_ID_CNS_CS_NS_PRESENT: + return nvme_identify_ns_csi(n, req, false); + case NVME_ID_CNS_CTRL: + return nvme_identify_ctrl(n, req); + case NVME_ID_CNS_CS_CTRL: + return nvme_identify_ctrl_csi(n, req); + case NVME_ID_CNS_NS_ACTIVE_LIST: + return nvme_identify_nslist(n, req, true); + case NVME_ID_CNS_NS_PRESENT_LIST: + return nvme_identify_nslist(n, req, false); + case NVME_ID_CNS_CS_NS_ACTIVE_LIST: + return nvme_identify_nslist_csi(n, req, true); + case NVME_ID_CNS_CS_NS_PRESENT_LIST: + return nvme_identify_nslist_csi(n, req, false); + case NVME_ID_CNS_NS_DESCR_LIST: + return nvme_identify_ns_descr_list(n, req); + case NVME_ID_CNS_IO_COMMAND_SET: + return nvme_identify_cmd_set(n, req); + default: + trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); + return NVME_INVALID_FIELD | NVME_DNR; + } +} + +static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req) +{ + uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff; + + req->cqe.result = 1; + if (nvme_check_sqid(n, sqid)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) +{ + trace_pci_nvme_setfeat_timestamp(ts); + + n->host_timestamp = le64_to_cpu(ts); + n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +} + +static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) +{ + uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms; + + union nvme_timestamp { + struct { + uint64_t timestamp:48; + uint64_t sync:1; + uint64_t origin:3; + uint64_t rsvd1:12; + }; + uint64_t all; + }; + + union nvme_timestamp ts; + ts.all = 0; + ts.timestamp = n->host_timestamp + elapsed_time; + + /* If the host timestamp is non-zero, set the timestamp origin */ + ts.origin = n->host_timestamp ? 
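/*
 * Editor's note (illustrative sketch, not part of the patch): the
 * union nvme_timestamp built above packs the Timestamp feature value into one
 * 64-bit little-endian field: the millisecond timestamp in bits 47:0, a
 * "synch" flag in bit 48 and the origin in bits 51:49 (001b means the value
 * was set by a Set Features command). A host could decode a returned value
 * roughly like this; decode_timestamp() is a hypothetical helper:
 *
 *   #include <stdint.h>
 *
 *   struct decoded_ts {
 *       uint64_t millis;   // milliseconds since the host-provided epoch
 *       unsigned synch;    // 1 if the controller may have lost track of time
 *       unsigned origin;   // 0 = reset value, 1 = set by the host
 *   };
 *
 *   static struct decoded_ts decode_timestamp(uint64_t v) // v in CPU endianness
 *   {
 *       struct decoded_ts ts;
 *
 *       ts.millis = v & 0xffffffffffffULL;  // bits 47:0
 *       ts.synch  = (v >> 48) & 0x1;        // bit 48
 *       ts.origin = (v >> 49) & 0x7;        // bits 51:49
 *       return ts;
 *   }
 */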
0x01 : 0x00; + + trace_pci_nvme_getfeat_timestamp(ts.all); + + return cpu_to_le64(ts.all); +} + +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) +{ + uint64_t timestamp = nvme_get_timestamp(n); + + return nvme_c2h(n, (uint8_t *)×tamp, sizeof(timestamp), req); +} + +static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t nsid = le32_to_cpu(cmd->nsid); + uint32_t result; + uint8_t fid = NVME_GETSETFEAT_FID(dw10); + NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10); + uint16_t iv; + NvmeNamespace *ns; + int i; + + static const uint32_t nvme_feature_default[NVME_FID_MAX] = { + [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT, + }; + + trace_pci_nvme_getfeat(nvme_cid(req), nsid, fid, sel, dw11); + + if (!nvme_feature_support[fid]) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { + if (!nvme_nsid_valid(n, nsid) || nsid == NVME_NSID_BROADCAST) { + /* + * The Reservation Notification Mask and Reservation Persistence + * features require a status code of Invalid Field in Command when + * NSID is FFFFFFFFh. Since the device does not support those + * features we can always return Invalid Namespace or Format as we + * should do for all other features. + */ + return NVME_INVALID_NSID | NVME_DNR; + } + + if (!nvme_ns(n, nsid)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + switch (sel) { + case NVME_GETFEAT_SELECT_CURRENT: + break; + case NVME_GETFEAT_SELECT_SAVED: + /* no features are saveable by the controller; fallthrough */ + case NVME_GETFEAT_SELECT_DEFAULT: + goto defaults; + case NVME_GETFEAT_SELECT_CAP: + result = nvme_feature_cap[fid]; + goto out; + } + + switch (fid) { + case NVME_TEMPERATURE_THRESHOLD: + result = 0; + + /* + * The controller only implements the Composite Temperature sensor, so + * return 0 for all other sensors. + */ + if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { + goto out; + } + + switch (NVME_TEMP_THSEL(dw11)) { + case NVME_TEMP_THSEL_OVER: + result = n->features.temp_thresh_hi; + goto out; + case NVME_TEMP_THSEL_UNDER: + result = n->features.temp_thresh_low; + goto out; + } + + return NVME_INVALID_FIELD | NVME_DNR; + case NVME_ERROR_RECOVERY: + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + result = ns->features.err_rec; + goto out; + case NVME_VOLATILE_WRITE_CACHE: + result = 0; + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + result = blk_enable_write_cache(ns->blkconf.blk); + if (result) { + break; + } + } + trace_pci_nvme_getfeat_vwcache(result ? 
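/*
 * Editor's note (illustrative sketch, not part of the patch): in the code
 * above, NVME_GETSETFEAT_FID() and NVME_GETFEAT_SELECT() pull the Feature
 * Identifier and the Select field out of CDW10. Per the NVMe base
 * specification the FID occupies bits 07:00 and SEL bits 10:08 of CDW10, so a
 * host building a Get Features command for "default value of the Volatile
 * Write Cache feature" could do roughly the following (plain constants
 * instead of QEMU's macros; get_features_cdw10() is a hypothetical helper):
 *
 *   #include <stdint.h>
 *
 *   enum { SEL_CURRENT = 0, SEL_DEFAULT = 1, SEL_SAVED = 2, SEL_CAP = 3 };
 *
 *   static uint32_t get_features_cdw10(uint8_t fid, uint8_t sel)
 *   {
 *       return (uint32_t)fid | ((uint32_t)(sel & 0x7) << 8);
 *   }
 *
 *   // e.g. get_features_cdw10(0x06, SEL_DEFAULT) for Volatile Write Cache
 */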
"enabled" : "disabled"); + goto out; + case NVME_ASYNCHRONOUS_EVENT_CONF: + result = n->features.async_config; + goto out; + case NVME_TIMESTAMP: + return nvme_get_feature_timestamp(n, req); + default: + break; + } + +defaults: + switch (fid) { + case NVME_TEMPERATURE_THRESHOLD: + result = 0; + + if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { + break; + } + + if (NVME_TEMP_THSEL(dw11) == NVME_TEMP_THSEL_OVER) { + result = NVME_TEMPERATURE_WARNING; + } + + break; + case NVME_NUMBER_OF_QUEUES: + result = (n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16); + trace_pci_nvme_getfeat_numq(result); + break; + case NVME_INTERRUPT_VECTOR_CONF: + iv = dw11 & 0xffff; + if (iv >= n->params.max_ioqpairs + 1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + result = iv; + if (iv == n->admin_cq.vector) { + result |= NVME_INTVC_NOCOALESCING; + } + break; + default: + result = nvme_feature_default[fid]; + break; + } + +out: + req->cqe.result = cpu_to_le32(result); + return NVME_SUCCESS; +} + +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) +{ + uint16_t ret; + uint64_t timestamp; + + ret = nvme_h2c(n, (uint8_t *)×tamp, sizeof(timestamp), req); + if (ret) { + return ret; + } + + nvme_set_timestamp(n, timestamp); + + return NVME_SUCCESS; +} + +static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns = NULL; + + NvmeCmd *cmd = &req->cmd; + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t nsid = le32_to_cpu(cmd->nsid); + uint8_t fid = NVME_GETSETFEAT_FID(dw10); + uint8_t save = NVME_SETFEAT_SAVE(dw10); + int i; + + trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11); + + if (save && !(nvme_feature_cap[fid] & NVME_FEAT_CAP_SAVE)) { + return NVME_FID_NOT_SAVEABLE | NVME_DNR; + } + + if (!nvme_feature_support[fid]) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (nvme_feature_cap[fid] & NVME_FEAT_CAP_NS) { + if (nsid != NVME_NSID_BROADCAST) { + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + } else if (nsid && nsid != NVME_NSID_BROADCAST) { + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + return NVME_FEAT_NOT_NS_SPEC | NVME_DNR; + } + + if (!(nvme_feature_cap[fid] & NVME_FEAT_CAP_CHANGE)) { + return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; + } + + switch (fid) { + case NVME_TEMPERATURE_THRESHOLD: + if (NVME_TEMP_TMPSEL(dw11) != NVME_TEMP_TMPSEL_COMPOSITE) { + break; + } + + switch (NVME_TEMP_THSEL(dw11)) { + case NVME_TEMP_THSEL_OVER: + n->features.temp_thresh_hi = NVME_TEMP_TMPTH(dw11); + break; + case NVME_TEMP_THSEL_UNDER: + n->features.temp_thresh_low = NVME_TEMP_TMPTH(dw11); + break; + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + if ((n->temperature >= n->features.temp_thresh_hi) || + (n->temperature <= n->features.temp_thresh_low)) { + nvme_smart_event(n, NVME_AER_INFO_SMART_TEMP_THRESH); + } + + break; + case NVME_ERROR_RECOVERY: + if (nsid == NVME_NSID_BROADCAST) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + + if (!ns) { + continue; + } + + if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { + ns->features.err_rec = dw11; + } + } + + break; + } + + assert(ns); + if (NVME_ID_NS_NSFEAT_DULBE(ns->id_ns.nsfeat)) { + ns->features.err_rec = dw11; + } + break; + case NVME_VOLATILE_WRITE_CACHE: + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + 
continue; + } + + if (!(dw11 & 0x1) && blk_enable_write_cache(ns->blkconf.blk)) { + blk_flush(ns->blkconf.blk); + } + + blk_set_enable_write_cache(ns->blkconf.blk, dw11 & 1); + } + + break; + + case NVME_NUMBER_OF_QUEUES: + if (n->qs_created) { + return NVME_CMD_SEQ_ERROR | NVME_DNR; + } + + /* + * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for NCQR + * and NSQR. + */ + if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1, + ((dw11 >> 16) & 0xffff) + 1, + n->params.max_ioqpairs, + n->params.max_ioqpairs); + req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16)); + break; + case NVME_ASYNCHRONOUS_EVENT_CONF: + n->features.async_config = dw11; + break; + case NVME_TIMESTAMP: + return nvme_set_feature_timestamp(n, req); + case NVME_COMMAND_SET_PROFILE: + if (dw11 & 0x1ff) { + trace_pci_nvme_err_invalid_iocsci(dw11 & 0x1ff); + return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; + } + break; + default: + return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; + } + return NVME_SUCCESS; +} + +static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_aer(nvme_cid(req)); + + if (n->outstanding_aers > n->params.aerl) { + trace_pci_nvme_aer_aerl_exceeded(); + return NVME_AER_LIMIT_EXCEEDED; + } + + n->aer_reqs[n->outstanding_aers] = req; + n->outstanding_aers++; + + if (!QTAILQ_EMPTY(&n->aer_queue)) { + nvme_process_aers(n); + } + + return NVME_NO_COMPLETE; +} + +static void nvme_update_dmrsl(NvmeCtrl *n) +{ + int nsid; + + for (nsid = 1; nsid <= NVME_MAX_NAMESPACES; nsid++) { + NvmeNamespace *ns = nvme_ns(n, nsid); + if (!ns) { + continue; + } + + n->dmrsl = MIN_NON_ZERO(n->dmrsl, + BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); + } +} + +static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + uint32_t cc = ldl_le_p(&n->bar.cc); + + ns->iocs = nvme_cse_iocs_none; + switch (ns->csi) { + case NVME_CSI_NVM: + if (NVME_CC_CSS(cc) != NVME_CC_CSS_ADMIN_ONLY) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + case NVME_CSI_ZONED: + if (NVME_CC_CSS(cc) == NVME_CC_CSS_CSI) { + ns->iocs = nvme_cse_iocs_zoned; + } else if (NVME_CC_CSS(cc) == NVME_CC_CSS_NVM) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + } +} + +static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + NvmeCtrl *ctrl; + uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {}; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t sel = dw10 & 0xf; + uint16_t *nr_ids = &list[0]; + uint16_t *ids = &list[1]; + uint16_t ret; + int i; + + trace_pci_nvme_ns_attachment(nvme_cid(req), dw10 & 0xf); + + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_subsys_ns(n->subsys, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_h2c(n, (uint8_t *)list, 4096, req); + if (ret) { + return ret; + } + + if (!*nr_ids) { + return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; + } + + *nr_ids = MIN(*nr_ids, NVME_CONTROLLER_LIST_SIZE - 1); + for (i = 0; i < *nr_ids; i++) { + ctrl = nvme_subsys_ctrl(n->subsys, ids[i]); + if (!ctrl) { + return NVME_NS_CTRL_LIST_INVALID | NVME_DNR; + } + + switch (sel) { + case NVME_NS_ATTACHMENT_ATTACH: + if (nvme_ns(ctrl, nsid)) { + return NVME_NS_ALREADY_ATTACHED | NVME_DNR; + } + + if (ns->attached && !ns->params.shared) { + return NVME_NS_PRIVATE | NVME_DNR; + } + + nvme_attach_ns(ctrl, ns); + nvme_select_iocs_ns(ctrl, ns); 
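/*
 * Editor's note (illustrative sketch, not part of the patch): the Number of
 * Queues feature handled earlier in nvme_set_feature()/nvme_get_feature()
 * uses 0's based counts: the host requests (NSQR, NCQR) in the low and high
 * halves of CDW11, and the completion result reports the allocated
 * (NSQA, NCQA) the same way, which is why the code above returns
 * (max_ioqpairs - 1) in both halves. A host asking for a number of I/O queue
 * pairs and decoding the grant might do roughly this (hypothetical helpers):
 *
 *   #include <stdint.h>
 *
 *   static uint32_t nr_queues_cdw11(uint16_t want)   // want must be >= 1
 *   {
 *       uint16_t zeroes_based = want - 1;
 *       return (uint32_t)zeroes_based | ((uint32_t)zeroes_based << 16);
 *   }
 *
 *   static uint16_t nr_queues_granted(uint32_t cqe_result)
 *   {
 *       uint16_t nsqa = (cqe_result & 0xffff) + 1;   // submission queues
 *       uint16_t ncqa = (cqe_result >> 16) + 1;      // completion queues
 *       return nsqa < ncqa ? nsqa : ncqa;            // usable queue pairs
 *   }
 */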
+ + break; + + case NVME_NS_ATTACHMENT_DETACH: + if (!nvme_ns(ctrl, nsid)) { + return NVME_NS_NOT_ATTACHED | NVME_DNR; + } + + ctrl->namespaces[nsid] = NULL; + ns->attached--; + + nvme_update_dmrsl(ctrl); + + break; + + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + /* + * Add namespace id to the changed namespace id list for event clearing + * via Get Log Page command. + */ + if (!test_and_set_bit(nsid, ctrl->changed_nsids)) { + nvme_enqueue_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_INFO_NOTICE_NS_ATTR_CHANGED, + NVME_LOG_CHANGED_NSLIST); + } + } + + return NVME_SUCCESS; +} + +typedef struct NvmeFormatAIOCB { + BlockAIOCB common; + BlockAIOCB *aiocb; + QEMUBH *bh; + NvmeRequest *req; + int ret; + + NvmeNamespace *ns; + uint32_t nsid; + bool broadcast; + int64_t offset; +} NvmeFormatAIOCB; + +static void nvme_format_bh(void *opaque); + +static void nvme_format_cancel(BlockAIOCB *aiocb) +{ + NvmeFormatAIOCB *iocb = container_of(aiocb, NvmeFormatAIOCB, common); + + if (iocb->aiocb) { + blk_aio_cancel_async(iocb->aiocb); + } +} + +static const AIOCBInfo nvme_format_aiocb_info = { + .aiocb_size = sizeof(NvmeFormatAIOCB), + .cancel_async = nvme_format_cancel, + .get_aio_context = nvme_get_aio_context, +}; + +static void nvme_format_set(NvmeNamespace *ns, NvmeCmd *cmd) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t pi = (dw10 >> 5) & 0x7; + uint8_t mset = (dw10 >> 4) & 0x1; + uint8_t pil = (dw10 >> 8) & 0x1; + + trace_pci_nvme_format_set(ns->params.nsid, lbaf, mset, pi, pil); + + ns->id_ns.dps = (pil << 3) | pi; + ns->id_ns.flbas = lbaf | (mset << 4); + + nvme_ns_init_format(ns); +} + +static void nvme_format_ns_cb(void *opaque, int ret) +{ + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeNamespace *ns = iocb->ns; + int bytes; + + if (ret < 0) { + iocb->ret = ret; + goto done; + } + + assert(ns); + + if (iocb->offset < ns->size) { + bytes = MIN(BDRV_REQUEST_MAX_BYTES, ns->size - iocb->offset); + + iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, iocb->offset, + bytes, BDRV_REQ_MAY_UNMAP, + nvme_format_ns_cb, iocb); + + iocb->offset += bytes; + return; + } + + nvme_format_set(ns, &req->cmd); + ns->status = 0x0; + iocb->ns = NULL; + iocb->offset = 0; + +done: + iocb->aiocb = NULL; + qemu_bh_schedule(iocb->bh); +} + +static uint16_t nvme_format_check(NvmeNamespace *ns, uint8_t lbaf, uint8_t pi) +{ + if (ns->params.zoned) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (lbaf > ns->id_ns.nlbaf) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (pi && (ns->id_ns.lbaf[lbaf].ms < sizeof(NvmeDifTuple))) { + return NVME_INVALID_FORMAT | NVME_DNR; + } + + if (pi && pi > NVME_ID_NS_DPS_TYPE_3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static void nvme_format_bh(void *opaque) +{ + NvmeFormatAIOCB *iocb = opaque; + NvmeRequest *req = iocb->req; + NvmeCtrl *n = nvme_ctrl(req); + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint8_t lbaf = dw10 & 0xf; + uint8_t pi = (dw10 >> 5) & 0x7; + uint16_t status; + int i; + + if (iocb->ret < 0) { + goto done; + } + + if (iocb->broadcast) { + for (i = iocb->nsid + 1; i <= NVME_MAX_NAMESPACES; i++) { + iocb->ns = nvme_ns(n, i); + if (iocb->ns) { + iocb->nsid = i; + break; + } + } + } + + if (!iocb->ns) { + goto done; + } + + status = nvme_format_check(iocb->ns, lbaf, pi); + if (status) { + req->status = status; + goto done; + } + + iocb->ns->status = NVME_FORMAT_IN_PROGRESS; + nvme_format_ns_cb(iocb, 0); + return; + +done: + qemu_bh_delete(iocb->bh); 
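/*
 * Editor's note (illustrative sketch, not part of the patch): the callback
 * chain above (nvme_format_bh -> nvme_format_ns_cb -> blk_aio_pwrite_zeroes
 * -> nvme_format_ns_cb again) is an asynchronous loop that zeroes the
 * namespace in chunks of at most BDRV_REQUEST_MAX_BYTES. Stripped of the AIO
 * plumbing, the control flow is equivalent to the synchronous loop sketched
 * below; zero_range() is a hypothetical stand-in for the block-layer call,
 * not a QEMU function.
 *
 *   #include <stdint.h>
 *
 *   int zero_range(int64_t offset, int64_t bytes);   // hypothetical stand-in
 *
 *   static int format_zero_all(int64_t size, int64_t max_chunk)
 *   {
 *       int64_t offset = 0;
 *
 *       while (offset < size) {
 *           int64_t bytes = size - offset;
 *           if (bytes > max_chunk) {
 *               bytes = max_chunk;
 *           }
 *           if (zero_range(offset, bytes) < 0) {     // one chunk per step
 *               return -1;
 *           }
 *           offset += bytes;
 *       }
 *       return 0;
 *   }
 */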
+ iocb->bh = NULL; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_aio_unref(iocb); +} + +static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeFormatAIOCB *iocb; + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint16_t status; + + iocb = qemu_aio_get(&nvme_format_aiocb_info, NULL, nvme_misc_cb, req); + + iocb->req = req; + iocb->bh = qemu_bh_new(nvme_format_bh, iocb); + iocb->ret = 0; + iocb->ns = NULL; + iocb->nsid = 0; + iocb->broadcast = (nsid == NVME_NSID_BROADCAST); + iocb->offset = 0; + + if (!iocb->broadcast) { + if (!nvme_nsid_valid(n, nsid)) { + status = NVME_INVALID_NSID | NVME_DNR; + goto out; + } + + iocb->ns = nvme_ns(n, nsid); + if (!iocb->ns) { + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + } + + req->aiocb = &iocb->common; + qemu_bh_schedule(iocb->bh); + + return NVME_NO_COMPLETE; + +out: + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); + return status; +} + +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, + nvme_adm_opc_str(req->cmd.opcode)); + + if (!(nvme_cse_acs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode); + return NVME_INVALID_OPCODE | NVME_DNR; + } + + /* SGLs shall not be used for Admin commands in NVMe over PCIe */ + if (NVME_CMD_FLAGS_PSDT(req->cmd.flags) != NVME_PSDT_PRP) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (NVME_CMD_FLAGS_FUSE(req->cmd.flags)) { + return NVME_INVALID_FIELD; + } + + switch (req->cmd.opcode) { + case NVME_ADM_CMD_DELETE_SQ: + return nvme_del_sq(n, req); + case NVME_ADM_CMD_CREATE_SQ: + return nvme_create_sq(n, req); + case NVME_ADM_CMD_GET_LOG_PAGE: + return nvme_get_log(n, req); + case NVME_ADM_CMD_DELETE_CQ: + return nvme_del_cq(n, req); + case NVME_ADM_CMD_CREATE_CQ: + return nvme_create_cq(n, req); + case NVME_ADM_CMD_IDENTIFY: + return nvme_identify(n, req); + case NVME_ADM_CMD_ABORT: + return nvme_abort(n, req); + case NVME_ADM_CMD_SET_FEATURES: + return nvme_set_feature(n, req); + case NVME_ADM_CMD_GET_FEATURES: + return nvme_get_feature(n, req); + case NVME_ADM_CMD_ASYNC_EV_REQ: + return nvme_aer(n, req); + case NVME_ADM_CMD_NS_ATTACHMENT: + return nvme_ns_attachment(n, req); + case NVME_ADM_CMD_FORMAT_NVM: + return nvme_format(n, req); + default: + assert(false); + } + + return NVME_INVALID_OPCODE | NVME_DNR; +} + +static void nvme_process_sq(void *opaque) +{ + NvmeSQueue *sq = opaque; + NvmeCtrl *n = sq->ctrl; + NvmeCQueue *cq = n->cq[sq->cqid]; + + uint16_t status; + hwaddr addr; + NvmeCmd cmd; + NvmeRequest *req; + + while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { + addr = sq->dma_addr + sq->head * n->sqe_size; + if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) { + trace_pci_nvme_err_addr_read(addr); + trace_pci_nvme_err_cfs(); + stl_le_p(&n->bar.csts, NVME_CSTS_FAILED); + break; + } + nvme_inc_sq_head(sq); + + req = QTAILQ_FIRST(&sq->req_list); + QTAILQ_REMOVE(&sq->req_list, req, entry); + QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); + nvme_req_clear(req); + req->cqe.cid = cmd.cid; + memcpy(&req->cmd, &cmd, sizeof(NvmeCmd)); + + status = sq->sqid ? 
nvme_io_cmd(n, req) : + nvme_admin_cmd(n, req); + if (status != NVME_NO_COMPLETE) { + req->status = status; + nvme_enqueue_req_completion(cq, req); + } + } +} + +static void nvme_ctrl_reset(NvmeCtrl *n) +{ + NvmeNamespace *ns; + int i; + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + nvme_ns_drain(ns); + } + + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { + if (n->sq[i] != NULL) { + nvme_free_sq(n->sq[i], n); + } + } + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { + if (n->cq[i] != NULL) { + nvme_free_cq(n->cq[i], n); + } + } + + while (!QTAILQ_EMPTY(&n->aer_queue)) { + NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue); + QTAILQ_REMOVE(&n->aer_queue, event, entry); + g_free(event); + } + + n->aer_queued = 0; + n->outstanding_aers = 0; + n->qs_created = false; +} + +static void nvme_ctrl_shutdown(NvmeCtrl *n) +{ + NvmeNamespace *ns; + int i; + + if (n->pmr.dev) { + memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + nvme_ns_shutdown(ns); + } +} + +static void nvme_select_iocs(NvmeCtrl *n) +{ + NvmeNamespace *ns; + int i; + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (!ns) { + continue; + } + + nvme_select_iocs_ns(n, ns); + } +} + +static int nvme_start_ctrl(NvmeCtrl *n) +{ + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t aqa = ldl_le_p(&n->bar.aqa); + uint64_t asq = ldq_le_p(&n->bar.asq); + uint64_t acq = ldq_le_p(&n->bar.acq); + uint32_t page_bits = NVME_CC_MPS(cc) + 12; + uint32_t page_size = 1 << page_bits; + + if (unlikely(n->cq[0])) { + trace_pci_nvme_err_startfail_cq(); + return -1; + } + if (unlikely(n->sq[0])) { + trace_pci_nvme_err_startfail_sq(); + return -1; + } + if (unlikely(asq & (page_size - 1))) { + trace_pci_nvme_err_startfail_asq_misaligned(asq); + return -1; + } + if (unlikely(acq & (page_size - 1))) { + trace_pci_nvme_err_startfail_acq_misaligned(acq); + return -1; + } + if (unlikely(!(NVME_CAP_CSS(cap) & (1 << NVME_CC_CSS(cc))))) { + trace_pci_nvme_err_startfail_css(NVME_CC_CSS(cc)); + return -1; + } + if (unlikely(NVME_CC_MPS(cc) < NVME_CAP_MPSMIN(cap))) { + trace_pci_nvme_err_startfail_page_too_small( + NVME_CC_MPS(cc), + NVME_CAP_MPSMIN(cap)); + return -1; + } + if (unlikely(NVME_CC_MPS(cc) > + NVME_CAP_MPSMAX(cap))) { + trace_pci_nvme_err_startfail_page_too_large( + NVME_CC_MPS(cc), + NVME_CAP_MPSMAX(cap)); + return -1; + } + if (unlikely(NVME_CC_IOCQES(cc) < + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { + trace_pci_nvme_err_startfail_cqent_too_small( + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MIN(cap)); + return -1; + } + if (unlikely(NVME_CC_IOCQES(cc) > + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { + trace_pci_nvme_err_startfail_cqent_too_large( + NVME_CC_IOCQES(cc), + NVME_CTRL_CQES_MAX(cap)); + return -1; + } + if (unlikely(NVME_CC_IOSQES(cc) < + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { + trace_pci_nvme_err_startfail_sqent_too_small( + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MIN(cap)); + return -1; + } + if (unlikely(NVME_CC_IOSQES(cc) > + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { + trace_pci_nvme_err_startfail_sqent_too_large( + NVME_CC_IOSQES(cc), + NVME_CTRL_SQES_MAX(cap)); + return -1; + } + if (unlikely(!NVME_AQA_ASQS(aqa))) { + trace_pci_nvme_err_startfail_asqent_sz_zero(); + return -1; + } + if (unlikely(!NVME_AQA_ACQS(aqa))) { + trace_pci_nvme_err_startfail_acqent_sz_zero(); + return -1; + } + + n->page_bits = page_bits; + 
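/*
 * Editor's note (illustrative sketch, not part of the patch): the checks in
 * nvme_start_ctrl() above all derive sizes from fields in CC. MPS encodes the
 * memory page size as a power of two starting at 4 KiB, and IOSQES/IOCQES
 * encode the queue entry sizes as powers of two directly; that is what the
 * page_bits/page_size computation and the later "1 << NVME_CC_IOSQES(cc)"
 * assignments rely on. The same arithmetic in isolation:
 *
 *   #include <stdint.h>
 *
 *   static uint32_t mps_to_page_size(uint32_t mps)   // CC.MPS, 4 bits
 *   {
 *       return 1u << (12 + mps);   // MPS = 0 -> 4096 bytes
 *   }
 *
 *   static uint32_t es_to_bytes(uint32_t es)          // CC.IOSQES / CC.IOCQES
 *   {
 *       return 1u << es;           // 6 -> 64-byte SQE, 4 -> 16-byte CQE
 *   }
 */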
n->page_size = page_size; + n->max_prp_ents = n->page_size / sizeof(uint64_t); + n->cqe_size = 1 << NVME_CC_IOCQES(cc); + n->sqe_size = 1 << NVME_CC_IOSQES(cc); + nvme_init_cq(&n->admin_cq, n, acq, 0, 0, NVME_AQA_ACQS(aqa) + 1, 1); + nvme_init_sq(&n->admin_sq, n, asq, 0, 0, NVME_AQA_ASQS(aqa) + 1); + + nvme_set_timestamp(n, 0ULL); + + QTAILQ_INIT(&n->aer_queue); + + nvme_select_iocs(n); + + return 0; +} + +static void nvme_cmb_enable_regs(NvmeCtrl *n) +{ + uint32_t cmbloc = ldl_le_p(&n->bar.cmbloc); + uint32_t cmbsz = ldl_le_p(&n->bar.cmbsz); + + NVME_CMBLOC_SET_CDPCILS(cmbloc, 1); + NVME_CMBLOC_SET_CDPMLS(cmbloc, 1); + NVME_CMBLOC_SET_BIR(cmbloc, NVME_CMB_BIR); + stl_le_p(&n->bar.cmbloc, cmbloc); + + NVME_CMBSZ_SET_SQS(cmbsz, 1); + NVME_CMBSZ_SET_CQS(cmbsz, 0); + NVME_CMBSZ_SET_LISTS(cmbsz, 1); + NVME_CMBSZ_SET_RDS(cmbsz, 1); + NVME_CMBSZ_SET_WDS(cmbsz, 1); + NVME_CMBSZ_SET_SZU(cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(cmbsz, n->params.cmb_size_mb); + stl_le_p(&n->bar.cmbsz, cmbsz); +} + +static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, + unsigned size) +{ + uint64_t cap = ldq_le_p(&n->bar.cap); + uint32_t cc = ldl_le_p(&n->bar.cc); + uint32_t intms = ldl_le_p(&n->bar.intms); + uint32_t csts = ldl_le_p(&n->bar.csts); + uint32_t pmrsts = ldl_le_p(&n->bar.pmrsts); + + if (unlikely(offset & (sizeof(uint32_t) - 1))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, + "MMIO write not 32-bit aligned," + " offset=0x%"PRIx64"", offset); + /* should be ignored, fall through for now */ + } + + if (unlikely(size < sizeof(uint32_t))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall, + "MMIO write smaller than 32-bits," + " offset=0x%"PRIx64", size=%u", + offset, size); + /* should be ignored, fall through for now */ + } + + switch (offset) { + case NVME_REG_INTMS: + if (unlikely(msix_enabled(&(n->parent_obj)))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, + "undefined access to interrupt mask set" + " when MSI-X is enabled"); + /* should be ignored, fall through for now */ + } + intms |= data; + stl_le_p(&n->bar.intms, intms); + n->bar.intmc = n->bar.intms; + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, intms); + nvme_irq_check(n); + break; + case NVME_REG_INTMC: + if (unlikely(msix_enabled(&(n->parent_obj)))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, + "undefined access to interrupt mask clr" + " when MSI-X is enabled"); + /* should be ignored, fall through for now */ + } + intms &= ~data; + stl_le_p(&n->bar.intms, intms); + n->bar.intmc = n->bar.intms; + trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, intms); + nvme_irq_check(n); + break; + case NVME_REG_CC: + trace_pci_nvme_mmio_cfg(data & 0xffffffff); + + /* Windows first sends data, then sends enable bit */ + if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) && + !NVME_CC_SHN(data) && !NVME_CC_SHN(cc)) + { + cc = data; + } + + if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) { + cc = data; + + /* flush CC since nvme_start_ctrl() needs the value */ + stl_le_p(&n->bar.cc, cc); + if (unlikely(nvme_start_ctrl(n))) { + trace_pci_nvme_err_startfail(); + csts = NVME_CSTS_FAILED; + } else { + trace_pci_nvme_mmio_start_success(); + csts = NVME_CSTS_READY; + } + } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) { + trace_pci_nvme_mmio_stopped(); + nvme_ctrl_reset(n); + cc = 0; + csts &= ~NVME_CSTS_READY; + } + + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) { + trace_pci_nvme_mmio_shutdown_set(); + nvme_ctrl_shutdown(n); + cc = data; + csts |= NVME_CSTS_SHST_COMPLETE; + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) { + 
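/*
 * Editor's note (illustrative sketch, not part of the patch): the CC handling
 * above implements the controller side of the standard enable/shutdown
 * handshake: the host programs AQA/ASQ/ACQ, sets CC.EN and polls CSTS.RDY;
 * for shutdown it writes CC.SHN and polls CSTS.SHST for completion. A rough
 * host-side sketch of the enable half; reg_read32()/reg_write32() are
 * hypothetical MMIO accessors, not QEMU code:
 *
 *   #include <stdbool.h>
 *   #include <stdint.h>
 *
 *   uint32_t reg_read32(uint64_t off);              // hypothetical
 *   void reg_write32(uint64_t off, uint32_t v);     // hypothetical
 *
 *   static bool nvme_enable(uint32_t cc_value)
 *   {
 *       reg_write32(0x14, cc_value | 0x1);          // CC, set EN
 *       for (int i = 0; i < 1000; i++) {
 *           if (reg_read32(0x1c) & 0x1) {           // CSTS.RDY
 *               return true;
 *           }
 *           // real code would wait up to CAP.TO * 500 ms here
 *       }
 *       return false;
 *   }
 */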
trace_pci_nvme_mmio_shutdown_cleared(); + csts &= ~NVME_CSTS_SHST_COMPLETE; + cc = data; + } + + stl_le_p(&n->bar.cc, cc); + stl_le_p(&n->bar.csts, csts); + + break; + case NVME_REG_CSTS: + if (data & (1 << 4)) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, + "attempted to W1C CSTS.NSSRO" + " but CAP.NSSRS is zero (not supported)"); + } else if (data != 0) { + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts, + "attempted to set a read only bit" + " of controller status"); + } + break; + case NVME_REG_NSSR: + if (data == 0x4e564d65) { + trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); + } else { + /* The spec says that writes of other values have no effect */ + return; + } + break; + case NVME_REG_AQA: + stl_le_p(&n->bar.aqa, data); + trace_pci_nvme_mmio_aqattr(data & 0xffffffff); + break; + case NVME_REG_ASQ: + stn_le_p(&n->bar.asq, size, data); + trace_pci_nvme_mmio_asqaddr(data); + break; + case NVME_REG_ASQ + 4: + stl_le_p((uint8_t *)&n->bar.asq + 4, data); + trace_pci_nvme_mmio_asqaddr_hi(data, ldq_le_p(&n->bar.asq)); + break; + case NVME_REG_ACQ: + trace_pci_nvme_mmio_acqaddr(data); + stn_le_p(&n->bar.acq, size, data); + break; + case NVME_REG_ACQ + 4: + stl_le_p((uint8_t *)&n->bar.acq + 4, data); + trace_pci_nvme_mmio_acqaddr_hi(data, ldq_le_p(&n->bar.acq)); + break; + case NVME_REG_CMBLOC: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, + "invalid write to reserved CMBLOC" + " when CMBSZ is zero, ignored"); + return; + case NVME_REG_CMBSZ: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, + "invalid write to read only CMBSZ, ignored"); + return; + case NVME_REG_CMBMSC: + if (!NVME_CAP_CMBS(cap)) { + return; + } + + stn_le_p(&n->bar.cmbmsc, size, data); + n->cmb.cmse = false; + + if (NVME_CMBMSC_CRE(data)) { + nvme_cmb_enable_regs(n); + + if (NVME_CMBMSC_CMSE(data)) { + uint64_t cmbmsc = ldq_le_p(&n->bar.cmbmsc); + hwaddr cba = NVME_CMBMSC_CBA(cmbmsc) << CMBMSC_CBA_SHIFT; + if (cba + int128_get64(n->cmb.mem.size) < cba) { + uint32_t cmbsts = ldl_le_p(&n->bar.cmbsts); + NVME_CMBSTS_SET_CBAI(cmbsts, 1); + stl_le_p(&n->bar.cmbsts, cmbsts); + return; + } + + n->cmb.cba = cba; + n->cmb.cmse = true; + } + } else { + n->bar.cmbsz = 0; + n->bar.cmbloc = 0; + } + + return; + case NVME_REG_CMBMSC + 4: + stl_le_p((uint8_t *)&n->bar.cmbmsc + 4, data); + return; + + case NVME_REG_PMRCAP: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, + "invalid write to PMRCAP register, ignored"); + return; + case NVME_REG_PMRCTL: + if (!NVME_CAP_PMRS(cap)) { + return; + } + + stl_le_p(&n->bar.pmrctl, data); + if (NVME_PMRCTL_EN(data)) { + memory_region_set_enabled(&n->pmr.dev->mr, true); + pmrsts = 0; + } else { + memory_region_set_enabled(&n->pmr.dev->mr, false); + NVME_PMRSTS_SET_NRDY(pmrsts, 1); + n->pmr.cmse = false; + } + stl_le_p(&n->bar.pmrsts, pmrsts); + return; + case NVME_REG_PMRSTS: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, + "invalid write to PMRSTS register, ignored"); + return; + case NVME_REG_PMREBS: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, + "invalid write to PMREBS register, ignored"); + return; + case NVME_REG_PMRSWTP: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, + "invalid write to PMRSWTP register, ignored"); + return; + case NVME_REG_PMRMSCL: + if (!NVME_CAP_PMRS(cap)) { + return; + } + + stl_le_p(&n->bar.pmrmscl, data); + n->pmr.cmse = false; + + if (NVME_PMRMSCL_CMSE(data)) { + uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu); + hwaddr cba = pmrmscu << 32 | + (NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT); + if (cba + 
int128_get64(n->pmr.dev->mr.size) < cba) { + NVME_PMRSTS_SET_CBAI(pmrsts, 1); + stl_le_p(&n->bar.pmrsts, pmrsts); + return; + } + + n->pmr.cmse = true; + n->pmr.cba = cba; + } + + return; + case NVME_REG_PMRMSCU: + if (!NVME_CAP_PMRS(cap)) { + return; + } + + stl_le_p(&n->bar.pmrmscu, data); + return; + default: + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, + "invalid MMIO write," + " offset=0x%"PRIx64", data=%"PRIx64"", + offset, data); + break; + } +} + +static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + uint8_t *ptr = (uint8_t *)&n->bar; + + trace_pci_nvme_mmio_read(addr, size); + + if (unlikely(addr & (sizeof(uint32_t) - 1))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32, + "MMIO read not 32-bit aligned," + " offset=0x%"PRIx64"", addr); + /* should RAZ, fall through for now */ + } else if (unlikely(size < sizeof(uint32_t))) { + NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall, + "MMIO read smaller than 32-bits," + " offset=0x%"PRIx64"", addr); + /* should RAZ, fall through for now */ + } + + if (addr > sizeof(n->bar) - size) { + NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, + "MMIO read beyond last register," + " offset=0x%"PRIx64", returning 0", addr); + + return 0; + } + + /* + * When PMRWBM bit 1 is set then read from + * from PMRSTS should ensure prior writes + * made it to persistent media + */ + if (addr == NVME_REG_PMRSTS && + (NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) { + memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); + } + + return ldn_le_p(ptr + addr, size); +} + +static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) +{ + uint32_t qid; + + if (unlikely(addr & ((1 << 2) - 1))) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, + "doorbell write not 32-bit aligned," + " offset=0x%"PRIx64", ignoring", addr); + return; + } + + if (((addr - 0x1000) >> 2) & 1) { + /* Completion queue doorbell write */ + + uint16_t new_head = val & 0xffff; + int start_sqs; + NvmeCQueue *cq; + + qid = (addr - (0x1000 + (1 << 2))) >> 3; + if (unlikely(nvme_check_cqid(n, qid))) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq, + "completion queue doorbell write" + " for nonexistent queue," + " sqid=%"PRIu32", ignoring", qid); + + /* + * NVM Express v1.3d, Section 4.1 state: "If host software writes + * an invalid value to the Submission Queue Tail Doorbell or + * Completion Queue Head Doorbell regiter and an Asynchronous Event + * Request command is outstanding, then an asynchronous event is + * posted to the Admin Completion Queue with a status code of + * Invalid Doorbell Write Value." + * + * Also note that the spec includes the "Invalid Doorbell Register" + * status code, but nowhere does it specify when to use it. + * However, it seems reasonable to use it here in a similar + * fashion. + */ + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + + return; + } + + cq = n->cq[qid]; + if (unlikely(new_head >= cq->size)) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead, + "completion queue doorbell write value" + " beyond queue size, sqid=%"PRIu32"," + " new_head=%"PRIu16", ignoring", + qid, new_head); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, + NVME_LOG_ERROR_INFO); + } + + return; + } + + trace_pci_nvme_mmio_doorbell_cq(cq->cqid, new_head); + + start_sqs = nvme_cq_full(cq) ? 
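/*
 * Editor's note (illustrative sketch, not part of the patch):
 * nvme_process_db() decodes doorbell writes assuming a doorbell stride of
 * zero (CAP.DSTRD = 0, i.e. 4-byte registers): submission queue y's tail
 * doorbell sits at 0x1000 + (2*y) * 4 and completion queue y's head doorbell
 * at 0x1000 + (2*y + 1) * 4, which is why the code tests bit 0 of
 * ((addr - 0x1000) >> 2) to distinguish SQ from CQ and then shifts by 3 to
 * recover the queue id. The general formula from the spec, for reference:
 *
 *   #include <stdint.h>
 *
 *   static uint64_t sq_tail_db(uint32_t qid, uint32_t dstrd)
 *   {
 *       return 0x1000 + (uint64_t)(2 * qid) * (4u << dstrd);
 *   }
 *
 *   static uint64_t cq_head_db(uint32_t qid, uint32_t dstrd)
 *   {
 *       return 0x1000 + (uint64_t)(2 * qid + 1) * (4u << dstrd);
 *   }
 */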
1 : 0; + cq->head = new_head; + if (start_sqs) { + NvmeSQueue *sq; + QTAILQ_FOREACH(sq, &cq->sq_list, entry) { + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } + timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } + + if (cq->tail == cq->head) { + if (cq->irq_enabled) { + n->cq_pending--; + } + + nvme_irq_deassert(n, cq); + } + } else { + /* Submission queue doorbell write */ + + uint16_t new_tail = val & 0xffff; + NvmeSQueue *sq; + + qid = (addr - 0x1000) >> 3; + if (unlikely(nvme_check_sqid(n, qid))) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq, + "submission queue doorbell write" + " for nonexistent queue," + " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + + return; + } + + sq = n->sq[qid]; + if (unlikely(new_tail >= sq->size)) { + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail, + "submission queue doorbell write value" + " beyond queue size, sqid=%"PRIu32"," + " new_tail=%"PRIu16", ignoring", + qid, new_tail); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, + NVME_LOG_ERROR_INFO); + } + + return; + } + + trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail); + + sq->tail = new_tail; + timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + + trace_pci_nvme_mmio_write(addr, data, size); + + if (addr < sizeof(n->bar)) { + nvme_write_bar(n, addr, data, size); + } else { + nvme_process_db(n, addr, data); + } +} + +static const MemoryRegionOps nvme_mmio_ops = { + .read = nvme_mmio_read, + .write = nvme_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 2, + .max_access_size = 8, + }, +}; + +static void nvme_cmb_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + stn_le_p(&n->cmb.buf[addr], size, data); +} + +static uint64_t nvme_cmb_read(void *opaque, hwaddr addr, unsigned size) +{ + NvmeCtrl *n = (NvmeCtrl *)opaque; + return ldn_le_p(&n->cmb.buf[addr], size); +} + +static const MemoryRegionOps nvme_cmb_ops = { + .read = nvme_cmb_read, + .write = nvme_cmb_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +static void nvme_check_constraints(NvmeCtrl *n, Error **errp) +{ + NvmeParams *params = &n->params; + + if (params->num_queues) { + warn_report("num_queues is deprecated; please use max_ioqpairs " + "instead"); + + params->max_ioqpairs = params->num_queues - 1; + } + + if (n->namespace.blkconf.blk && n->subsys) { + error_setg(errp, "subsystem support is unavailable with legacy " + "namespace ('drive' property)"); + return; + } + + if (params->max_ioqpairs < 1 || + params->max_ioqpairs > NVME_MAX_IOQPAIRS) { + error_setg(errp, "max_ioqpairs must be between 1 and %d", + NVME_MAX_IOQPAIRS); + return; + } + + if (params->msix_qsize < 1 || + params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) { + error_setg(errp, "msix_qsize must be between 1 and %d", + PCI_MSIX_FLAGS_QSIZE + 1); + return; + } + + if (!params->serial) { + error_setg(errp, "serial property not set"); + return; + } + + if (n->pmr.dev) { + if (host_memory_backend_is_mapped(n->pmr.dev)) { + error_setg(errp, "can't use already busy memdev: %s", + 
object_get_canonical_path_component(OBJECT(n->pmr.dev))); + return; + } + + if (!is_power_of_2(n->pmr.dev->size)) { + error_setg(errp, "pmr backend size needs to be power of 2 in size"); + return; + } + + host_memory_backend_set_mapped(n->pmr.dev, true); + } + + if (n->params.zasl > n->params.mdts) { + error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less " + "than or equal to mdts (Maximum Data Transfer Size)"); + return; + } + + if (!n->params.vsl) { + error_setg(errp, "vsl must be non-zero"); + return; + } +} + +static void nvme_init_state(NvmeCtrl *n) +{ + /* add one to max_ioqpairs to account for the admin queue pair */ + n->reg_size = pow2ceil(sizeof(NvmeBar) + + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); + n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); + n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); + n->temperature = NVME_TEMPERATURE; + n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; + n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); +} + +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint64_t cmb_size = n->params.cmb_size_mb * MiB; + uint64_t cap = ldq_le_p(&n->bar.cap); + + n->cmb.buf = g_malloc0(cmb_size); + memory_region_init_io(&n->cmb.mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", cmb_size); + pci_register_bar(pci_dev, NVME_CMB_BIR, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->cmb.mem); + + NVME_CAP_SET_CMBS(cap, 1); + stq_le_p(&n->bar.cap, cap); + + if (n->params.legacy_cmb) { + nvme_cmb_enable_regs(n); + n->cmb.cmse = true; + } +} + +static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint32_t pmrcap = ldl_le_p(&n->bar.pmrcap); + + NVME_PMRCAP_SET_RDS(pmrcap, 1); + NVME_PMRCAP_SET_WDS(pmrcap, 1); + NVME_PMRCAP_SET_BIR(pmrcap, NVME_PMR_BIR); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(pmrcap, 0x02); + NVME_PMRCAP_SET_CMSS(pmrcap, 1); + stl_le_p(&n->bar.pmrcap, pmrcap); + + pci_register_bar(pci_dev, NVME_PMR_BIR, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr); + + memory_region_set_enabled(&n->pmr.dev->mr, false); +} + +static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) +{ + uint8_t *pci_conf = pci_dev->config; + uint64_t bar_size, msix_table_size, msix_pba_size; + unsigned msix_table_offset, msix_pba_offset; + int ret; + + Error *err = NULL; + + pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_config_set_prog_interface(pci_conf, 0x2); + + if (n->params.use_intel_id) { + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); + pci_config_set_device_id(pci_conf, 0x5845); + } else { + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); + pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); + } + + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); + pcie_endpoint_cap_init(pci_dev, 0x80); + + bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); + msix_table_offset = bar_size; + msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; + + bar_size += msix_table_size; + bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); + msix_pba_offset = bar_size; + msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8; + + bar_size += msix_pba_size; + bar_size = pow2ceil(bar_size); + + memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + n->reg_size); + memory_region_add_subregion(&n->bar0, 
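/*
 * Editor's note (illustrative sketch, not part of the patch): the bar_size
 * computation in nvme_init_pci() above lays BAR0 out as the controller
 * registers plus doorbells (n->reg_size), then the MSI-X table, then the
 * MSI-X PBA, each 4 KiB aligned, with the total rounded up to a power of two
 * as PCI BAR sizes must be. The same arithmetic with align_up() and the
 * power-of-two rounding spelled out instead of using QEMU's helpers:
 *
 *   #include <stdint.h>
 *
 *   static uint64_t align_up(uint64_t v, uint64_t a)   // a is a power of two
 *   {
 *       return (v + a - 1) & ~(a - 1);
 *   }
 *
 *   static uint64_t nvme_bar0_size(uint64_t reg_size, unsigned msix_qsize)
 *   {
 *       uint64_t size = align_up(reg_size, 4096);
 *       size += 16ull * msix_qsize;              // MSI-X table, 16 B/entry
 *       size = align_up(size, 4096);
 *       size += align_up(msix_qsize, 64) / 8;    // MSI-X PBA, 1 bit/vector
 *
 *       uint64_t p = 1;                          // round up to power of two
 *       while (p < size) {
 *           p <<= 1;
 *       }
 *       return p;
 *   }
 */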
0, &n->iomem); + + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + ret = msix_init(pci_dev, n->params.msix_qsize, + &n->bar0, 0, msix_table_offset, + &n->bar0, 0, msix_pba_offset, 0, &err); + if (ret < 0) { + if (ret == -ENOTSUP) { + warn_report_err(err); + } else { + error_propagate(errp, err); + return ret; + } + } + + if (n->params.cmb_size_mb) { + nvme_init_cmb(n, pci_dev); + } + + if (n->pmr.dev) { + nvme_init_pmr(n, pci_dev); + } + + return 0; +} + +static void nvme_init_subnqn(NvmeCtrl *n) +{ + NvmeSubsystem *subsys = n->subsys; + NvmeIdCtrl *id = &n->id_ctrl; + + if (!subsys) { + snprintf((char *)id->subnqn, sizeof(id->subnqn), + "nqn.2019-08.org.qemu:%s", n->params.serial); + } else { + pstrcpy((char *)id->subnqn, sizeof(id->subnqn), (char*)subsys->subnqn); + } +} + +static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NvmeIdCtrl *id = &n->id_ctrl; + uint8_t *pci_conf = pci_dev->config; + uint64_t cap = ldq_le_p(&n->bar.cap); + + id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); + id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); + strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); + strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); + + id->cntlid = cpu_to_le16(n->cntlid); + + id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); + + id->rab = 6; + + if (n->params.use_intel_id) { + id->ieee[0] = 0xb3; + id->ieee[1] = 0x02; + id->ieee[2] = 0x00; + } else { + id->ieee[0] = 0x00; + id->ieee[1] = 0x54; + id->ieee[2] = 0x52; + } + + id->mdts = n->params.mdts; + id->ver = cpu_to_le32(NVME_SPEC_VER); + id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT); + id->cntrltype = 0x1; + + /* + * Because the controller always completes the Abort command immediately, + * there can never be more than one concurrently executing Abort command, + * so this value is never used for anything. Note that there can easily be + * many Abort commands in the queues, but they are not considered + * "executing" until processed by nvme_abort. + * + * The specification recommends a value of 3 for Abort Command Limit (four + * concurrently outstanding Abort commands), so lets use that though it is + * inconsequential. + */ + id->acl = 3; + id->aerl = n->params.aerl; + id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO; + id->lpa = NVME_LPA_NS_SMART | NVME_LPA_CSE | NVME_LPA_EXTENDED; + + /* recommended default value (~70 C) */ + id->wctemp = cpu_to_le16(NVME_TEMPERATURE_WARNING); + id->cctemp = cpu_to_le16(NVME_TEMPERATURE_CRITICAL); + + id->sqes = (0x6 << 4) | 0x6; + id->cqes = (0x4 << 4) | 0x4; + id->nn = cpu_to_le32(NVME_MAX_NAMESPACES); + id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | + NVME_ONCS_FEATURES | NVME_ONCS_DSM | + NVME_ONCS_COMPARE | NVME_ONCS_COPY); + + /* + * NOTE: If this device ever supports a command set that does NOT use 0x0 + * as a Flush-equivalent operation, support for the broadcast NSID in Flush + * should probably be removed. + * + * See comment in nvme_io_cmd. 
+ */ + id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT; + + id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0); + id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN | + NVME_CTRL_SGLS_BITBUCKET); + + nvme_init_subnqn(n); + + id->psd[0].mp = cpu_to_le16(0x9c4); + id->psd[0].enlat = cpu_to_le32(0x10); + id->psd[0].exlat = cpu_to_le32(0x4); + + if (n->subsys) { + id->cmic |= NVME_CMIC_MULTI_CTRL; + } + + NVME_CAP_SET_MQES(cap, 0x7ff); + NVME_CAP_SET_CQR(cap, 1); + NVME_CAP_SET_TO(cap, 0xf); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_NVM); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_CSI_SUPP); + NVME_CAP_SET_CSS(cap, NVME_CAP_CSS_ADMIN_ONLY); + NVME_CAP_SET_MPSMAX(cap, 4); + NVME_CAP_SET_CMBS(cap, n->params.cmb_size_mb ? 1 : 0); + NVME_CAP_SET_PMRS(cap, n->pmr.dev ? 1 : 0); + stq_le_p(&n->bar.cap, cap); + + stl_le_p(&n->bar.vs, NVME_SPEC_VER); + n->bar.intmc = n->bar.intms = 0; +} + +static int nvme_init_subsys(NvmeCtrl *n, Error **errp) +{ + int cntlid; + + if (!n->subsys) { + return 0; + } + + cntlid = nvme_subsys_register_ctrl(n, errp); + if (cntlid < 0) { + return -1; + } + + n->cntlid = cntlid; + + return 0; +} + +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + uint32_t nsid = ns->params.nsid; + assert(nsid && nsid <= NVME_MAX_NAMESPACES); + + n->namespaces[nsid] = ns; + ns->attached++; + + n->dmrsl = MIN_NON_ZERO(n->dmrsl, + BDRV_REQUEST_MAX_BYTES / nvme_l2b(ns, 1)); +} + +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = NVME(pci_dev); + NvmeNamespace *ns; + Error *local_err = NULL; + + nvme_check_constraints(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, + &pci_dev->qdev, n->parent_obj.qdev.id); + + nvme_init_state(n); + if (nvme_init_pci(n, pci_dev, errp)) { + return; + } + + if (nvme_init_subsys(n, errp)) { + error_propagate(errp, local_err); + return; + } + nvme_init_ctrl(n, pci_dev); + + /* setup a namespace if the controller drive property was given */ + if (n->namespace.blkconf.blk) { + ns = &n->namespace; + ns->params.nsid = 1; + + if (nvme_ns_setup(ns, errp)) { + return; + } + + nvme_attach_ns(n, ns); + } +} + +static void nvme_exit(PCIDevice *pci_dev) +{ + NvmeCtrl *n = NVME(pci_dev); + NvmeNamespace *ns; + int i; + + nvme_ctrl_reset(n); + + if (n->subsys) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + ns = nvme_ns(n, i); + if (ns) { + ns->attached--; + } + } + + nvme_subsys_unregister_ctrl(n->subsys, n); + } + + g_free(n->cq); + g_free(n->sq); + g_free(n->aer_reqs); + + if (n->params.cmb_size_mb) { + g_free(n->cmb.buf); + } + + if (n->pmr.dev) { + host_memory_backend_set_mapped(n->pmr.dev, false); + } + msix_uninit(pci_dev, &n->bar0, &n->bar0); + memory_region_del_subregion(&n->bar0, &n->iomem); +} + +static Property nvme_props[] = { + DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.blkconf), + DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr.dev, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS, + NvmeSubsystem *), + DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial), + DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), + DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), + DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65), + DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3), + DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64), + 
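/*
 * Editor's note (illustrative sketch, not part of the patch): several CAP
 * fields set in nvme_init_ctrl() above use "minus one" or log2 encodings.
 * MQES is 0's based (0x7ff advertises 2048 entries per I/O queue), TO is in
 * 500 ms units, and MPSMIN/MPSMAX are powers of two relative to 4 KiB
 * (MPSMAX = 4 therefore advertises a 64 KiB maximum page). A host decoding
 * CAP might do roughly the following (hypothetical helper names):
 *
 *   #include <stdint.h>
 *
 *   static uint32_t cap_max_queue_entries(uint64_t cap)
 *   {
 *       return (uint32_t)(cap & 0xffff) + 1;            // MQES, bits 15:0
 *   }
 *
 *   static uint32_t cap_timeout_ms(uint64_t cap)
 *   {
 *       return (uint32_t)((cap >> 24) & 0xff) * 500;    // TO, bits 31:24
 *   }
 *
 *   static uint64_t cap_mpsmax_bytes(uint64_t cap)
 *   {
 *       return 1ull << (12 + ((cap >> 52) & 0xf));      // MPSMAX, bits 55:52
 *   }
 */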
DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7), + DEFINE_PROP_UINT8("vsl", NvmeCtrl, params.vsl, 7), + DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false), + DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false), + DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0), + DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl, + params.auto_transition_zones, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *n = NVME(obj); + uint8_t value = n->smart_critical_warning; + + visit_type_uint8(v, name, &value, errp); +} + +static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *n = NVME(obj); + uint8_t value, old_value, cap = 0, index, event; + + if (!visit_type_uint8(v, name, &value, errp)) { + return; + } + + cap = NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY + | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; + if (NVME_CAP_PMRS(ldq_le_p(&n->bar.cap))) { + cap |= NVME_SMART_PMR_UNRELIABLE; + } + + if ((value & cap) != value) { + error_setg(errp, "unsupported smart critical warning bits: 0x%x", + value & ~cap); + return; + } + + old_value = n->smart_critical_warning; + n->smart_critical_warning = value; + + /* only inject new bits of smart critical warning */ + for (index = 0; index < NVME_SMART_WARN_MAX; index++) { + event = 1 << index; + if (value & ~old_value & event) + nvme_smart_event(n, event); + } +} + +static const VMStateDescription nvme_vmstate = { + .name = "nvme", + .unmigratable = 1, +}; + +static void nvme_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + + pc->realize = nvme_realize; + pc->exit = nvme_exit; + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->revision = 2; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "Non-Volatile Memory Express"; + device_class_set_props(dc, nvme_props); + dc->vmsd = &nvme_vmstate; +} + +static void nvme_instance_init(Object *obj) +{ + NvmeCtrl *n = NVME(obj); + + device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex, + "bootindex", "/namespace@1,0", + DEVICE(obj)); + + object_property_add(obj, "smart_critical_warning", "uint8", + nvme_get_smart_warning, + nvme_set_smart_warning, NULL, NULL); +} + +static const TypeInfo nvme_info = { + .name = TYPE_NVME, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(NvmeCtrl), + .instance_init = nvme_instance_init, + .class_init = nvme_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static const TypeInfo nvme_bus_info = { + .name = TYPE_NVME_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(NvmeBus), +}; + +static void nvme_register_types(void) +{ + type_register_static(&nvme_info); + type_register_static(&nvme_bus_info); +} + +type_init(nvme_register_types) diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c new file mode 100644 index 00000000000..5dbd18b2a4a --- /dev/null +++ b/hw/nvme/dif.c @@ -0,0 +1,509 @@ +/* + * QEMU NVM Express End-to-End Data Protection support + * + * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
+ * + * Authors: + * Klaus Jensen + * Gollu Appalanaidu + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/block-backend.h" + +#include "nvme.h" +#include "trace.h" + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, + uint32_t reftag) +{ + if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_1) && + (prinfo & NVME_PRINFO_PRCHK_REF) && (slba & 0xffffffff) != reftag) { + return NVME_INVALID_PROT_INFO | NVME_DNR; + } + + return NVME_SUCCESS; +} + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static uint16_t crc_t10dif(uint16_t crc, const unsigned char *buffer, + size_t len) +{ + unsigned int i; + + for (i = 0; i < len; i++) { + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + } + + return crc; +} + +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t *reftag) +{ + uint8_t *end = buf + len; + int16_t pil = 0; + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, + apptag, *reftag); + + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + + if (pil) { + crc = crc_t10dif(crc, mbuf, pil); + } + + dif->guard = cpu_to_be16(crc); + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(*reftag); + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + (*reftag)++; + } + } +} + +static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, + uint8_t *buf, uint8_t *mbuf, size_t pil, + uint8_t prinfo, uint16_t apptag, + uint16_t appmask, uint32_t reftag) +{ + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_3: + if (be32_to_cpu(dif->reftag) != 0xffffffff) { + break; + } + + /* fallthrough */ + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + if (be16_to_cpu(dif->apptag) != 0xffff) { + break; + } + + trace_pci_nvme_dif_prchk_disabled(be16_to_cpu(dif->apptag), + be32_to_cpu(dif->reftag)); + + return NVME_SUCCESS; + } + + if (prinfo & NVME_PRINFO_PRCHK_GUARD) { + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); + + if (pil) { + crc = crc_t10dif(crc, mbuf, pil); + } + + trace_pci_nvme_dif_prchk_guard(be16_to_cpu(dif->guard), crc); + + if (be16_to_cpu(dif->guard) != crc) { + return NVME_E2E_GUARD_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_APP) { + trace_pci_nvme_dif_prchk_apptag(be16_to_cpu(dif->apptag), apptag, + appmask); + + if ((be16_to_cpu(dif->apptag) & appmask) != (apptag & appmask)) { + return NVME_E2E_APP_ERROR; + } + } + + if (prinfo & NVME_PRINFO_PRCHK_REF) { + trace_pci_nvme_dif_prchk_reftag(be32_to_cpu(dif->reftag), reftag); + + if (be32_to_cpu(dif->reftag) != reftag) { + return NVME_E2E_REF_ERROR; + } + } + + return NVME_SUCCESS; +} + +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t *reftag) +{ + uint8_t *end = buf + len; + int16_t pil = 0; + uint16_t status; + + status = nvme_check_prinfo(ns, prinfo, slba, *reftag); + if (status) { + return status; + } + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + trace_pci_nvme_dif_check(prinfo, ns->lbasz + pil); + + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { + NvmeDifTuple 
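/*
 * Editor's note (illustrative sketch, not part of the patch): crc_t10dif()
 * above is the table-driven CRC taken from the Linux kernel; the lookup
 * table (t10_dif_crc_table) is defined elsewhere and is not visible in this
 * hunk. The same checksum can be computed bit by bit with the T10-DIF
 * generator polynomial 0x8BB7, which is slower but easier to follow; a
 * reference version for comparison:
 *
 *   #include <stddef.h>
 *   #include <stdint.h>
 *
 *   static uint16_t crc_t10dif_bitwise(uint16_t crc, const uint8_t *buf,
 *                                      size_t len)
 *   {
 *       for (size_t i = 0; i < len; i++) {
 *           crc ^= (uint16_t)buf[i] << 8;
 *           for (int bit = 0; bit < 8; bit++) {
 *               crc = (crc & 0x8000) ? (uint16_t)((crc << 1) ^ 0x8bb7)
 *                                    : (uint16_t)(crc << 1);
 *           }
 *       }
 *       return crc;
 *   }
 */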
*dif = (NvmeDifTuple *)(mbuf + pil); + + status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, prinfo, apptag, + appmask, *reftag); + if (status) { + return status; + } + + if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) != NVME_ID_NS_DPS_TYPE_3) { + (*reftag)++; + } + } + + return NVME_SUCCESS; +} + +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba) +{ + BlockBackend *blk = ns->blkconf.blk; + BlockDriverState *bs = blk_bs(blk); + + int64_t moffset = 0, offset = nvme_l2b(ns, slba); + uint8_t *mbufp, *end; + bool zeroed; + int16_t pil = 0; + int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds; + int64_t pnum = 0; + + Error *err = NULL; + + + if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + } + + do { + int ret; + + bytes -= pnum; + + ret = bdrv_block_status(bs, offset, bytes, &pnum, NULL, NULL); + if (ret < 0) { + error_setg_errno(&err, -ret, "unable to get block status"); + error_report_err(err); + + return NVME_INTERNAL_DEV_ERROR; + } + + zeroed = !!(ret & BDRV_BLOCK_ZERO); + + trace_pci_nvme_block_status(offset, bytes, pnum, ret, zeroed); + + if (zeroed) { + mbufp = mbuf + moffset; + mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms; + end = mbufp + mlen; + + for (; mbufp < end; mbufp += ns->lbaf.ms) { + memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); + } + } + + moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms; + offset += pnum; + } while (pnum != bytes); + + return NVME_SUCCESS; +} + +static void nvme_dif_rw_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_cb(nvme_cid(req), blk_name(blk)); + + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + nvme_rw_complete_cb(req, ret); +} + +static void nvme_dif_rw_check_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeCtrl *n = nvme_ctrl(req); + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + uint16_t status; + + trace_pci_nvme_dif_rw_check_cb(nvme_cid(req), prinfo, apptag, appmask, + reftag); + + if (ret) { + goto out; + } + + status = nvme_dif_mangle_mdata(ns, ctx->mdata.bounce, ctx->mdata.iov.size, + slba); + if (status) { + req->status = status; + goto out; + } + + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, + slba, apptag, appmask, &reftag); + if (status) { + req->status = status; + goto out; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + goto out; + } + + if (prinfo & NVME_PRINFO_PRACT && ns->lbaf.ms == 8) { + goto out; + } + + status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, + NVME_TX_DIRECTION_FROM_DEVICE, req); + if (status) { + req->status = status; + } + +out: + nvme_dif_rw_cb(ctx, ret); +} + +static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = 
le64_to_cpu(rw->slba); + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + size_t mlen = nvme_m2b(ns, nlb); + uint64_t offset = nvme_moff(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_reset(&ctx->mdata.iov); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + req->aiocb = blk_aio_preadv(blk, offset, &ctx->mdata.iov, 0, + nvme_dif_rw_check_cb, ctx); + return; + +out: + nvme_dif_rw_cb(ctx, ret); +} + +static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) +{ + NvmeBounceContext *ctx = opaque; + NvmeRequest *req = ctx->req; + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t slba = le64_to_cpu(rw->slba); + uint64_t offset = nvme_moff(ns, slba); + BlockBackend *blk = ns->blkconf.blk; + + trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); + + if (ret) { + goto out; + } + + req->aiocb = blk_aio_pwritev(blk, offset, &ctx->mdata.iov, 0, + nvme_dif_rw_cb, ctx); + return; + +out: + nvme_dif_rw_cb(ctx, ret); +} + +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + NvmeNamespace *ns = req->ns; + BlockBackend *blk = ns->blkconf.blk; + bool wrz = rw->opcode == NVME_CMD_WRITE_ZEROES; + uint32_t nlb = le16_to_cpu(rw->nlb) + 1; + uint64_t slba = le64_to_cpu(rw->slba); + size_t len = nvme_l2b(ns, nlb); + size_t mlen = nvme_m2b(ns, nlb); + size_t mapped_len = len; + int64_t offset = nvme_l2b(ns, slba); + uint8_t prinfo = NVME_RW_PRINFO(le16_to_cpu(rw->control)); + uint16_t apptag = le16_to_cpu(rw->apptag); + uint16_t appmask = le16_to_cpu(rw->appmask); + uint32_t reftag = le32_to_cpu(rw->reftag); + bool pract = !!(prinfo & NVME_PRINFO_PRACT); + NvmeBounceContext *ctx; + uint16_t status; + + trace_pci_nvme_dif_rw(pract, prinfo); + + ctx = g_new0(NvmeBounceContext, 1); + ctx->req = req; + + if (wrz) { + BdrvRequestFlags flags = BDRV_REQ_MAY_UNMAP; + + if (prinfo & NVME_PRINFO_PRCHK_MASK) { + status = NVME_INVALID_PROT_INFO | NVME_DNR; + goto err; + } + + if (pract) { + uint8_t *mbuf, *end; + int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); + + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + goto err; + } + + flags = 0; + + ctx->mdata.bounce = g_malloc0(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + mbuf = ctx->mdata.bounce; + end = mbuf + mlen; + + if (ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT) { + pil = 0; + } + + for (; mbuf < end; mbuf += ns->lbaf.ms) { + NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); + + dif->apptag = cpu_to_be16(apptag); + dif->reftag = cpu_to_be32(reftag); + + switch (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { + case NVME_ID_NS_DPS_TYPE_1: + case NVME_ID_NS_DPS_TYPE_2: + reftag++; + } + } + } + + req->aiocb = blk_aio_pwrite_zeroes(blk, offset, len, flags, + nvme_dif_rw_mdata_out_cb, ctx); + return NVME_NO_COMPLETE; + } + + if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { + mapped_len += mlen; + } + + status = nvme_map_dptr(n, &req->sg, mapped_len, &req->cmd); + if (status) { + goto err; + } + + ctx->data.bounce = g_malloc(len); + + qemu_iovec_init(&ctx->data.iov, 1); + qemu_iovec_add(&ctx->data.iov, ctx->data.bounce, len); + + if (req->cmd.opcode == NVME_CMD_READ) { + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_READ); + + req->aiocb = blk_aio_preadv(ns->blkconf.blk, offset, &ctx->data.iov, 0, + 
nvme_dif_rw_mdata_in_cb, ctx); + return NVME_NO_COMPLETE; + } + + status = nvme_bounce_data(n, ctx->data.bounce, ctx->data.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + + ctx->mdata.bounce = g_malloc(mlen); + + qemu_iovec_init(&ctx->mdata.iov, 1); + qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); + + if (!(pract && ns->lbaf.ms == 8)) { + status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, + NVME_TX_DIRECTION_TO_DEVICE, req); + if (status) { + goto err; + } + } + + status = nvme_check_prinfo(ns, prinfo, slba, reftag); + if (status) { + goto err; + } + + if (pract) { + /* splice generated protection information into the buffer */ + nvme_dif_pract_generate_dif(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, + apptag, &reftag); + } else { + status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, + ctx->mdata.bounce, ctx->mdata.iov.size, prinfo, + slba, apptag, appmask, &reftag); + if (status) { + goto err; + } + } + + block_acct_start(blk_get_stats(blk), &req->acct, ctx->data.iov.size, + BLOCK_ACCT_WRITE); + + req->aiocb = blk_aio_pwritev(ns->blkconf.blk, offset, &ctx->data.iov, 0, + nvme_dif_rw_mdata_out_cb, ctx); + + return NVME_NO_COMPLETE; + +err: + qemu_iovec_destroy(&ctx->data.iov); + g_free(ctx->data.bounce); + + qemu_iovec_destroy(&ctx->mdata.iov); + g_free(ctx->mdata.bounce); + + g_free(ctx); + + return status; +} diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build new file mode 100644 index 00000000000..3cf40046eea --- /dev/null +++ b/hw/nvme/meson.build @@ -0,0 +1 @@ +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c new file mode 100644 index 00000000000..8b5f98c7618 --- /dev/null +++ b/hw/nvme/ns.c @@ -0,0 +1,595 @@ +/* + * QEMU NVM Express Virtual Namespace + * + * Copyright (c) 2019 CNEX Labs + * Copyright (c) 2020 Samsung Electronics + * + * Authors: + * Klaus Jensen + * + * This work is licensed under the terms of the GNU GPL, version 2. See the + * COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "sysemu/block-backend.h" + +#include "nvme.h" +#include "trace.h" + +#define MIN_DISCARD_GRANULARITY (4 * KiB) +#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) + +void nvme_ns_init_format(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + BlockDriverInfo bdi; + int npdg, nlbas, ret; + + ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; + ns->lbasz = 1 << ns->lbaf.ds; + + nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); + + id_ns->nsze = cpu_to_le64(nlbas); + + /* no thin provisioning */ + id_ns->ncap = id_ns->nsze; + id_ns->nuse = id_ns->ncap; + + ns->moff = (int64_t)nlbas << ns->lbaf.ds; + + npdg = ns->blkconf.discard_granularity / ns->lbasz; + + ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); + if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { + npdg = bdi.cluster_size / ns->lbasz; + } + + id_ns->npda = id_ns->npdg = npdg - 1; +} + +static int nvme_ns_init(NvmeNamespace *ns, Error **errp) +{ + static uint64_t ns_count; + NvmeIdNs *id_ns = &ns->id_ns; + uint8_t ds; + uint16_t ms; + int i; + + ns->csi = NVME_CSI_NVM; + ns->status = 0x0; + + ns->id_ns.dlfeat = 0x1; + + /* support DULBE and I/O optimization fields */ + id_ns->nsfeat |= (0x4 | 0x10); + + if (ns->params.shared) { + id_ns->nmic |= NVME_NMIC_NS_SHARED; + } + + /* Substitute a missing EUI-64 by an autogenerated one */ + ++ns_count; + if (!ns->params.eui64 && ns->params.eui64_default) { + ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT; + } + + /* simple copy */ + id_ns->mssrl = cpu_to_le16(ns->params.mssrl); + id_ns->mcl = cpu_to_le32(ns->params.mcl); + id_ns->msrc = ns->params.msrc; + id_ns->eui64 = cpu_to_be64(ns->params.eui64); + + ds = 31 - clz32(ns->blkconf.logical_block_size); + ms = ns->params.ms; + + id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; + + if (ms && ns->params.mset) { + id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; + } + + id_ns->dpc = 0x1f; + id_ns->dps = ns->params.pi; + if (ns->params.pi && ns->params.pil) { + id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; + } + + static const NvmeLBAF lbaf[16] = { + [0] = { .ds = 9 }, + [1] = { .ds = 9, .ms = 8 }, + [2] = { .ds = 9, .ms = 16 }, + [3] = { .ds = 9, .ms = 64 }, + [4] = { .ds = 12 }, + [5] = { .ds = 12, .ms = 8 }, + [6] = { .ds = 12, .ms = 16 }, + [7] = { .ds = 12, .ms = 64 }, + }; + + memcpy(&id_ns->lbaf, &lbaf, sizeof(lbaf)); + id_ns->nlbaf = 7; + + for (i = 0; i <= id_ns->nlbaf; i++) { + NvmeLBAF *lbaf = &id_ns->lbaf[i]; + if (lbaf->ds == ds) { + if (lbaf->ms == ms) { + id_ns->flbas |= i; + goto lbaf_found; + } + } + } + + /* add non-standard lba format */ + id_ns->nlbaf++; + id_ns->lbaf[id_ns->nlbaf].ds = ds; + id_ns->lbaf[id_ns->nlbaf].ms = ms; + id_ns->flbas |= id_ns->nlbaf; + +lbaf_found: + nvme_ns_init_format(ns); + + return 0; +} + +static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) +{ + bool read_only; + + if (!blkconf_blocksizes(&ns->blkconf, errp)) { + return -1; + } + + read_only = !blk_supports_write_perm(ns->blkconf.blk); + if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { + return -1; + } + + if (ns->blkconf.discard_granularity == -1) { + ns->blkconf.discard_granularity = + MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); + } + + ns->size = blk_getlength(ns->blkconf.blk); + if (ns->size < 0) { + error_setg_errno(errp, -ns->size, "could not get blockdev size"); + return -1; + } + + return 0; +} + +static 
int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) +{ + uint64_t zone_size, zone_cap; + + /* Make sure that the values of ZNS properties are sane */ + if (ns->params.zone_size_bs) { + zone_size = ns->params.zone_size_bs; + } else { + zone_size = NVME_DEFAULT_ZONE_SIZE; + } + if (ns->params.zone_cap_bs) { + zone_cap = ns->params.zone_cap_bs; + } else { + zone_cap = zone_size; + } + if (zone_cap > zone_size) { + error_setg(errp, "zone capacity %"PRIu64"B exceeds " + "zone size %"PRIu64"B", zone_cap, zone_size); + return -1; + } + if (zone_size < ns->lbasz) { + error_setg(errp, "zone size %"PRIu64"B too small, " + "must be at least %zuB", zone_size, ns->lbasz); + return -1; + } + if (zone_cap < ns->lbasz) { + error_setg(errp, "zone capacity %"PRIu64"B too small, " + "must be at least %zuB", zone_cap, ns->lbasz); + return -1; + } + + /* + * Save the main zone geometry values to avoid + * recalculating them later. + */ + ns->zone_size = zone_size / ns->lbasz; + ns->zone_capacity = zone_cap / ns->lbasz; + ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; + + /* Do a few more sanity checks of ZNS properties */ + if (!ns->num_zones) { + error_setg(errp, + "insufficient drive capacity, must be at least the size " + "of one zone (%"PRIu64"B)", zone_size); + return -1; + } + + return 0; +} + +static void nvme_ns_zoned_init_state(NvmeNamespace *ns) +{ + uint64_t start = 0, zone_size = ns->zone_size; + uint64_t capacity = ns->num_zones * zone_size; + NvmeZone *zone; + int i; + + ns->zone_array = g_new0(NvmeZone, ns->num_zones); + if (ns->params.zd_extension_size) { + ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * + ns->num_zones); + } + + QTAILQ_INIT(&ns->exp_open_zones); + QTAILQ_INIT(&ns->imp_open_zones); + QTAILQ_INIT(&ns->closed_zones); + QTAILQ_INIT(&ns->full_zones); + + zone = ns->zone_array; + for (i = 0; i < ns->num_zones; i++, zone++) { + if (start + zone_size > capacity) { + zone_size = capacity - start; + } + zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; + nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); + zone->d.za = 0; + zone->d.zcap = ns->zone_capacity; + zone->d.zslba = start; + zone->d.wp = start; + zone->w_ptr = start; + start += zone_size; + } + + ns->zone_size_log2 = 0; + if (is_power_of_2(ns->zone_size)) { + ns->zone_size_log2 = 63 - clz64(ns->zone_size); + } +} + +static void nvme_ns_init_zoned(NvmeNamespace *ns) +{ + NvmeIdNsZoned *id_ns_z; + int i; + + nvme_ns_zoned_init_state(ns); + + id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); + + /* MAR/MOR are zeroes-based, FFFFFFFFh means no limit */ + id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); + id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); + id_ns_z->zoc = 0; + id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00; + + for (i = 0; i <= ns->id_ns.nlbaf; i++) { + id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); + id_ns_z->lbafe[i].zdes = + ns->params.zd_extension_size >> 6; /* Units of 64B */ + } + + ns->csi = NVME_CSI_ZONED; + ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); + ns->id_ns.ncap = ns->id_ns.nsze; + ns->id_ns.nuse = ns->id_ns.ncap; + + /* + * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" + * status of logical blocks. Since the spec defines that logical blocks + * SHALL be deallocated when the zone is in the Empty or Offline states, + * we can only support DULBE if the zone size is a multiple of the + * calculated NPDG.
+ */ + if (ns->zone_size % (ns->id_ns.npdg + 1)) { + warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " + "the calculated deallocation granularity (%d blocks); " + "DULBE support disabled", + ns->zone_size, ns->id_ns.npdg + 1); + + ns->id_ns.nsfeat &= ~0x4; + } + + ns->id_ns_zoned = id_ns_z; +} + +static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) +{ + uint8_t state; + + zone->w_ptr = zone->d.wp; + state = nvme_get_zone_state(zone); + if (zone->d.wp != zone->d.zslba || + (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { + if (state != NVME_ZONE_STATE_CLOSED) { + trace_pci_nvme_clear_ns_close(state, zone->d.zslba); + nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); + } + nvme_aor_inc_active(ns); + QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); + } else { + trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); + nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); + } +} + +/* + * Close all the zones that are currently open. + */ +static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) +{ + NvmeZone *zone, *next; + + QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { + QTAILQ_REMOVE(&ns->closed_zones, zone, entry); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { + QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); + nvme_aor_dec_open(ns); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { + QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); + nvme_aor_dec_open(ns); + nvme_aor_dec_active(ns); + nvme_clear_zone(ns, zone); + } + + assert(ns->nr_open_zones == 0); +} + +static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) +{ + if (!ns->blkconf.blk) { + error_setg(errp, "block backend not configured"); + return -1; + } + + if (ns->params.pi && ns->params.ms < 8) { + error_setg(errp, "at least 8 bytes of metadata required to enable " + "protection information"); + return -1; + } + + if (ns->params.nsid > NVME_MAX_NAMESPACES) { + error_setg(errp, "invalid namespace id (must be between 0 and %d)", + NVME_MAX_NAMESPACES); + return -1; + } + + if (ns->params.zoned) { + if (ns->params.max_active_zones) { + if (ns->params.max_open_zones > ns->params.max_active_zones) { + error_setg(errp, "max_open_zones (%u) exceeds " + "max_active_zones (%u)", ns->params.max_open_zones, + ns->params.max_active_zones); + return -1; + } + + if (!ns->params.max_open_zones) { + ns->params.max_open_zones = ns->params.max_active_zones; + } + } + + if (ns->params.zd_extension_size) { + if (ns->params.zd_extension_size & 0x3f) { + error_setg(errp, "zone descriptor extension size must be a " + "multiple of 64B"); + return -1; + } + if ((ns->params.zd_extension_size >> 6) > 0xff) { + error_setg(errp, + "zone descriptor extension size is too large"); + return -1; + } + } + } + + return 0; +} + +int nvme_ns_setup(NvmeNamespace *ns, Error **errp) +{ + if (nvme_ns_check_constraints(ns, errp)) { + return -1; + } + + if (nvme_ns_init_blk(ns, errp)) { + return -1; + } + + if (nvme_ns_init(ns, errp)) { + return -1; + } + if (ns->params.zoned) { + if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { + return -1; + } + nvme_ns_init_zoned(ns); + } + + return 0; +} + +void nvme_ns_drain(NvmeNamespace *ns) +{ + blk_drain(ns->blkconf.blk); +} + +void nvme_ns_shutdown(NvmeNamespace *ns) +{ + blk_flush(ns->blkconf.blk); + if (ns->params.zoned) { + nvme_zoned_ns_shutdown(ns); + } +} + +void nvme_ns_cleanup(NvmeNamespace *ns) +{ + if (ns->params.zoned) { + 
g_free(ns->id_ns_zoned); + g_free(ns->zone_array); + g_free(ns->zd_extensions); + } +} + +static void nvme_ns_unrealize(DeviceState *dev) +{ + NvmeNamespace *ns = NVME_NS(dev); + + nvme_ns_drain(ns); + nvme_ns_shutdown(ns); + nvme_ns_cleanup(ns); +} + +static void nvme_ns_realize(DeviceState *dev, Error **errp) +{ + NvmeNamespace *ns = NVME_NS(dev); + BusState *s = qdev_get_parent_bus(dev); + NvmeCtrl *n = NVME(s->parent); + NvmeSubsystem *subsys = n->subsys; + uint32_t nsid = ns->params.nsid; + int i; + + if (!n->subsys) { + if (ns->params.detached) { + error_setg(errp, "detached requires that the nvme device is " + "linked to an nvme-subsys device"); + return; + } + } else { + /* + * If this namespace belongs to a subsystem (through a link on the + * controller device), reparent the device. + */ + if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { + return; + } + } + + if (nvme_ns_setup(ns, errp)) { + return; + } + + if (!nsid) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { + continue; + } + + nsid = ns->params.nsid = i; + break; + } + + if (!nsid) { + error_setg(errp, "no free namespace id"); + return; + } + } else { + if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { + error_setg(errp, "namespace id '%d' already allocated", nsid); + return; + } + } + + if (subsys) { + subsys->namespaces[nsid] = ns; + + if (ns->params.detached) { + return; + } + + if (ns->params.shared) { + for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { + NvmeCtrl *ctrl = subsys->ctrls[i]; + + if (ctrl) { + nvme_attach_ns(ctrl, ns); + } + } + + return; + } + } + + nvme_attach_ns(n, ns); +} + +static Property nvme_ns_props[] = { + DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), + DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), + DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), + DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), + DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), + DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), + DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), + DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), + DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), + DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), + DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), + DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), + DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), + DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, + NVME_DEFAULT_ZONE_SIZE), + DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, + 0), + DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, + params.cross_zone_read, false), + DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, + params.max_active_zones, 0), + DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, + params.max_open_zones, 0), + DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, + params.zd_extension_size, 0), + DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, + true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_ns_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + + dc->bus_type = TYPE_NVME_BUS; + dc->realize = nvme_ns_realize; + dc->unrealize = nvme_ns_unrealize; + device_class_set_props(dc, nvme_ns_props); + dc->desc = "Virtual NVMe namespace"; +} + 
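+/*
+ * Usage sketch (illustrative only, not part of the device model): with the
+ * properties defined in nvme_ns_props above, a shared namespace with 8 bytes
+ * of metadata and type 1 protection information, attached through a
+ * subsystem, can be configured roughly as:
+ *
+ *   -device nvme-subsys,id=nvme-subsys-0
+ *   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
+ *   -device nvme-ns,drive=nvm0,nsid=1,ms=8,pi=1
+ *
+ * The "subsys" link on the controller lives in ctrl.c (not shown in this
+ * file), and "nvm0" is assumed to name a drive or blockdev defined
+ * elsewhere on the command line.
+ */
+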
+static void nvme_ns_instance_init(Object *obj) +{ + NvmeNamespace *ns = NVME_NS(obj); + char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); + + device_add_bootindex_property(obj, &ns->bootindex, "bootindex", + bootindex, DEVICE(obj)); + + g_free(bootindex); +} + +static const TypeInfo nvme_ns_info = { + .name = TYPE_NVME_NS, + .parent = TYPE_DEVICE, + .class_init = nvme_ns_class_init, + .instance_size = sizeof(NvmeNamespace), + .instance_init = nvme_ns_instance_init, +}; + +static void nvme_ns_register_types(void) +{ + type_register_static(&nvme_ns_info); +} + +type_init(nvme_ns_register_types) diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h new file mode 100644 index 00000000000..83ffabade4c --- /dev/null +++ b/hw/nvme/nvme.h @@ -0,0 +1,556 @@ +/* + * QEMU NVM Express + * + * Copyright (c) 2012 Intel Corporation + * Copyright (c) 2021 Minwoo Im + * Copyright (c) 2021 Samsung Electronics Co., Ltd. + * + * Authors: + * Keith Busch + * Klaus Jensen + * Gollu Appalanaidu + * Dmitry Fomichev + * Minwoo Im + * + * This code is licensed under the GNU GPL v2 or later. + */ + +#ifndef HW_NVME_INTERNAL_H +#define HW_NVME_INTERNAL_H + +#include "qemu/uuid.h" +#include "hw/pci/pci.h" +#include "hw/block/block.h" + +#include "block/nvme.h" + +#define NVME_MAX_CONTROLLERS 32 +#define NVME_MAX_NAMESPACES 256 +#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) + +QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); + +typedef struct NvmeCtrl NvmeCtrl; +typedef struct NvmeNamespace NvmeNamespace; + +#define TYPE_NVME_BUS "nvme-bus" +OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; + +#define TYPE_NVME_SUBSYS "nvme-subsys" +#define NVME_SUBSYS(obj) \ + OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) + +typedef struct NvmeSubsystem { + DeviceState parent_obj; + NvmeBus bus; + uint8_t subnqn[256]; + + NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + + struct { + char *nqn; + } params; +} NvmeSubsystem; + +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); + +static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, + uint32_t cntlid) +{ + if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { + return NULL; + } + + return subsys->ctrls[cntlid]; +} + +static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, + uint32_t nsid) +{ + if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return subsys->namespaces[nsid]; +} + +#define TYPE_NVME_NS "nvme-ns" +#define NVME_NS(obj) \ + OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) + +typedef struct NvmeZone { + NvmeZoneDescr d; + uint64_t w_ptr; + QTAILQ_ENTRY(NvmeZone) entry; +} NvmeZone; + +typedef struct NvmeNamespaceParams { + bool detached; + bool shared; + uint32_t nsid; + QemuUUID uuid; + uint64_t eui64; + bool eui64_default; + + uint16_t ms; + uint8_t mset; + uint8_t pi; + uint8_t pil; + + uint16_t mssrl; + uint32_t mcl; + uint8_t msrc; + + bool zoned; + bool cross_zone_read; + uint64_t zone_size_bs; + uint64_t zone_cap_bs; + uint32_t max_active_zones; + uint32_t max_open_zones; + uint32_t zd_extension_size; +} NvmeNamespaceParams; + +typedef struct NvmeNamespace { + DeviceState parent_obj; + BlockConf blkconf; + int32_t bootindex; + int64_t size; + int64_t moff; + NvmeIdNs id_ns; + NvmeLBAF lbaf; + size_t lbasz; + const uint32_t *iocs; + uint8_t csi; + uint16_t status; + int attached; + + 
QTAILQ_ENTRY(NvmeNamespace) entry; + + NvmeIdNsZoned *id_ns_zoned; + NvmeZone *zone_array; + QTAILQ_HEAD(, NvmeZone) exp_open_zones; + QTAILQ_HEAD(, NvmeZone) imp_open_zones; + QTAILQ_HEAD(, NvmeZone) closed_zones; + QTAILQ_HEAD(, NvmeZone) full_zones; + uint32_t num_zones; + uint64_t zone_size; + uint64_t zone_capacity; + uint32_t zone_size_log2; + uint8_t *zd_extensions; + int32_t nr_open_zones; + int32_t nr_active_zones; + + NvmeNamespaceParams params; + + struct { + uint32_t err_rec; + } features; +} NvmeNamespace; + +static inline uint32_t nvme_nsid(NvmeNamespace *ns) +{ + if (ns) { + return ns->params.nsid; + } + + return 0; +} + +static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) +{ + return lba << ns->lbaf.ds; +} + +static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) +{ + return ns->lbaf.ms * lba; +} + +static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) +{ + return ns->moff + nvme_m2b(ns, lba); +} + +static inline bool nvme_ns_ext(NvmeNamespace *ns) +{ + return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); +} + +static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) +{ + return zone->d.zs >> 4; +} + +static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) +{ + zone->d.zs = state << 4; +} + +static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) +{ + return zone->d.zslba + ns->zone_size; +} + +static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) +{ + return zone->d.zslba + zone->d.zcap; +} + +static inline bool nvme_wp_is_valid(NvmeZone *zone) +{ + uint8_t st = nvme_get_zone_state(zone); + + return st != NVME_ZONE_STATE_FULL && + st != NVME_ZONE_STATE_READ_ONLY && + st != NVME_ZONE_STATE_OFFLINE; +} + +static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, + uint32_t zone_idx) +{ + return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; +} + +static inline void nvme_aor_inc_open(NvmeNamespace *ns) +{ + assert(ns->nr_open_zones >= 0); + if (ns->params.max_open_zones) { + ns->nr_open_zones++; + assert(ns->nr_open_zones <= ns->params.max_open_zones); + } +} + +static inline void nvme_aor_dec_open(NvmeNamespace *ns) +{ + if (ns->params.max_open_zones) { + assert(ns->nr_open_zones > 0); + ns->nr_open_zones--; + } + assert(ns->nr_open_zones >= 0); +} + +static inline void nvme_aor_inc_active(NvmeNamespace *ns) +{ + assert(ns->nr_active_zones >= 0); + if (ns->params.max_active_zones) { + ns->nr_active_zones++; + assert(ns->nr_active_zones <= ns->params.max_active_zones); + } +} + +static inline void nvme_aor_dec_active(NvmeNamespace *ns) +{ + if (ns->params.max_active_zones) { + assert(ns->nr_active_zones > 0); + ns->nr_active_zones--; + assert(ns->nr_active_zones >= ns->nr_open_zones); + } + assert(ns->nr_active_zones >= 0); +} + +void nvme_ns_init_format(NvmeNamespace *ns); +int nvme_ns_setup(NvmeNamespace *ns, Error **errp); +void nvme_ns_drain(NvmeNamespace *ns); +void nvme_ns_shutdown(NvmeNamespace *ns); +void nvme_ns_cleanup(NvmeNamespace *ns); + +typedef struct NvmeAsyncEvent { + QTAILQ_ENTRY(NvmeAsyncEvent) entry; + NvmeAerResult result; +} NvmeAsyncEvent; + +enum { + NVME_SG_ALLOC = 1 << 0, + NVME_SG_DMA = 1 << 1, +}; + +typedef struct NvmeSg { + int flags; + + union { + QEMUSGList qsg; + QEMUIOVector iov; + }; +} NvmeSg; + +typedef enum NvmeTxDirection { + NVME_TX_DIRECTION_TO_DEVICE = 0, + NVME_TX_DIRECTION_FROM_DEVICE = 1, +} NvmeTxDirection; + +typedef struct NvmeRequest { + struct NvmeSQueue *sq; + struct NvmeNamespace *ns; + BlockAIOCB *aiocb; + uint16_t 
status; + void *opaque; + NvmeCqe cqe; + NvmeCmd cmd; + BlockAcctCookie acct; + NvmeSg sg; + QTAILQ_ENTRY(NvmeRequest)entry; +} NvmeRequest; + +typedef struct NvmeBounceContext { + NvmeRequest *req; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data, mdata; +} NvmeBounceContext; + +static inline const char *nvme_adm_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; + case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; + case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; + case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; + case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; + case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; + case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; + case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; + case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; + case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; + case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; + default: return "NVME_ADM_CMD_UNKNOWN"; + } +} + +static inline const char *nvme_io_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; + case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; + case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; + case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; + case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; + case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; + case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; + case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; + case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; + case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; + case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; + default: return "NVME_NVM_CMD_UNKNOWN"; + } +} + +typedef struct NvmeSQueue { + struct NvmeCtrl *ctrl; + uint16_t sqid; + uint16_t cqid; + uint32_t head; + uint32_t tail; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + NvmeRequest *io_req; + QTAILQ_HEAD(, NvmeRequest) req_list; + QTAILQ_HEAD(, NvmeRequest) out_req_list; + QTAILQ_ENTRY(NvmeSQueue) entry; +} NvmeSQueue; + +typedef struct NvmeCQueue { + struct NvmeCtrl *ctrl; + uint8_t phase; + uint16_t cqid; + uint16_t irq_enabled; + uint32_t head; + uint32_t tail; + uint32_t vector; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + QTAILQ_HEAD(, NvmeSQueue) sq_list; + QTAILQ_HEAD(, NvmeRequest) req_list; +} NvmeCQueue; + +#define TYPE_NVME "nvme" +#define NVME(obj) \ + OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) + +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; /* deprecated since 5.1 */ + uint32_t max_ioqpairs; + uint16_t msix_qsize; + uint32_t cmb_size_mb; + uint8_t aerl; + uint32_t aer_max_queued; + uint8_t mdts; + uint8_t vsl; + bool use_intel_id; + uint8_t zasl; + bool auto_transition_zones; + bool legacy_cmb; +} NvmeParams; + +typedef struct NvmeCtrl { + PCIDevice parent_obj; + MemoryRegion bar0; + MemoryRegion iomem; + NvmeBar bar; + NvmeParams params; + NvmeBus bus; + + uint16_t cntlid; + bool qs_created; + uint32_t page_size; + uint16_t page_bits; + uint16_t max_prp_ents; + uint16_t cqe_size; + uint16_t sqe_size; + uint32_t reg_size; + uint32_t max_q_ents; + uint8_t outstanding_aers; + uint32_t irq_status; + int cq_pending; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t 
timestamp_set_qemu_clock_ms; /* QEMU clock time */ + uint64_t starttime_ms; + uint16_t temperature; + uint8_t smart_critical_warning; + + struct { + MemoryRegion mem; + uint8_t *buf; + bool cmse; + hwaddr cba; + } cmb; + + struct { + HostMemoryBackend *dev; + bool cmse; + hwaddr cba; + } pmr; + + uint8_t aer_mask; + NvmeRequest **aer_reqs; + QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; + int aer_queued; + + uint32_t dmrsl; + + /* Namespace ID is started with 1 so bitmap should be 1-based */ +#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) + DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); + + NvmeSubsystem *subsys; + + NvmeNamespace namespace; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeSQueue **sq; + NvmeCQueue **cq; + NvmeSQueue admin_sq; + NvmeCQueue admin_cq; + NvmeIdCtrl id_ctrl; + + struct { + struct { + uint16_t temp_thresh_hi; + uint16_t temp_thresh_low; + }; + uint32_t async_config; + } features; +} NvmeCtrl; + +static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) +{ + if (!nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return n->namespaces[nsid]; +} + +static inline NvmeCQueue *nvme_cq(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + NvmeCtrl *n = sq->ctrl; + + return n->cq[sq->cqid]; +} + +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + return sq->ctrl; +} + +static inline uint16_t nvme_cid(NvmeRequest *req) +{ + if (!req) { + return 0xffff; + } + + return le16_to_cpu(req->cqe.cid); +} + +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +void nvme_rw_complete_cb(void *opaque, int ret); +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd); + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 
0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, + uint32_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t *reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint8_t prinfo, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t *reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + + +#endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c new file mode 100644 index 00000000000..fb58d639504 --- /dev/null +++ b/hw/nvme/subsys.c @@ -0,0 +1,96 @@ +/* + * QEMU NVM Express Subsystem: nvme-subsys + * + * Copyright (c) 2021 Minwoo Im + * + * This code is licensed under the GNU GPL v2. Refer COPYING. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "nvme.h" + +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) +{ + NvmeSubsystem *subsys = n->subsys; + int cntlid, nsid; + + for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { + if (!subsys->ctrls[cntlid]) { + break; + } + } + + if (cntlid == ARRAY_SIZE(subsys->ctrls)) { + error_setg(errp, "no more free controller id"); + return -1; + } + + subsys->ctrls[cntlid] = n; + + for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { + NvmeNamespace *ns = subsys->namespaces[nsid]; + if (ns && ns->params.shared && !ns->params.detached) { + nvme_attach_ns(n, ns); + } + } + + return cntlid; +} + +void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) +{ + subsys->ctrls[n->cntlid] = NULL; + n->cntlid = -1; +} + +static void nvme_subsys_setup(NvmeSubsystem *subsys) +{ + const char *nqn = subsys->params.nqn ? 
+ subsys->params.nqn : subsys->parent_obj.id; + + snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), + "nqn.2019-08.org.qemu:%s", nqn); +} + +static void nvme_subsys_realize(DeviceState *dev, Error **errp) +{ + NvmeSubsystem *subsys = NVME_SUBSYS(dev); + + qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id); + + nvme_subsys_setup(subsys); +} + +static Property nvme_subsystem_props[] = { + DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvme_subsys_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + + dc->realize = nvme_subsys_realize; + dc->desc = "Virtual NVMe subsystem"; + dc->hotpluggable = false; + + device_class_set_props(dc, nvme_subsystem_props); +} + +static const TypeInfo nvme_subsys_info = { + .name = TYPE_NVME_SUBSYS, + .parent = TYPE_DEVICE, + .class_init = nvme_subsys_class_init, + .instance_size = sizeof(NvmeSubsystem), +}; + +static void nvme_subsys_register_types(void) +{ + type_register_static(&nvme_subsys_info); +} + +type_init(nvme_subsys_register_types) diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events new file mode 100644 index 00000000000..ff6cafd520d --- /dev/null +++ b/hw/nvme/trace-events @@ -0,0 +1,200 @@ +# successful events +pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +pci_nvme_irq_pin(void) "pulsing IRQ pin" +pci_nvme_irq_masked(void) "IRQ is masked" +pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" +pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" +pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid 0x%"PRIx32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_flush_ns(uint32_t nsid) "nsid 0x%"PRIx32"" +pci_nvme_format_set(uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_misc_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" +pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 
0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" +pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" +pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" +pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_copy_out(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" +pci_nvme_dsm(uint32_t nr, uint32_t attr) "nr %"PRIu32" attr 0x%"PRIx32"" +pci_nvme_dsm_deallocate(uint64_t slba, uint32_t nlb) "slba %"PRIu64" nlb %"PRIu32"" +pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" +pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" +pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" +pci_nvme_identify_ctrl(void) "identify controller" +pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" +pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" +pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" +pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" 
+pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" +pci_nvme_identify_cmd_set(void) "identify i/o command set" +pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" +pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" +pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +pci_nvme_process_aers(int queued) "queued %d" +pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" +pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" +pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" +pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_enqueue_event_noqueue(int queued) "queued %d" +pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" +pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" +pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" dw1 0x%"PRIx32" status 0x%"PRIx16"" +pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" +pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" +pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" +pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" +pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" 
+pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" +pci_nvme_mmio_stopped(void) "cleared controller enable bit" +pci_nvme_mmio_shutdown_set(void) "shutdown bit set" +pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" +pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_zns_zone_reset(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" +pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" +pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" + +# error conditions +pci_nvme_err_mdts(size_t len) "len %zu" +pci_nvme_err_zasl(size_t len) "len %zu" +pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" +pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_cfs(void) "controller fatal status" +pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" +pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" +pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" +pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" +pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" +pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" +pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" +pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" +pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) 
"appending at slba=%"PRIu64", but zone=%"PRIu64"" +pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" +pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" +pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" +pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" +pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" +pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" +pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" +pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" 
+pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" +pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" +pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" +pci_nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" + +# undefined behavior +pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" +pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" +pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" +pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", 
new_head=%"PRIu16", ignoring" +pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" +pci_nvme_ub_too_many_mappings(void) "too many prp/sgl mappings" diff --git a/hw/nvme/trace.h b/hw/nvme/trace.h new file mode 100644 index 00000000000..b398ea107f5 --- /dev/null +++ b/hw/nvme/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_nvme.h" diff --git a/hw/nvram/Kconfig b/hw/nvram/Kconfig index e872fcb1941..24cfc18f8b1 100644 --- a/hw/nvram/Kconfig +++ b/hw/nvram/Kconfig @@ -15,3 +15,22 @@ config NMC93XX_EEPROM config CHRP_NVRAM bool + +config XLNX_EFUSE_CRC + bool + +config XLNX_EFUSE + bool + select XLNX_EFUSE_CRC + +config XLNX_EFUSE_VERSAL + bool + select XLNX_EFUSE + +config XLNX_EFUSE_ZYNQMP + bool + select XLNX_EFUSE + +config XLNX_BBRAM + bool + select XLNX_EFUSE_CRC diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 9b8dcca4ead..c06b30de112 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -878,6 +878,7 @@ static struct { { "etc/tpm/log", 150 }, { "etc/acpi/rsdp", 160 }, { "bootorder", 170 }, + { "etc/msr_feature_control", 180 }, #define FW_CFG_ORDER_OVERRIDE_LAST 200 }; diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build index fd2951a860f..202a5466e63 100644 --- a/hw/nvram/meson.build +++ b/hw/nvram/meson.build @@ -9,5 +9,13 @@ softmmu_ss.add(when: 'CONFIG_AT24C', if_true: files('eeprom_at24c.c')) softmmu_ss.add(when: 'CONFIG_MAC_NVRAM', if_true: files('mac_nvram.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_otp.c')) softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_nvm.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_CRC', if_true: files('xlnx-efuse-crc.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE', if_true: files('xlnx-efuse.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_VERSAL', if_true: files( + 'xlnx-versal-efuse-cache.c', + 'xlnx-versal-efuse-ctrl.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_ZYNQMP', if_true: files( + 'xlnx-zynqmp-efuse.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_BBRAM', if_true: files('xlnx-bbram.c')) specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_nvram.c')) diff --git a/hw/nvram/nrf51_nvm.c b/hw/nvram/nrf51_nvm.c index 7b3460d52da..7f1db8c4239 100644 --- a/hw/nvram/nrf51_nvm.c +++ b/hw/nvram/nrf51_nvm.c @@ -21,7 +21,6 @@ #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" -#include "exec/address-spaces.h" #include "hw/arm/nrf51.h" #include "hw/nvram/nrf51_nvm.h" #include "hw/qdev-properties.h" diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 01f77520146..fbfdf47e268 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -26,14 +26,12 @@ #include "qemu/module.h" #include "qemu/units.h" #include "qapi/error.h" -#include "cpu.h" #include #include "sysemu/block-backend.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #include "sysemu/runstate.h" -#include "hw/sysbus.h" #include "migration/vmstate.h" #include "hw/nvram/chrp_nvram.h" #include "hw/ppc/spapr.h" diff --git a/hw/nvram/trace-events b/hw/nvram/trace-events index e0231932959..5e33b24d47a 100644 --- a/hw/nvram/trace-events +++ b/hw/nvram/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# ds1225y.c nvram_read(uint32_t addr, uint32_t ret) "read addr %d: 0x%02x" diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c new file mode 100644 index 00000000000..b70828e5bf1 --- /dev/null +++ b/hw/nvram/xlnx-bbram.c @@ -0,0 +1,545 @@ +/* + * QEMU model of the Xilinx BBRAM Battery Backed RAM + * + * Copyright (c) 2014-2021 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-bbram.h" + +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "sysemu/blockdev.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/nvram/xlnx-efuse.h" + +#ifndef XLNX_BBRAM_ERR_DEBUG +#define XLNX_BBRAM_ERR_DEBUG 0 +#endif + +REG32(BBRAM_STATUS, 0x0) + FIELD(BBRAM_STATUS, AES_CRC_PASS, 9, 1) + FIELD(BBRAM_STATUS, AES_CRC_DONE, 8, 1) + FIELD(BBRAM_STATUS, BBRAM_ZEROIZED, 4, 1) + FIELD(BBRAM_STATUS, PGM_MODE, 0, 1) +REG32(BBRAM_CTRL, 0x4) + FIELD(BBRAM_CTRL, ZEROIZE, 0, 1) +REG32(PGM_MODE, 0x8) +REG32(BBRAM_AES_CRC, 0xc) +REG32(BBRAM_0, 0x10) +REG32(BBRAM_1, 0x14) +REG32(BBRAM_2, 0x18) +REG32(BBRAM_3, 0x1c) +REG32(BBRAM_4, 0x20) +REG32(BBRAM_5, 0x24) +REG32(BBRAM_6, 0x28) +REG32(BBRAM_7, 0x2c) +REG32(BBRAM_8, 0x30) +REG32(BBRAM_SLVERR, 0x34) + FIELD(BBRAM_SLVERR, ENABLE, 0, 1) +REG32(BBRAM_ISR, 0x38) + FIELD(BBRAM_ISR, APB_SLVERR, 0, 1) +REG32(BBRAM_IMR, 0x3c) + FIELD(BBRAM_IMR, APB_SLVERR, 0, 1) +REG32(BBRAM_IER, 0x40) + FIELD(BBRAM_IER, APB_SLVERR, 0, 1) +REG32(BBRAM_IDR, 0x44) + FIELD(BBRAM_IDR, APB_SLVERR, 0, 1) +REG32(BBRAM_MSW_LOCK, 0x4c) + FIELD(BBRAM_MSW_LOCK, VAL, 0, 1) + +#define R_MAX (R_BBRAM_MSW_LOCK + 1) + +#define RAM_MAX (A_BBRAM_8 + 4 - A_BBRAM_0) + +#define BBRAM_PGM_MAGIC 0x757bdf0d + +QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxBBRam *)0)->regs)); + +static bool bbram_msw_locked(XlnxBBRam *s) +{ + return ARRAY_FIELD_EX32(s->regs, BBRAM_MSW_LOCK, VAL) != 0; +} + +static bool bbram_pgm_enabled(XlnxBBRam *s) +{ + return ARRAY_FIELD_EX32(s->regs, BBRAM_STATUS, PGM_MODE) != 0; +} + +static void bbram_bdrv_error(XlnxBBRam *s, int rc, gchar *detail) +{ + Error *errp; + + error_setg_errno(&errp, -rc, "%s: BBRAM backstore %s failed.", + blk_name(s->blk), detail); + error_report("%s", error_get_pretty(errp)); + error_free(errp); + + g_free(detail); +} + +static void bbram_bdrv_read(XlnxBBRam *s, Error **errp) +{ + uint32_t *ram = &s->regs[R_BBRAM_0]; + int nr = RAM_MAX; + + if (!s->blk) { + return; + } + + 
s->blk_ro = !blk_supports_write_perm(s->blk); + if (!s->blk_ro) { + int rc; + + rc = blk_set_perm(s->blk, + (BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE), + BLK_PERM_ALL, NULL); + if (rc) { + s->blk_ro = true; + } + } + if (s->blk_ro) { + warn_report("%s: Skip saving updates to read-only BBRAM backstore.", + blk_name(s->blk)); + } + + if (blk_pread(s->blk, 0, ram, nr) < 0) { + error_setg(errp, + "%s: Failed to read %u bytes from BBRAM backstore.", + blk_name(s->blk), nr); + return; + } + + /* Convert from little-endian backstore for each 32-bit word */ + nr /= 4; + while (nr--) { + ram[nr] = le32_to_cpu(ram[nr]); + } +} + +static void bbram_bdrv_sync(XlnxBBRam *s, uint64_t hwaddr) +{ + uint32_t le32; + unsigned offset; + int rc; + + assert(A_BBRAM_0 <= hwaddr && hwaddr <= A_BBRAM_8); + + /* Backstore is always in little-endian */ + le32 = cpu_to_le32(s->regs[hwaddr / 4]); + + /* Update zeroized flag */ + if (le32 && (hwaddr != A_BBRAM_8 || s->bbram8_wo)) { + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, BBRAM_ZEROIZED, 0); + } + + if (!s->blk || s->blk_ro) { + return; + } + + offset = hwaddr - A_BBRAM_0; + rc = blk_pwrite(s->blk, offset, &le32, 4, 0); + if (rc < 0) { + bbram_bdrv_error(s, rc, g_strdup_printf("write to offset %u", offset)); + } +} + +static void bbram_bdrv_zero(XlnxBBRam *s) +{ + int rc; + + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, BBRAM_ZEROIZED, 1); + + if (!s->blk || s->blk_ro) { + return; + } + + rc = blk_make_zero(s->blk, 0); + if (rc < 0) { + bbram_bdrv_error(s, rc, g_strdup("zeroizing")); + } + + /* Restore bbram8 if it is non-zero */ + if (s->regs[R_BBRAM_8]) { + bbram_bdrv_sync(s, A_BBRAM_8); + } +} + +static void bbram_zeroize(XlnxBBRam *s) +{ + int nr = RAM_MAX - (s->bbram8_wo ? 0 : 4); /* only wo bbram8 is cleared */ + + memset(&s->regs[R_BBRAM_0], 0, nr); + bbram_bdrv_zero(s); +} + +static void bbram_update_irq(XlnxBBRam *s) +{ + bool pending = s->regs[R_BBRAM_ISR] & ~s->regs[R_BBRAM_IMR]; + + qemu_set_irq(s->irq_bbram, pending); +} + +static void bbram_ctrl_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + if (val & R_BBRAM_CTRL_ZEROIZE_MASK) { + bbram_zeroize(s); + /* The bit is self clearing */ + s->regs[R_BBRAM_CTRL] &= ~R_BBRAM_CTRL_ZEROIZE_MASK; + } +} + +static void bbram_pgm_mode_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + if (val == BBRAM_PGM_MAGIC) { + bbram_zeroize(s); + + /* The status bit is cleared only by POR */ + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, PGM_MODE, 1); + } +} + +static void bbram_aes_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t calc_crc; + + if (!bbram_pgm_enabled(s)) { + /* We are not in programming mode, don't do anything */ + return; + } + + /* Perform the AES integrity check */ + s->regs[R_BBRAM_STATUS] |= R_BBRAM_STATUS_AES_CRC_DONE_MASK; + + /* + * Set check status. 
+ * + * ZynqMP BBRAM check has a zero-u32 prepended; see: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_bbramps_zynqmp.c#L311 + */ + calc_crc = xlnx_efuse_calc_crc(&s->regs[R_BBRAM_0], + (R_BBRAM_8 - R_BBRAM_0), s->crc_zpads); + + ARRAY_FIELD_DP32(s->regs, BBRAM_STATUS, AES_CRC_PASS, + (s->regs[R_BBRAM_AES_CRC] == calc_crc)); +} + +static uint64_t bbram_key_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t original_data = *(uint32_t *) reg->data; + + if (bbram_pgm_enabled(s)) { + return val64; + } else { + /* We are not in programming mode, don't do anything */ + qemu_log_mask(LOG_GUEST_ERROR, + "Not in programming mode, dropping the write\n"); + return original_data; + } +} + +static void bbram_key_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + bbram_bdrv_sync(s, reg->access->addr); +} + +static uint64_t bbram_wo_postr(RegisterInfo *reg, uint64_t val) +{ + return 0; +} + +static uint64_t bbram_r8_postr(RegisterInfo *reg, uint64_t val) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + return s->bbram8_wo ? bbram_wo_postr(reg, val) : val; +} + +static bool bbram_r8_readonly(XlnxBBRam *s) +{ + return !bbram_pgm_enabled(s) || bbram_msw_locked(s); +} + +static uint64_t bbram_r8_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + if (bbram_r8_readonly(s)) { + val64 = *(uint32_t *)reg->data; + } + + return val64; +} + +static void bbram_r8_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + if (!bbram_r8_readonly(s)) { + bbram_bdrv_sync(s, A_BBRAM_8); + } +} + +static uint64_t bbram_msw_lock_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + /* Never lock if bbram8 is wo; and, only POR can clear the lock */ + if (s->bbram8_wo) { + val64 = 0; + } else { + val64 |= s->regs[R_BBRAM_MSW_LOCK]; + } + + return val64; +} + +static void bbram_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + + bbram_update_irq(s); +} + +static uint64_t bbram_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + s->regs[R_BBRAM_IMR] &= ~val; + bbram_update_irq(s); + return 0; +} + +static uint64_t bbram_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxBBRam *s = XLNX_BBRAM(reg->opaque); + uint32_t val = val64; + + s->regs[R_BBRAM_IMR] |= val; + bbram_update_irq(s); + return 0; +} + +static RegisterAccessInfo bbram_ctrl_regs_info[] = { + { .name = "BBRAM_STATUS", .addr = A_BBRAM_STATUS, + .rsvd = 0xee, + .ro = 0x3ff, + },{ .name = "BBRAM_CTRL", .addr = A_BBRAM_CTRL, + .post_write = bbram_ctrl_postw, + },{ .name = "PGM_MODE", .addr = A_PGM_MODE, + .post_write = bbram_pgm_mode_postw, + },{ .name = "BBRAM_AES_CRC", .addr = A_BBRAM_AES_CRC, + .post_write = bbram_aes_crc_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_0", .addr = A_BBRAM_0, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_1", .addr = A_BBRAM_1, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_2", .addr = A_BBRAM_2, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_3", .addr = A_BBRAM_3, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = 
"BBRAM_4", .addr = A_BBRAM_4, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_5", .addr = A_BBRAM_5, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_6", .addr = A_BBRAM_6, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_7", .addr = A_BBRAM_7, + .pre_write = bbram_key_prew, + .post_write = bbram_key_postw, + .post_read = bbram_wo_postr, + },{ .name = "BBRAM_8", .addr = A_BBRAM_8, + .pre_write = bbram_r8_prew, + .post_write = bbram_r8_postw, + .post_read = bbram_r8_postr, + },{ .name = "BBRAM_SLVERR", .addr = A_BBRAM_SLVERR, + .rsvd = ~1, + },{ .name = "BBRAM_ISR", .addr = A_BBRAM_ISR, + .w1c = 0x1, + .post_write = bbram_isr_postw, + },{ .name = "BBRAM_IMR", .addr = A_BBRAM_IMR, + .ro = 0x1, + },{ .name = "BBRAM_IER", .addr = A_BBRAM_IER, + .pre_write = bbram_ier_prew, + },{ .name = "BBRAM_IDR", .addr = A_BBRAM_IDR, + .pre_write = bbram_idr_prew, + },{ .name = "BBRAM_MSW_LOCK", .addr = A_BBRAM_MSW_LOCK, + .pre_write = bbram_msw_lock_prew, + .ro = ~R_BBRAM_MSW_LOCK_VAL_MASK, + } +}; + +static void bbram_ctrl_reset(DeviceState *dev) +{ + XlnxBBRam *s = XLNX_BBRAM(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + if (i < R_BBRAM_0 || i > R_BBRAM_8) { + register_reset(&s->regs_info[i]); + } + } + + bbram_update_irq(s); +} + +static const MemoryRegionOps bbram_ctrl_ops = { + .read = register_read_memory, + .write = register_write_memory, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void bbram_ctrl_realize(DeviceState *dev, Error **errp) +{ + XlnxBBRam *s = XLNX_BBRAM(dev); + + if (s->crc_zpads) { + s->bbram8_wo = true; + } + + bbram_bdrv_read(s, errp); +} + +static void bbram_ctrl_init(Object *obj) +{ + XlnxBBRam *s = XLNX_BBRAM(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + reg_array = + register_init_block32(DEVICE(obj), bbram_ctrl_regs_info, + ARRAY_SIZE(bbram_ctrl_regs_info), + s->regs_info, s->regs, + &bbram_ctrl_ops, + XLNX_BBRAM_ERR_DEBUG, + R_MAX * 4); + + sysbus_init_mmio(sbd, ®_array->mem); + sysbus_init_irq(sbd, &s->irq_bbram); +} + +static void bbram_prop_set_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + + qdev_prop_drive.set(obj, v, name, opaque, errp); + + /* Fill initial data if backend is attached after realized */ + if (dev->realized) { + bbram_bdrv_read(XLNX_BBRAM(obj), errp); + } +} + +static void bbram_prop_get_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + qdev_prop_drive.get(obj, v, name, opaque, errp); +} + +static void bbram_prop_release_drive(Object *obj, const char *name, + void *opaque) +{ + qdev_prop_drive.release(obj, name, opaque); +} + +static const PropertyInfo bbram_prop_drive = { + .name = "str", + .description = "Node name or ID of a block device to use as BBRAM backend", + .realized_set_allowed = true, + .get = bbram_prop_get_drive, + .set = bbram_prop_set_drive, + .release = bbram_prop_release_drive, +}; + +static const VMStateDescription vmstate_bbram_ctrl = { + .name = TYPE_XLNX_BBRAM, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, XlnxBBRam, R_MAX), + VMSTATE_END_OF_LIST(), + } +}; + +static Property bbram_ctrl_props[] = { + DEFINE_PROP("drive", 
XlnxBBRam, blk, bbram_prop_drive, BlockBackend *), + DEFINE_PROP_UINT32("crc-zpads", XlnxBBRam, crc_zpads, 1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void bbram_ctrl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bbram_ctrl_reset; + dc->realize = bbram_ctrl_realize; + dc->vmsd = &vmstate_bbram_ctrl; + device_class_set_props(dc, bbram_ctrl_props); +} + +static const TypeInfo bbram_ctrl_info = { + .name = TYPE_XLNX_BBRAM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxBBRam), + .class_init = bbram_ctrl_class_init, + .instance_init = bbram_ctrl_init, +}; + +static void bbram_ctrl_register_types(void) +{ + type_register_static(&bbram_ctrl_info); +} + +type_init(bbram_ctrl_register_types) diff --git a/hw/nvram/xlnx-efuse-crc.c b/hw/nvram/xlnx-efuse-crc.c new file mode 100644 index 00000000000..5a5cc13f39f --- /dev/null +++ b/hw/nvram/xlnx-efuse-crc.c @@ -0,0 +1,119 @@ +/* + * Xilinx eFuse/bbram CRC calculator + * + * Copyright (c) 2021 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-efuse.h" + +static uint32_t xlnx_efuse_u37_crc(uint32_t prev_crc, uint32_t data, + uint32_t addr) +{ + /* A table for 7-bit slicing */ + static const uint32_t crc_tab[128] = { + 0x00000000, 0xe13b70f7, 0xc79a971f, 0x26a1e7e8, + 0x8ad958cf, 0x6be22838, 0x4d43cfd0, 0xac78bf27, + 0x105ec76f, 0xf165b798, 0xd7c45070, 0x36ff2087, + 0x9a879fa0, 0x7bbcef57, 0x5d1d08bf, 0xbc267848, + 0x20bd8ede, 0xc186fe29, 0xe72719c1, 0x061c6936, + 0xaa64d611, 0x4b5fa6e6, 0x6dfe410e, 0x8cc531f9, + 0x30e349b1, 0xd1d83946, 0xf779deae, 0x1642ae59, + 0xba3a117e, 0x5b016189, 0x7da08661, 0x9c9bf696, + 0x417b1dbc, 0xa0406d4b, 0x86e18aa3, 0x67dafa54, + 0xcba24573, 0x2a993584, 0x0c38d26c, 0xed03a29b, + 0x5125dad3, 0xb01eaa24, 0x96bf4dcc, 0x77843d3b, + 0xdbfc821c, 0x3ac7f2eb, 0x1c661503, 0xfd5d65f4, + 0x61c69362, 0x80fde395, 0xa65c047d, 0x4767748a, + 0xeb1fcbad, 0x0a24bb5a, 0x2c855cb2, 0xcdbe2c45, + 0x7198540d, 0x90a324fa, 0xb602c312, 0x5739b3e5, + 0xfb410cc2, 0x1a7a7c35, 0x3cdb9bdd, 0xdde0eb2a, + 0x82f63b78, 0x63cd4b8f, 0x456cac67, 0xa457dc90, + 0x082f63b7, 0xe9141340, 0xcfb5f4a8, 0x2e8e845f, + 0x92a8fc17, 0x73938ce0, 0x55326b08, 0xb4091bff, + 0x1871a4d8, 0xf94ad42f, 0xdfeb33c7, 0x3ed04330, + 0xa24bb5a6, 0x4370c551, 0x65d122b9, 0x84ea524e, + 0x2892ed69, 0xc9a99d9e, 0xef087a76, 0x0e330a81, + 0xb21572c9, 0x532e023e, 0x758fe5d6, 0x94b49521, + 0x38cc2a06, 0xd9f75af1, 0xff56bd19, 0x1e6dcdee, + 0xc38d26c4, 0x22b65633, 0x0417b1db, 0xe52cc12c, + 0x49547e0b, 0xa86f0efc, 0x8ecee914, 0x6ff599e3, + 0xd3d3e1ab, 0x32e8915c, 0x144976b4, 0xf5720643, + 0x590ab964, 0xb831c993, 0x9e902e7b, 0x7fab5e8c, + 0xe330a81a, 0x020bd8ed, 0x24aa3f05, 0xc5914ff2, + 0x69e9f0d5, 0x88d28022, 0xae7367ca, 0x4f48173d, + 0xf36e6f75, 0x12551f82, 0x34f4f86a, 0xd5cf889d, + 0x79b737ba, 0x988c474d, 0xbe2da0a5, 0x5f16d052 + }; + + /* + * eFuse calculation is shown here: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_utils.c#L1496 + * + * Each u32 word is appended a 5-bit value, for a total of 37 bits; see: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_utils.c#L1356 + */ + uint32_t crc = prev_crc; + const unsigned rshf = 7; + const uint32_t im = (1 << rshf) - 1; + const uint32_t rm = (1 << (32 - rshf)) - 1; + const uint32_t i2 = (1 << 2) - 1; + const uint32_t r2 = (1 << 30) - 1; + + unsigned j; + uint32_t i, r; + uint64_t w; + + w = (uint64_t)(addr) << 32; + w |= data; + + /* Feed 35 bits, in 5 rounds, each a slice of 7 bits */ + for (j = 0; j < 5; j++) { + r = rm & (crc >> rshf); + i = im & (crc ^ w); + crc = crc_tab[i] ^ r; + + w >>= rshf; + } + + /* Feed the remaining 2 bits */ + r = r2 & (crc >> 2); + i = i2 & (crc ^ w); + crc = crc_tab[i << (rshf - 2)] ^ r; + + return crc; +} + +uint32_t xlnx_efuse_calc_crc(const uint32_t *data, unsigned u32_cnt, + unsigned zpads) +{ + uint32_t crc = 0; + unsigned index; + + for (index = zpads; index; index--) { + crc = xlnx_efuse_u37_crc(crc, 0, (index + u32_cnt)); + } + + for (index = u32_cnt; index; index--) { + crc = xlnx_efuse_u37_crc(crc, data[index - 1], index); + } + + return crc; +} diff --git a/hw/nvram/xlnx-efuse.c b/hw/nvram/xlnx-efuse.c new file mode 100644 index 00000000000..a0fd77b586d --- /dev/null +++ b/hw/nvram/xlnx-efuse.c @@ -0,0 +1,283 @@ +/* + * QEMU model of the EFUSE eFuse + * + * Copyright (c) 2015 Xilinx Inc. + * + * Written by Edgar E. 
Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-efuse.h" + +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "sysemu/blockdev.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" + +#define TBIT0_OFFSET 28 +#define TBIT1_OFFSET 29 +#define TBIT2_OFFSET 30 +#define TBIT3_OFFSET 31 +#define TBITS_PATTERN (0x0AU << TBIT0_OFFSET) +#define TBITS_MASK (0x0FU << TBIT0_OFFSET) + +bool xlnx_efuse_get_bit(XlnxEFuse *s, unsigned int bit) +{ + bool b = s->fuse32[bit / 32] & (1 << (bit % 32)); + return b; +} + +static int efuse_bytes(XlnxEFuse *s) +{ + return ROUND_UP((s->efuse_nr * s->efuse_size) / 8, 4); +} + +static int efuse_bdrv_read(XlnxEFuse *s, Error **errp) +{ + uint32_t *ram = s->fuse32; + int nr = efuse_bytes(s); + + if (!s->blk) { + return 0; + } + + s->blk_ro = !blk_supports_write_perm(s->blk); + if (!s->blk_ro) { + int rc; + + rc = blk_set_perm(s->blk, + (BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE), + BLK_PERM_ALL, NULL); + if (rc) { + s->blk_ro = true; + } + } + if (s->blk_ro) { + warn_report("%s: Skip saving updates to read-only eFUSE backstore.", + blk_name(s->blk)); + } + + if (blk_pread(s->blk, 0, ram, nr) < 0) { + error_setg(errp, "%s: Failed to read %u bytes from eFUSE backstore.", + blk_name(s->blk), nr); + return -1; + } + + /* Convert from little-endian backstore for each 32-bit row */ + nr /= 4; + while (nr--) { + ram[nr] = le32_to_cpu(ram[nr]); + } + + return 0; +} + +static void efuse_bdrv_sync(XlnxEFuse *s, unsigned int bit) +{ + unsigned int row_offset; + uint32_t le32; + + if (!s->blk || s->blk_ro) { + return; /* Silent on read-only backend to avoid message flood */ + } + + /* Backstore is always in little-endian */ + le32 = cpu_to_le32(xlnx_efuse_get_row(s, bit)); + + row_offset = (bit / 32) * 4; + if (blk_pwrite(s->blk, row_offset, &le32, 4, 0) < 0) { + error_report("%s: Failed to write offset %u of eFUSE backstore.", + blk_name(s->blk), row_offset); + } +} + +static int efuse_ro_bits_cmp(const void *a, const void *b) +{ + uint32_t i = *(const uint32_t *)a; + uint32_t j = *(const uint32_t *)b; + + return (i > j) - (i < j); +} + +static void efuse_ro_bits_sort(XlnxEFuse *s) +{ + uint32_t *ary = s->ro_bits; + const uint32_t cnt = s->ro_bits_cnt; + + if (ary && cnt > 1) { + qsort(ary, cnt, sizeof(ary[0]), efuse_ro_bits_cmp); + } +} + +static bool efuse_ro_bits_find(XlnxEFuse *s, uint32_t k) +{ + const uint32_t *ary = 
s->ro_bits; + const uint32_t cnt = s->ro_bits_cnt; + + if (!ary || !cnt) { + return false; + } + + return bsearch(&k, ary, cnt, sizeof(ary[0]), efuse_ro_bits_cmp) != NULL; +} + +bool xlnx_efuse_set_bit(XlnxEFuse *s, unsigned int bit) +{ + if (efuse_ro_bits_find(s, bit)) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, "%s: WARN: " + "Ignored setting of readonly efuse bit<%u,%u>!\n", + path, (bit / 32), (bit % 32)); + return false; + } + + s->fuse32[bit / 32] |= 1 << (bit % 32); + efuse_bdrv_sync(s, bit); + return true; +} + +bool xlnx_efuse_k256_check(XlnxEFuse *s, uint32_t crc, unsigned start) +{ + uint32_t calc; + + /* A key always occupies multiple of whole rows */ + assert((start % 32) == 0); + + calc = xlnx_efuse_calc_crc(&s->fuse32[start / 32], (256 / 32), 0); + return calc == crc; +} + +uint32_t xlnx_efuse_tbits_check(XlnxEFuse *s) +{ + int nr; + uint32_t check = 0; + + for (nr = s->efuse_nr; nr-- > 0; ) { + int efuse_start_row_num = (s->efuse_size * nr) / 32; + uint32_t data = s->fuse32[efuse_start_row_num]; + + /* + * If the option is on, auto-init blank T-bits. + * (non-blank will still be reported as '0' in the check, e.g., + * for error-injection tests) + */ + if ((data & TBITS_MASK) == 0 && s->init_tbits) { + data |= TBITS_PATTERN; + + s->fuse32[efuse_start_row_num] = data; + efuse_bdrv_sync(s, (efuse_start_row_num * 32 + TBIT0_OFFSET)); + } + + check = (check << 1) | ((data & TBITS_MASK) == TBITS_PATTERN); + } + + return check; +} + +static void efuse_realize(DeviceState *dev, Error **errp) +{ + XlnxEFuse *s = XLNX_EFUSE(dev); + + /* Sort readonly-list for bsearch lookup */ + efuse_ro_bits_sort(s); + + if ((s->efuse_size % 32) != 0) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, + "%s.efuse-size: %u: property value not multiple of 32.", + path, s->efuse_size); + return; + } + + s->fuse32 = g_malloc0(efuse_bytes(s)); + if (efuse_bdrv_read(s, errp)) { + g_free(s->fuse32); + } +} + +static void efuse_prop_set_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + + qdev_prop_drive.set(obj, v, name, opaque, errp); + + /* Fill initial data if backend is attached after realized */ + if (dev->realized) { + efuse_bdrv_read(XLNX_EFUSE(obj), errp); + } +} + +static void efuse_prop_get_drive(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + qdev_prop_drive.get(obj, v, name, opaque, errp); +} + +static void efuse_prop_release_drive(Object *obj, const char *name, + void *opaque) +{ + qdev_prop_drive.release(obj, name, opaque); +} + +static const PropertyInfo efuse_prop_drive = { + .name = "str", + .description = "Node name or ID of a block device to use as eFUSE backend", + .realized_set_allowed = true, + .get = efuse_prop_get_drive, + .set = efuse_prop_set_drive, + .release = efuse_prop_release_drive, +}; + +static Property efuse_properties[] = { + DEFINE_PROP("drive", XlnxEFuse, blk, efuse_prop_drive, BlockBackend *), + DEFINE_PROP_UINT8("efuse-nr", XlnxEFuse, efuse_nr, 3), + DEFINE_PROP_UINT32("efuse-size", XlnxEFuse, efuse_size, 64 * 32), + DEFINE_PROP_BOOL("init-factory-tbits", XlnxEFuse, init_tbits, true), + DEFINE_PROP_ARRAY("read-only", XlnxEFuse, ro_bits_cnt, ro_bits, + qdev_prop_uint32, uint32_t), + DEFINE_PROP_END_OF_LIST(), +}; + +static void efuse_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = efuse_realize; + device_class_set_props(dc, 
efuse_properties); +} + +static const TypeInfo efuse_info = { + .name = TYPE_XLNX_EFUSE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XlnxEFuse), + .class_init = efuse_class_init, +}; + +static void efuse_register_types(void) +{ + type_register_static(&efuse_info); +} +type_init(efuse_register_types) diff --git a/hw/nvram/xlnx-versal-efuse-cache.c b/hw/nvram/xlnx-versal-efuse-cache.c new file mode 100644 index 00000000000..eaec64d785e --- /dev/null +++ b/hw/nvram/xlnx-versal-efuse-cache.c @@ -0,0 +1,114 @@ +/* + * QEMU model of the EFuse_Cache + * + * Copyright (c) 2017 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-versal-efuse.h" + +#include "qemu/log.h" +#include "hw/qdev-properties.h" + +#define MR_SIZE 0xC00 + +static uint64_t efuse_cache_read(void *opaque, hwaddr addr, unsigned size) +{ + XlnxVersalEFuseCache *s = XLNX_VERSAL_EFUSE_CACHE(opaque); + unsigned int w0 = QEMU_ALIGN_DOWN(addr * 8, 32); + unsigned int w1 = QEMU_ALIGN_DOWN((addr + size - 1) * 8, 32); + + uint64_t ret; + + assert(w0 == w1 || (w0 + 32) == w1); + + ret = xlnx_versal_efuse_read_row(s->efuse, w1, NULL); + if (w0 < w1) { + ret <<= 32; + ret |= xlnx_versal_efuse_read_row(s->efuse, w0, NULL); + } + + /* If 'addr' unaligned, the guest is always assumed to be little-endian. 
*/ + addr &= 3; + if (addr) { + ret >>= 8 * addr; + } + + return ret; +} + +static void efuse_cache_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + /* No Register Writes allowed */ + qemu_log_mask(LOG_GUEST_ERROR, "%s: efuse cache registers are read-only", + __func__); +} + +static const MemoryRegionOps efuse_cache_ops = { + .read = efuse_cache_read, + .write = efuse_cache_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void efuse_cache_init(Object *obj) +{ + XlnxVersalEFuseCache *s = XLNX_VERSAL_EFUSE_CACHE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->iomem, obj, &efuse_cache_ops, s, + TYPE_XLNX_VERSAL_EFUSE_CACHE, MR_SIZE); + sysbus_init_mmio(sbd, &s->iomem); +} + +static Property efuse_cache_props[] = { + DEFINE_PROP_LINK("efuse", + XlnxVersalEFuseCache, efuse, + TYPE_XLNX_EFUSE, XlnxEFuse *), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void efuse_cache_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, efuse_cache_props); +} + +static const TypeInfo efuse_cache_info = { + .name = TYPE_XLNX_VERSAL_EFUSE_CACHE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalEFuseCache), + .class_init = efuse_cache_class_init, + .instance_init = efuse_cache_init, +}; + +static void efuse_cache_register_types(void) +{ + type_register_static(&efuse_cache_info); +} + +type_init(efuse_cache_register_types) diff --git a/hw/nvram/xlnx-versal-efuse-ctrl.c b/hw/nvram/xlnx-versal-efuse-ctrl.c new file mode 100644 index 00000000000..b35ba65ab57 --- /dev/null +++ b/hw/nvram/xlnx-versal-efuse-ctrl.c @@ -0,0 +1,793 @@ +/* + * QEMU model of the Versal eFuse controller + * + * Copyright (c) 2020 Xilinx Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-versal-efuse.h" + +#include "qemu/log.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" + +#ifndef XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG +#define XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG 0 +#endif + +REG32(WR_LOCK, 0x0) + FIELD(WR_LOCK, LOCK, 0, 16) +REG32(CFG, 0x4) + FIELD(CFG, SLVERR_ENABLE, 5, 1) + FIELD(CFG, MARGIN_RD, 2, 1) + FIELD(CFG, PGM_EN, 1, 1) +REG32(STATUS, 0x8) + FIELD(STATUS, AES_USER_KEY_1_CRC_PASS, 11, 1) + FIELD(STATUS, AES_USER_KEY_1_CRC_DONE, 10, 1) + FIELD(STATUS, AES_USER_KEY_0_CRC_PASS, 9, 1) + FIELD(STATUS, AES_USER_KEY_0_CRC_DONE, 8, 1) + FIELD(STATUS, AES_CRC_PASS, 7, 1) + FIELD(STATUS, AES_CRC_DONE, 6, 1) + FIELD(STATUS, CACHE_DONE, 5, 1) + FIELD(STATUS, CACHE_LOAD, 4, 1) + FIELD(STATUS, EFUSE_2_TBIT, 2, 1) + FIELD(STATUS, EFUSE_1_TBIT, 1, 1) + FIELD(STATUS, EFUSE_0_TBIT, 0, 1) +REG32(EFUSE_PGM_ADDR, 0xc) + FIELD(EFUSE_PGM_ADDR, PAGE, 13, 4) + FIELD(EFUSE_PGM_ADDR, ROW, 5, 8) + FIELD(EFUSE_PGM_ADDR, COLUMN, 0, 5) +REG32(EFUSE_RD_ADDR, 0x10) + FIELD(EFUSE_RD_ADDR, PAGE, 13, 4) + FIELD(EFUSE_RD_ADDR, ROW, 5, 8) +REG32(EFUSE_RD_DATA, 0x14) +REG32(TPGM, 0x18) + FIELD(TPGM, VALUE, 0, 16) +REG32(TRD, 0x1c) + FIELD(TRD, VALUE, 0, 8) +REG32(TSU_H_PS, 0x20) + FIELD(TSU_H_PS, VALUE, 0, 8) +REG32(TSU_H_PS_CS, 0x24) + FIELD(TSU_H_PS_CS, VALUE, 0, 8) +REG32(TRDM, 0x28) + FIELD(TRDM, VALUE, 0, 8) +REG32(TSU_H_CS, 0x2c) + FIELD(TSU_H_CS, VALUE, 0, 8) +REG32(EFUSE_ISR, 0x30) + FIELD(EFUSE_ISR, APB_SLVERR, 31, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_ISR, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_ISR, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_ISR, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_ISR, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_ISR, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_ISR, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_ISR, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_ISR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_ISR, RD_ERROR, 3, 1) + FIELD(EFUSE_ISR, RD_DONE, 2, 1) + FIELD(EFUSE_ISR, PGM_ERROR, 1, 1) + FIELD(EFUSE_ISR, PGM_DONE, 0, 1) +REG32(EFUSE_IMR, 0x34) + FIELD(EFUSE_IMR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_IMR, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_IMR, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_IMR, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_IMR, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_IMR, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_IMR, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_IMR, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_IMR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IMR, RD_ERROR, 3, 1) + FIELD(EFUSE_IMR, RD_DONE, 2, 1) + FIELD(EFUSE_IMR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IMR, PGM_DONE, 0, 1) +REG32(EFUSE_IER, 0x38) + FIELD(EFUSE_IER, APB_SLVERR, 31, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_IER, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_IER, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_IER, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_IER, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_IER, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_IER, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_IER, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_IER, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IER, RD_ERROR, 3, 1) + FIELD(EFUSE_IER, RD_DONE, 2, 1) + FIELD(EFUSE_IER, PGM_ERROR, 1, 1) + FIELD(EFUSE_IER, PGM_DONE, 0, 1) +REG32(EFUSE_IDR, 0x3c) + FIELD(EFUSE_IDR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IDR, 
CACHE_PARITY_E2, 14, 1) + FIELD(EFUSE_IDR, CACHE_PARITY_E1, 13, 1) + FIELD(EFUSE_IDR, CACHE_PARITY_E0S, 12, 1) + FIELD(EFUSE_IDR, CACHE_PARITY_E0R, 11, 1) + FIELD(EFUSE_IDR, CACHE_APB_SLVERR, 10, 1) + FIELD(EFUSE_IDR, CACHE_REQ_ERROR, 9, 1) + FIELD(EFUSE_IDR, MAIN_REQ_ERROR, 8, 1) + FIELD(EFUSE_IDR, READ_ON_CACHE_LD, 7, 1) + FIELD(EFUSE_IDR, CACHE_FSM_ERROR, 6, 1) + FIELD(EFUSE_IDR, MAIN_FSM_ERROR, 5, 1) + FIELD(EFUSE_IDR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IDR, RD_ERROR, 3, 1) + FIELD(EFUSE_IDR, RD_DONE, 2, 1) + FIELD(EFUSE_IDR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IDR, PGM_DONE, 0, 1) +REG32(EFUSE_CACHE_LOAD, 0x40) + FIELD(EFUSE_CACHE_LOAD, LOAD, 0, 1) +REG32(EFUSE_PGM_LOCK, 0x44) + FIELD(EFUSE_PGM_LOCK, SPK_ID_LOCK, 0, 1) +REG32(EFUSE_AES_CRC, 0x48) +REG32(EFUSE_AES_USR_KEY0_CRC, 0x4c) +REG32(EFUSE_AES_USR_KEY1_CRC, 0x50) +REG32(EFUSE_PD, 0x54) +REG32(EFUSE_ANLG_OSC_SW_1LP, 0x60) +REG32(EFUSE_TEST_CTRL, 0x100) + +#define R_MAX (R_EFUSE_TEST_CTRL + 1) + +#define R_WR_LOCK_UNLOCK_PASSCODE (0xDF0D) + +/* + * eFuse layout references: + * https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilnvm/src/xnvm_efuse_hw.h + */ +#define BIT_POS_OF(A_) \ + ((uint32_t)((A_) & (R_EFUSE_PGM_ADDR_ROW_MASK | \ + R_EFUSE_PGM_ADDR_COLUMN_MASK))) + +#define BIT_POS(R_, C_) \ + ((uint32_t)((R_EFUSE_PGM_ADDR_ROW_MASK \ + & ((R_) << R_EFUSE_PGM_ADDR_ROW_SHIFT)) \ + | \ + (R_EFUSE_PGM_ADDR_COLUMN_MASK \ + & ((C_) << R_EFUSE_PGM_ADDR_COLUMN_SHIFT)))) + +#define EFUSE_TBIT_POS(A_) (BIT_POS_OF(A_) >= BIT_POS(0, 28)) + +#define EFUSE_ANCHOR_ROW (0) +#define EFUSE_ANCHOR_3_COL (27) +#define EFUSE_ANCHOR_1_COL (1) + +#define EFUSE_AES_KEY_START BIT_POS(12, 0) +#define EFUSE_AES_KEY_END BIT_POS(19, 31) +#define EFUSE_USER_KEY_0_START BIT_POS(20, 0) +#define EFUSE_USER_KEY_0_END BIT_POS(27, 31) +#define EFUSE_USER_KEY_1_START BIT_POS(28, 0) +#define EFUSE_USER_KEY_1_END BIT_POS(35, 31) + +#define EFUSE_RD_BLOCKED_START EFUSE_AES_KEY_START +#define EFUSE_RD_BLOCKED_END EFUSE_USER_KEY_1_END + +#define EFUSE_GLITCH_DET_WR_LK BIT_POS(4, 31) +#define EFUSE_PPK0_WR_LK BIT_POS(43, 6) +#define EFUSE_PPK1_WR_LK BIT_POS(43, 7) +#define EFUSE_PPK2_WR_LK BIT_POS(43, 8) +#define EFUSE_AES_WR_LK BIT_POS(43, 11) +#define EFUSE_USER_KEY_0_WR_LK BIT_POS(43, 13) +#define EFUSE_USER_KEY_1_WR_LK BIT_POS(43, 15) +#define EFUSE_PUF_SYN_LK BIT_POS(43, 16) +#define EFUSE_DNA_WR_LK BIT_POS(43, 27) +#define EFUSE_BOOT_ENV_WR_LK BIT_POS(43, 28) + +#define EFUSE_PGM_LOCKED_START BIT_POS(44, 0) +#define EFUSE_PGM_LOCKED_END BIT_POS(51, 31) + +#define EFUSE_PUF_PAGE (2) +#define EFUSE_PUF_SYN_START BIT_POS(129, 0) +#define EFUSE_PUF_SYN_END BIT_POS(255, 27) + +#define EFUSE_KEY_CRC_LK_ROW (43) +#define EFUSE_AES_KEY_CRC_LK_MASK ((1U << 9) | (1U << 10)) +#define EFUSE_USER_KEY_0_CRC_LK_MASK (1U << 12) +#define EFUSE_USER_KEY_1_CRC_LK_MASK (1U << 14) + +/* + * A handy macro to return value of an array element, + * or a specific default if given index is out of bound. + */ +#define ARRAY_GET(A_, I_, D_) \ + ((unsigned int)(I_) < ARRAY_SIZE(A_) ? 
(A_)[I_] : (D_)) + +QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxVersalEFuseCtrl *)0)->regs)); + +typedef struct XlnxEFuseLkSpec { + uint16_t row; + uint16_t lk_bit; +} XlnxEFuseLkSpec; + +static void efuse_imr_update_irq(XlnxVersalEFuseCtrl *s) +{ + bool pending = s->regs[R_EFUSE_ISR] & ~s->regs[R_EFUSE_IMR]; + qemu_set_irq(s->irq_efuse_imr, pending); +} + +static void efuse_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + efuse_imr_update_irq(s); +} + +static uint64_t efuse_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] &= ~val; + efuse_imr_update_irq(s); + return 0; +} + +static uint64_t efuse_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] |= val; + efuse_imr_update_irq(s); + return 0; +} + +static void efuse_status_tbits_sync(XlnxVersalEFuseCtrl *s) +{ + uint32_t check = xlnx_efuse_tbits_check(s->efuse); + uint32_t val = s->regs[R_STATUS]; + + val = FIELD_DP32(val, STATUS, EFUSE_0_TBIT, !!(check & (1 << 0))); + val = FIELD_DP32(val, STATUS, EFUSE_1_TBIT, !!(check & (1 << 1))); + val = FIELD_DP32(val, STATUS, EFUSE_2_TBIT, !!(check & (1 << 2))); + + s->regs[R_STATUS] = val; +} + +static void efuse_anchor_bits_check(XlnxVersalEFuseCtrl *s) +{ + unsigned page; + + if (!s->efuse || !s->efuse->init_tbits) { + return; + } + + for (page = 0; page < s->efuse->efuse_nr; page++) { + uint32_t row = 0, bit; + + row = FIELD_DP32(row, EFUSE_PGM_ADDR, PAGE, page); + row = FIELD_DP32(row, EFUSE_PGM_ADDR, ROW, EFUSE_ANCHOR_ROW); + + bit = FIELD_DP32(row, EFUSE_PGM_ADDR, COLUMN, EFUSE_ANCHOR_3_COL); + if (!xlnx_efuse_get_bit(s->efuse, bit)) { + xlnx_efuse_set_bit(s->efuse, bit); + } + + bit = FIELD_DP32(row, EFUSE_PGM_ADDR, COLUMN, EFUSE_ANCHOR_1_COL); + if (!xlnx_efuse_get_bit(s->efuse, bit)) { + xlnx_efuse_set_bit(s->efuse, bit); + } + } +} + +static void efuse_key_crc_check(RegisterInfo *reg, uint32_t crc, + uint32_t pass_mask, uint32_t done_mask, + unsigned first, uint32_t lk_mask) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + uint32_t r, lk_bits; + + /* + * To start, assume both DONE and PASS, and clear PASS by xor + * if CRC-check fails or CRC-check disabled by lock fuse. 
+ */ + r = s->regs[R_STATUS] | done_mask | pass_mask; + + lk_bits = xlnx_efuse_get_row(s->efuse, EFUSE_KEY_CRC_LK_ROW) & lk_mask; + if (lk_bits == 0 && xlnx_efuse_k256_check(s->efuse, crc, first)) { + pass_mask = 0; + } + + s->regs[R_STATUS] = r ^ pass_mask; +} + +static void efuse_data_sync(XlnxVersalEFuseCtrl *s) +{ + efuse_status_tbits_sync(s); +} + +static int efuse_lk_spec_cmp(const void *a, const void *b) +{ + uint16_t r1 = ((const XlnxEFuseLkSpec *)a)->row; + uint16_t r2 = ((const XlnxEFuseLkSpec *)b)->row; + + return (r1 > r2) - (r1 < r2); +} + +static void efuse_lk_spec_sort(XlnxVersalEFuseCtrl *s) +{ + XlnxEFuseLkSpec *ary = s->extra_pg0_lock_spec; + const uint32_t n8 = s->extra_pg0_lock_n16 * 2; + const uint32_t sz = sizeof(ary[0]); + const uint32_t cnt = n8 / sz; + + if (ary && cnt) { + qsort(ary, cnt, sz, efuse_lk_spec_cmp); + } +} + +static uint32_t efuse_lk_spec_find(XlnxVersalEFuseCtrl *s, uint32_t row) +{ + const XlnxEFuseLkSpec *ary = s->extra_pg0_lock_spec; + const uint32_t n8 = s->extra_pg0_lock_n16 * 2; + const uint32_t sz = sizeof(ary[0]); + const uint32_t cnt = n8 / sz; + const XlnxEFuseLkSpec *item = NULL; + + if (ary && cnt) { + XlnxEFuseLkSpec k = { .row = row, }; + + item = bsearch(&k, ary, cnt, sz, efuse_lk_spec_cmp); + } + + return item ? item->lk_bit : 0; +} + +static uint32_t efuse_bit_locked(XlnxVersalEFuseCtrl *s, uint32_t bit) +{ + /* Hard-coded locks */ + static const uint16_t pg0_hard_lock[] = { + [4] = EFUSE_GLITCH_DET_WR_LK, + [37] = EFUSE_BOOT_ENV_WR_LK, + + [8 ... 11] = EFUSE_DNA_WR_LK, + [12 ... 19] = EFUSE_AES_WR_LK, + [20 ... 27] = EFUSE_USER_KEY_0_WR_LK, + [28 ... 35] = EFUSE_USER_KEY_1_WR_LK, + [64 ... 71] = EFUSE_PPK0_WR_LK, + [72 ... 79] = EFUSE_PPK1_WR_LK, + [80 ... 87] = EFUSE_PPK2_WR_LK, + }; + + uint32_t row = FIELD_EX32(bit, EFUSE_PGM_ADDR, ROW); + uint32_t lk_bit = ARRAY_GET(pg0_hard_lock, row, 0); + + return lk_bit ? lk_bit : efuse_lk_spec_find(s, row); +} + +static bool efuse_pgm_locked(XlnxVersalEFuseCtrl *s, unsigned int bit) +{ + + unsigned int lock = 1; + + /* Global lock */ + if (!ARRAY_FIELD_EX32(s->regs, CFG, PGM_EN)) { + goto ret_lock; + } + + /* Row lock */ + switch (FIELD_EX32(bit, EFUSE_PGM_ADDR, PAGE)) { + case 0: + if (ARRAY_FIELD_EX32(s->regs, EFUSE_PGM_LOCK, SPK_ID_LOCK) && + bit >= EFUSE_PGM_LOCKED_START && bit <= EFUSE_PGM_LOCKED_END) { + goto ret_lock; + } + + lock = efuse_bit_locked(s, bit); + break; + case EFUSE_PUF_PAGE: + if (bit < EFUSE_PUF_SYN_START || bit > EFUSE_PUF_SYN_END) { + lock = 0; + goto ret_lock; + } + + lock = EFUSE_PUF_SYN_LK; + break; + default: + lock = 0; + goto ret_lock; + } + + /* Row lock by an efuse bit */ + if (lock) { + lock = xlnx_efuse_get_bit(s->efuse, lock); + } + + ret_lock: + return lock != 0; +} + +static void efuse_pgm_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + unsigned bit = val64; + bool ok = false; + + /* Always zero out PGM_ADDR because it is write-only */ + s->regs[R_EFUSE_PGM_ADDR] = 0; + + /* + * Indicate error if bit is write-protected (or read-only + * as guarded by efuse_set_bit()). + * + * Keep it simple by not modeling program timing. + * + * Note: model must NEVER clear the PGM_ERROR bit; it is + * up to guest to do so (or by reset). 
+ */ + if (efuse_pgm_locked(s, bit)) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Denied setting of efuse<%u, %u, %u>\n", + path, + FIELD_EX32(bit, EFUSE_PGM_ADDR, PAGE), + FIELD_EX32(bit, EFUSE_PGM_ADDR, ROW), + FIELD_EX32(bit, EFUSE_PGM_ADDR, COLUMN)); + } else if (xlnx_efuse_set_bit(s->efuse, bit)) { + ok = true; + if (EFUSE_TBIT_POS(bit)) { + efuse_status_tbits_sync(s); + } + } + + if (!ok) { + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_ERROR, 1); + } + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_DONE, 1); + efuse_imr_update_irq(s); +} + +static void efuse_rd_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + unsigned bit = val64; + bool denied; + + /* Always zero out RD_ADDR because it is write-only */ + s->regs[R_EFUSE_RD_ADDR] = 0; + + /* + * Indicate error if row is read-blocked. + * + * Note: model must NEVER clear the RD_ERROR bit; it is + * up to guest to do so (or by reset). + */ + s->regs[R_EFUSE_RD_DATA] = xlnx_versal_efuse_read_row(s->efuse, + bit, &denied); + if (denied) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Denied reading of efuse<%u, %u>\n", + path, + FIELD_EX32(bit, EFUSE_RD_ADDR, PAGE), + FIELD_EX32(bit, EFUSE_RD_ADDR, ROW)); + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_ERROR, 1); + } + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_DONE, 1); + efuse_imr_update_irq(s); + return; +} + +static uint64_t efuse_cache_load_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + + if (val64 & R_EFUSE_CACHE_LOAD_LOAD_MASK) { + efuse_data_sync(s); + + ARRAY_FIELD_DP32(s->regs, STATUS, CACHE_DONE, 1); + efuse_imr_update_irq(s); + } + + return 0; +} + +static uint64_t efuse_pgm_lock_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(reg->opaque); + + /* Ignore all other bits */ + val64 = FIELD_EX32(val64, EFUSE_PGM_LOCK, SPK_ID_LOCK); + + /* Once the bit is written 1, only reset will clear it to 0 */ + val64 |= ARRAY_FIELD_EX32(s->regs, EFUSE_PGM_LOCK, SPK_ID_LOCK); + + return val64; +} + +static void efuse_aes_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + efuse_key_crc_check(reg, val64, + R_STATUS_AES_CRC_PASS_MASK, + R_STATUS_AES_CRC_DONE_MASK, + EFUSE_AES_KEY_START, + EFUSE_AES_KEY_CRC_LK_MASK); +} + +static void efuse_aes_u0_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + efuse_key_crc_check(reg, val64, + R_STATUS_AES_USER_KEY_0_CRC_PASS_MASK, + R_STATUS_AES_USER_KEY_0_CRC_DONE_MASK, + EFUSE_USER_KEY_0_START, + EFUSE_USER_KEY_0_CRC_LK_MASK); +} + +static void efuse_aes_u1_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + efuse_key_crc_check(reg, val64, + R_STATUS_AES_USER_KEY_1_CRC_PASS_MASK, + R_STATUS_AES_USER_KEY_1_CRC_DONE_MASK, + EFUSE_USER_KEY_1_START, + EFUSE_USER_KEY_1_CRC_LK_MASK); +} + +static uint64_t efuse_wr_lock_prew(RegisterInfo *reg, uint64_t val) +{ + return val != R_WR_LOCK_UNLOCK_PASSCODE; +} + +static const RegisterAccessInfo efuse_ctrl_regs_info[] = { + { .name = "WR_LOCK", .addr = A_WR_LOCK, + .reset = 0x1, + .pre_write = efuse_wr_lock_prew, + },{ .name = "CFG", .addr = A_CFG, + .rsvd = 0x9, + },{ .name = "STATUS", .addr = A_STATUS, + .rsvd = 0x8, + .ro = 0xfff, + },{ .name = "EFUSE_PGM_ADDR", .addr = A_EFUSE_PGM_ADDR, + .post_write = efuse_pgm_addr_postw, + },{ .name = "EFUSE_RD_ADDR", .addr = A_EFUSE_RD_ADDR, + .rsvd = 0x1f, + .post_write = efuse_rd_addr_postw, + },{ 
.name = "EFUSE_RD_DATA", .addr = A_EFUSE_RD_DATA, + .ro = 0xffffffff, + },{ .name = "TPGM", .addr = A_TPGM, + },{ .name = "TRD", .addr = A_TRD, + .reset = 0x19, + },{ .name = "TSU_H_PS", .addr = A_TSU_H_PS, + .reset = 0xff, + },{ .name = "TSU_H_PS_CS", .addr = A_TSU_H_PS_CS, + .reset = 0x11, + },{ .name = "TRDM", .addr = A_TRDM, + .reset = 0x3a, + },{ .name = "TSU_H_CS", .addr = A_TSU_H_CS, + .reset = 0x16, + },{ .name = "EFUSE_ISR", .addr = A_EFUSE_ISR, + .rsvd = 0x7fff8000, + .w1c = 0x80007fff, + .post_write = efuse_isr_postw, + },{ .name = "EFUSE_IMR", .addr = A_EFUSE_IMR, + .reset = 0x80007fff, + .rsvd = 0x7fff8000, + .ro = 0xffffffff, + },{ .name = "EFUSE_IER", .addr = A_EFUSE_IER, + .rsvd = 0x7fff8000, + .pre_write = efuse_ier_prew, + },{ .name = "EFUSE_IDR", .addr = A_EFUSE_IDR, + .rsvd = 0x7fff8000, + .pre_write = efuse_idr_prew, + },{ .name = "EFUSE_CACHE_LOAD", .addr = A_EFUSE_CACHE_LOAD, + .pre_write = efuse_cache_load_prew, + },{ .name = "EFUSE_PGM_LOCK", .addr = A_EFUSE_PGM_LOCK, + .pre_write = efuse_pgm_lock_prew, + },{ .name = "EFUSE_AES_CRC", .addr = A_EFUSE_AES_CRC, + .post_write = efuse_aes_crc_postw, + },{ .name = "EFUSE_AES_USR_KEY0_CRC", .addr = A_EFUSE_AES_USR_KEY0_CRC, + .post_write = efuse_aes_u0_crc_postw, + },{ .name = "EFUSE_AES_USR_KEY1_CRC", .addr = A_EFUSE_AES_USR_KEY1_CRC, + .post_write = efuse_aes_u1_crc_postw, + },{ .name = "EFUSE_PD", .addr = A_EFUSE_PD, + .ro = 0xfffffffe, + },{ .name = "EFUSE_ANLG_OSC_SW_1LP", .addr = A_EFUSE_ANLG_OSC_SW_1LP, + },{ .name = "EFUSE_TEST_CTRL", .addr = A_EFUSE_TEST_CTRL, + .reset = 0x8, + } +}; + +static void efuse_ctrl_reg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + RegisterInfoArray *reg_array = opaque; + XlnxVersalEFuseCtrl *s; + Object *dev; + + assert(reg_array != NULL); + + dev = reg_array->mem.owner; + assert(dev); + + s = XLNX_VERSAL_EFUSE_CTRL(dev); + + if (addr != A_WR_LOCK && s->regs[R_WR_LOCK]) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s[reg_0x%02lx]: Attempt to write locked register.\n", + path, (long)addr); + } else { + register_write_memory(opaque, addr, data, size); + } +} + +static void efuse_ctrl_register_reset(RegisterInfo *reg) +{ + if (!reg->data || !reg->access) { + return; + } + + /* Reset must not trigger some registers' writers */ + switch (reg->access->addr) { + case A_EFUSE_AES_CRC: + case A_EFUSE_AES_USR_KEY0_CRC: + case A_EFUSE_AES_USR_KEY1_CRC: + *(uint32_t *)reg->data = reg->access->reset; + return; + } + + register_reset(reg); +} + +static void efuse_ctrl_reset(DeviceState *dev) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + efuse_ctrl_register_reset(&s->regs_info[i]); + } + + efuse_anchor_bits_check(s); + efuse_data_sync(s); + efuse_imr_update_irq(s); +} + +static const MemoryRegionOps efuse_ctrl_ops = { + .read = register_read_memory, + .write = efuse_ctrl_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void efuse_ctrl_realize(DeviceState *dev, Error **errp) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(dev); + const uint32_t lks_sz = sizeof(XlnxEFuseLkSpec) / 2; + + if (!s->efuse) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, "%s.efuse: link property not connected to XLNX-EFUSE", + path); + return; + } + + /* Sort property-defined pgm-locks for bsearch lookup */ + if ((s->extra_pg0_lock_n16 % 
lks_sz) != 0) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, + "%s.pg0-lock: array property item-count not multiple of %u", + path, lks_sz); + return; + } + + efuse_lk_spec_sort(s); +} + +static void efuse_ctrl_init(Object *obj) +{ + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + reg_array = + register_init_block32(DEVICE(obj), efuse_ctrl_regs_info, + ARRAY_SIZE(efuse_ctrl_regs_info), + s->regs_info, s->regs, + &efuse_ctrl_ops, + XLNX_VERSAL_EFUSE_CTRL_ERR_DEBUG, + R_MAX * 4); + + sysbus_init_mmio(sbd, ®_array->mem); + sysbus_init_irq(sbd, &s->irq_efuse_imr); +} + +static const VMStateDescription vmstate_efuse_ctrl = { + .name = TYPE_XLNX_VERSAL_EFUSE_CTRL, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, XlnxVersalEFuseCtrl, R_MAX), + VMSTATE_END_OF_LIST(), + } +}; + +static Property efuse_ctrl_props[] = { + DEFINE_PROP_LINK("efuse", + XlnxVersalEFuseCtrl, efuse, + TYPE_XLNX_EFUSE, XlnxEFuse *), + DEFINE_PROP_ARRAY("pg0-lock", + XlnxVersalEFuseCtrl, extra_pg0_lock_n16, + extra_pg0_lock_spec, qdev_prop_uint16, uint16_t), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void efuse_ctrl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = efuse_ctrl_reset; + dc->realize = efuse_ctrl_realize; + dc->vmsd = &vmstate_efuse_ctrl; + device_class_set_props(dc, efuse_ctrl_props); +} + +static const TypeInfo efuse_ctrl_info = { + .name = TYPE_XLNX_VERSAL_EFUSE_CTRL, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalEFuseCtrl), + .class_init = efuse_ctrl_class_init, + .instance_init = efuse_ctrl_init, +}; + +static void efuse_ctrl_register_types(void) +{ + type_register_static(&efuse_ctrl_info); +} + +type_init(efuse_ctrl_register_types) + +/* + * Retrieve a row, with unreadable bits returned as 0. + */ +uint32_t xlnx_versal_efuse_read_row(XlnxEFuse *efuse, + uint32_t bit, bool *denied) +{ + bool dummy; + + if (!denied) { + denied = &dummy; + } + + if (bit >= EFUSE_RD_BLOCKED_START && bit <= EFUSE_RD_BLOCKED_END) { + *denied = true; + return 0; + } + + *denied = false; + return xlnx_efuse_get_row(efuse, bit); +} diff --git a/hw/nvram/xlnx-zynqmp-efuse.c b/hw/nvram/xlnx-zynqmp-efuse.c new file mode 100644 index 00000000000..228ba0bbfaf --- /dev/null +++ b/hw/nvram/xlnx-zynqmp-efuse.c @@ -0,0 +1,861 @@ +/* + * QEMU model of the ZynqMP eFuse + * + * Copyright (c) 2015 Xilinx Inc. + * + * Written by Edgar E. Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/nvram/xlnx-zynqmp-efuse.h" + +#include "qemu/log.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" + +#ifndef ZYNQMP_EFUSE_ERR_DEBUG +#define ZYNQMP_EFUSE_ERR_DEBUG 0 +#endif + +REG32(WR_LOCK, 0x0) + FIELD(WR_LOCK, LOCK, 0, 16) +REG32(CFG, 0x4) + FIELD(CFG, SLVERR_ENABLE, 5, 1) + FIELD(CFG, MARGIN_RD, 2, 2) + FIELD(CFG, PGM_EN, 1, 1) + FIELD(CFG, EFUSE_CLK_SEL, 0, 1) +REG32(STATUS, 0x8) + FIELD(STATUS, AES_CRC_PASS, 7, 1) + FIELD(STATUS, AES_CRC_DONE, 6, 1) + FIELD(STATUS, CACHE_DONE, 5, 1) + FIELD(STATUS, CACHE_LOAD, 4, 1) + FIELD(STATUS, EFUSE_3_TBIT, 2, 1) + FIELD(STATUS, EFUSE_2_TBIT, 1, 1) + FIELD(STATUS, EFUSE_0_TBIT, 0, 1) +REG32(EFUSE_PGM_ADDR, 0xc) + FIELD(EFUSE_PGM_ADDR, EFUSE, 11, 2) + FIELD(EFUSE_PGM_ADDR, ROW, 5, 6) + FIELD(EFUSE_PGM_ADDR, COLUMN, 0, 5) +REG32(EFUSE_RD_ADDR, 0x10) + FIELD(EFUSE_RD_ADDR, EFUSE, 11, 2) + FIELD(EFUSE_RD_ADDR, ROW, 5, 6) +REG32(EFUSE_RD_DATA, 0x14) +REG32(TPGM, 0x18) + FIELD(TPGM, VALUE, 0, 16) +REG32(TRD, 0x1c) + FIELD(TRD, VALUE, 0, 8) +REG32(TSU_H_PS, 0x20) + FIELD(TSU_H_PS, VALUE, 0, 8) +REG32(TSU_H_PS_CS, 0x24) + FIELD(TSU_H_PS_CS, VALUE, 0, 8) +REG32(TSU_H_CS, 0x2c) + FIELD(TSU_H_CS, VALUE, 0, 4) +REG32(EFUSE_ISR, 0x30) + FIELD(EFUSE_ISR, APB_SLVERR, 31, 1) + FIELD(EFUSE_ISR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_ISR, RD_ERROR, 3, 1) + FIELD(EFUSE_ISR, RD_DONE, 2, 1) + FIELD(EFUSE_ISR, PGM_ERROR, 1, 1) + FIELD(EFUSE_ISR, PGM_DONE, 0, 1) +REG32(EFUSE_IMR, 0x34) + FIELD(EFUSE_IMR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IMR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IMR, RD_ERROR, 3, 1) + FIELD(EFUSE_IMR, RD_DONE, 2, 1) + FIELD(EFUSE_IMR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IMR, PGM_DONE, 0, 1) +REG32(EFUSE_IER, 0x38) + FIELD(EFUSE_IER, APB_SLVERR, 31, 1) + FIELD(EFUSE_IER, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IER, RD_ERROR, 3, 1) + FIELD(EFUSE_IER, RD_DONE, 2, 1) + FIELD(EFUSE_IER, PGM_ERROR, 1, 1) + FIELD(EFUSE_IER, PGM_DONE, 0, 1) +REG32(EFUSE_IDR, 0x3c) + FIELD(EFUSE_IDR, APB_SLVERR, 31, 1) + FIELD(EFUSE_IDR, CACHE_ERROR, 4, 1) + FIELD(EFUSE_IDR, RD_ERROR, 3, 1) + FIELD(EFUSE_IDR, RD_DONE, 2, 1) + FIELD(EFUSE_IDR, PGM_ERROR, 1, 1) + FIELD(EFUSE_IDR, PGM_DONE, 0, 1) +REG32(EFUSE_CACHE_LOAD, 0x40) + FIELD(EFUSE_CACHE_LOAD, LOAD, 0, 1) +REG32(EFUSE_PGM_LOCK, 0x44) + FIELD(EFUSE_PGM_LOCK, SPK_ID_LOCK, 0, 1) +REG32(EFUSE_AES_CRC, 0x48) +REG32(EFUSE_TBITS_PRGRMG_EN, 0x100) + FIELD(EFUSE_TBITS_PRGRMG_EN, TBITS_PRGRMG_EN, 3, 1) +REG32(DNA_0, 0x100c) +REG32(DNA_1, 0x1010) +REG32(DNA_2, 0x1014) +REG32(IPDISABLE, 0x1018) + FIELD(IPDISABLE, VCU_DIS, 8, 1) + FIELD(IPDISABLE, GPU_DIS, 5, 1) + FIELD(IPDISABLE, APU3_DIS, 3, 1) + FIELD(IPDISABLE, APU2_DIS, 2, 1) + FIELD(IPDISABLE, APU1_DIS, 1, 1) + FIELD(IPDISABLE, APU0_DIS, 0, 1) +REG32(SYSOSC_CTRL, 0x101c) + FIELD(SYSOSC_CTRL, SYSOSC_EN, 0, 1) +REG32(USER_0, 0x1020) +REG32(USER_1, 0x1024) +REG32(USER_2, 0x1028) +REG32(USER_3, 0x102c) +REG32(USER_4, 0x1030) +REG32(USER_5, 0x1034) +REG32(USER_6, 0x1038) +REG32(USER_7, 0x103c) +REG32(MISC_USER_CTRL, 0x1040) + FIELD(MISC_USER_CTRL, FPD_SC_EN_0, 14, 1) + FIELD(MISC_USER_CTRL, LPD_SC_EN_0, 11, 1) + FIELD(MISC_USER_CTRL, LBIST_EN, 10, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_7, 7, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_6, 6, 1) + 
FIELD(MISC_USER_CTRL, USR_WRLK_5, 5, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_4, 4, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_3, 3, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_2, 2, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_1, 1, 1) + FIELD(MISC_USER_CTRL, USR_WRLK_0, 0, 1) +REG32(ROM_RSVD, 0x1044) + FIELD(ROM_RSVD, PBR_BOOT_ERROR, 0, 3) +REG32(PUF_CHASH, 0x1050) +REG32(PUF_MISC, 0x1054) + FIELD(PUF_MISC, REGISTER_DIS, 31, 1) + FIELD(PUF_MISC, SYN_WRLK, 30, 1) + FIELD(PUF_MISC, SYN_INVLD, 29, 1) + FIELD(PUF_MISC, TEST2_DIS, 28, 1) + FIELD(PUF_MISC, UNUSED27, 27, 1) + FIELD(PUF_MISC, UNUSED26, 26, 1) + FIELD(PUF_MISC, UNUSED25, 25, 1) + FIELD(PUF_MISC, UNUSED24, 24, 1) + FIELD(PUF_MISC, AUX, 0, 24) +REG32(SEC_CTRL, 0x1058) + FIELD(SEC_CTRL, PPK1_INVLD, 30, 2) + FIELD(SEC_CTRL, PPK1_WRLK, 29, 1) + FIELD(SEC_CTRL, PPK0_INVLD, 27, 2) + FIELD(SEC_CTRL, PPK0_WRLK, 26, 1) + FIELD(SEC_CTRL, RSA_EN, 11, 15) + FIELD(SEC_CTRL, SEC_LOCK, 10, 1) + FIELD(SEC_CTRL, PROG_GATE_2, 9, 1) + FIELD(SEC_CTRL, PROG_GATE_1, 8, 1) + FIELD(SEC_CTRL, PROG_GATE_0, 7, 1) + FIELD(SEC_CTRL, DFT_DIS, 6, 1) + FIELD(SEC_CTRL, JTAG_DIS, 5, 1) + FIELD(SEC_CTRL, ERROR_DIS, 4, 1) + FIELD(SEC_CTRL, BBRAM_DIS, 3, 1) + FIELD(SEC_CTRL, ENC_ONLY, 2, 1) + FIELD(SEC_CTRL, AES_WRLK, 1, 1) + FIELD(SEC_CTRL, AES_RDLK, 0, 1) +REG32(SPK_ID, 0x105c) +REG32(PPK0_0, 0x10a0) +REG32(PPK0_1, 0x10a4) +REG32(PPK0_2, 0x10a8) +REG32(PPK0_3, 0x10ac) +REG32(PPK0_4, 0x10b0) +REG32(PPK0_5, 0x10b4) +REG32(PPK0_6, 0x10b8) +REG32(PPK0_7, 0x10bc) +REG32(PPK0_8, 0x10c0) +REG32(PPK0_9, 0x10c4) +REG32(PPK0_10, 0x10c8) +REG32(PPK0_11, 0x10cc) +REG32(PPK1_0, 0x10d0) +REG32(PPK1_1, 0x10d4) +REG32(PPK1_2, 0x10d8) +REG32(PPK1_3, 0x10dc) +REG32(PPK1_4, 0x10e0) +REG32(PPK1_5, 0x10e4) +REG32(PPK1_6, 0x10e8) +REG32(PPK1_7, 0x10ec) +REG32(PPK1_8, 0x10f0) +REG32(PPK1_9, 0x10f4) +REG32(PPK1_10, 0x10f8) +REG32(PPK1_11, 0x10fc) + +#define BIT_POS(ROW, COLUMN) (ROW * 32 + COLUMN) +#define R_MAX (R_PPK1_11 + 1) + +/* #define EFUSE_XOSC 26 */ + +/* + * eFUSE layout references: + * ZynqMP: UG1085 (v2.1) August 21, 2019, p.277, Table 12-13 + */ +#define EFUSE_AES_RDLK BIT_POS(22, 0) +#define EFUSE_AES_WRLK BIT_POS(22, 1) +#define EFUSE_ENC_ONLY BIT_POS(22, 2) +#define EFUSE_BBRAM_DIS BIT_POS(22, 3) +#define EFUSE_ERROR_DIS BIT_POS(22, 4) +#define EFUSE_JTAG_DIS BIT_POS(22, 5) +#define EFUSE_DFT_DIS BIT_POS(22, 6) +#define EFUSE_PROG_GATE_0 BIT_POS(22, 7) +#define EFUSE_PROG_GATE_1 BIT_POS(22, 8) +#define EFUSE_PROG_GATE_2 BIT_POS(22, 9) +#define EFUSE_SEC_LOCK BIT_POS(22, 10) +#define EFUSE_RSA_EN BIT_POS(22, 11) +#define EFUSE_RSA_EN14 BIT_POS(22, 25) +#define EFUSE_PPK0_WRLK BIT_POS(22, 26) +#define EFUSE_PPK0_INVLD BIT_POS(22, 27) +#define EFUSE_PPK0_INVLD_1 BIT_POS(22, 28) +#define EFUSE_PPK1_WRLK BIT_POS(22, 29) +#define EFUSE_PPK1_INVLD BIT_POS(22, 30) +#define EFUSE_PPK1_INVLD_1 BIT_POS(22, 31) + +/* Areas. 
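+ * Each START/END pair below gives the inclusive fuse-bit span of one area,
+ * using the same BIT_POS(row, column) = row * 32 + column encoding as the
+ * control bits above.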
*/ +#define EFUSE_TRIM_START BIT_POS(1, 0) +#define EFUSE_TRIM_END BIT_POS(1, 30) +#define EFUSE_DNA_START BIT_POS(3, 0) +#define EFUSE_DNA_END BIT_POS(5, 31) +#define EFUSE_AES_START BIT_POS(24, 0) +#define EFUSE_AES_END BIT_POS(31, 31) +#define EFUSE_ROM_START BIT_POS(17, 0) +#define EFUSE_ROM_END BIT_POS(17, 31) +#define EFUSE_IPDIS_START BIT_POS(6, 0) +#define EFUSE_IPDIS_END BIT_POS(6, 31) +#define EFUSE_USER_START BIT_POS(8, 0) +#define EFUSE_USER_END BIT_POS(15, 31) +#define EFUSE_BISR_START BIT_POS(32, 0) +#define EFUSE_BISR_END BIT_POS(39, 31) + +#define EFUSE_USER_CTRL_START BIT_POS(16, 0) +#define EFUSE_USER_CTRL_END BIT_POS(16, 16) +#define EFUSE_USER_CTRL_MASK ((uint32_t)MAKE_64BIT_MASK(0, 17)) + +#define EFUSE_PUF_CHASH_START BIT_POS(20, 0) +#define EFUSE_PUF_CHASH_END BIT_POS(20, 31) +#define EFUSE_PUF_MISC_START BIT_POS(21, 0) +#define EFUSE_PUF_MISC_END BIT_POS(21, 31) +#define EFUSE_PUF_SYN_WRLK BIT_POS(21, 30) + +#define EFUSE_SPK_START BIT_POS(23, 0) +#define EFUSE_SPK_END BIT_POS(23, 31) + +#define EFUSE_PPK0_START BIT_POS(40, 0) +#define EFUSE_PPK0_END BIT_POS(51, 31) +#define EFUSE_PPK1_START BIT_POS(52, 0) +#define EFUSE_PPK1_END BIT_POS(63, 31) + +#define EFUSE_CACHE_FLD(s, reg, field) \ + ARRAY_FIELD_DP32((s)->regs, reg, field, \ + (xlnx_efuse_get_row((s->efuse), EFUSE_ ## field) \ + >> (EFUSE_ ## field % 32))) + +#define EFUSE_CACHE_BIT(s, reg, field) \ + ARRAY_FIELD_DP32((s)->regs, reg, field, xlnx_efuse_get_bit((s->efuse), \ + EFUSE_ ## field)) + +#define FBIT_UNKNOWN (~0) + +QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxZynqMPEFuse *)0)->regs)); + +static void update_tbit_status(XlnxZynqMPEFuse *s) +{ + unsigned int check = xlnx_efuse_tbits_check(s->efuse); + uint32_t val = s->regs[R_STATUS]; + + val = FIELD_DP32(val, STATUS, EFUSE_0_TBIT, !!(check & (1 << 0))); + val = FIELD_DP32(val, STATUS, EFUSE_2_TBIT, !!(check & (1 << 1))); + val = FIELD_DP32(val, STATUS, EFUSE_3_TBIT, !!(check & (1 << 2))); + + s->regs[R_STATUS] = val; +} + +/* Update the u32 array from efuse bits. Slow but simple approach. */ +static void cache_sync_u32(XlnxZynqMPEFuse *s, unsigned int r_start, + unsigned int f_start, unsigned int f_end, + unsigned int f_written) +{ + uint32_t *u32 = &s->regs[r_start]; + unsigned int fbit, wbits = 0, u32_off = 0; + + /* Avoid working on bits that are not relevant. */ + if (f_written != FBIT_UNKNOWN + && (f_written < f_start || f_written > f_end)) { + return; + } + + for (fbit = f_start; fbit <= f_end; fbit++, wbits++) { + if (wbits == 32) { + /* Update the key offset. */ + u32_off += 1; + wbits = 0; + } + u32[u32_off] |= xlnx_efuse_get_bit(s->efuse, fbit) << wbits; + } +} + +/* + * Keep the syncs in bit order so we can bail out for the + * slower ones. + */ +static void zynqmp_efuse_sync_cache(XlnxZynqMPEFuse *s, unsigned int bit) +{ + EFUSE_CACHE_BIT(s, SEC_CTRL, AES_RDLK); + EFUSE_CACHE_BIT(s, SEC_CTRL, AES_WRLK); + EFUSE_CACHE_BIT(s, SEC_CTRL, ENC_ONLY); + EFUSE_CACHE_BIT(s, SEC_CTRL, BBRAM_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, ERROR_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, JTAG_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, DFT_DIS); + EFUSE_CACHE_BIT(s, SEC_CTRL, PROG_GATE_0); + EFUSE_CACHE_BIT(s, SEC_CTRL, PROG_GATE_1); + EFUSE_CACHE_BIT(s, SEC_CTRL, PROG_GATE_2); + EFUSE_CACHE_BIT(s, SEC_CTRL, SEC_LOCK); + EFUSE_CACHE_BIT(s, SEC_CTRL, PPK0_WRLK); + EFUSE_CACHE_BIT(s, SEC_CTRL, PPK1_WRLK); + + EFUSE_CACHE_FLD(s, SEC_CTRL, RSA_EN); + EFUSE_CACHE_FLD(s, SEC_CTRL, PPK0_INVLD); + EFUSE_CACHE_FLD(s, SEC_CTRL, PPK1_INVLD); + + /* Update the tbits. 
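+ * (update_tbit_status() above mirrors the xlnx_efuse_tbits_check() result
+ * into the STATUS.EFUSE_{0,2,3}_TBIT fields.)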
*/ + update_tbit_status(s); + + /* Sync the various areas. */ + s->regs[R_MISC_USER_CTRL] = xlnx_efuse_get_row(s->efuse, + EFUSE_USER_CTRL_START) + & EFUSE_USER_CTRL_MASK; + s->regs[R_PUF_CHASH] = xlnx_efuse_get_row(s->efuse, EFUSE_PUF_CHASH_START); + s->regs[R_PUF_MISC] = xlnx_efuse_get_row(s->efuse, EFUSE_PUF_MISC_START); + + cache_sync_u32(s, R_DNA_0, EFUSE_DNA_START, EFUSE_DNA_END, bit); + + if (bit < EFUSE_AES_START) { + return; + } + + cache_sync_u32(s, R_ROM_RSVD, EFUSE_ROM_START, EFUSE_ROM_END, bit); + cache_sync_u32(s, R_IPDISABLE, EFUSE_IPDIS_START, EFUSE_IPDIS_END, bit); + cache_sync_u32(s, R_USER_0, EFUSE_USER_START, EFUSE_USER_END, bit); + cache_sync_u32(s, R_SPK_ID, EFUSE_SPK_START, EFUSE_SPK_END, bit); + cache_sync_u32(s, R_PPK0_0, EFUSE_PPK0_START, EFUSE_PPK0_END, bit); + cache_sync_u32(s, R_PPK1_0, EFUSE_PPK1_START, EFUSE_PPK1_END, bit); +} + +static void zynqmp_efuse_update_irq(XlnxZynqMPEFuse *s) +{ + bool pending = s->regs[R_EFUSE_ISR] & s->regs[R_EFUSE_IMR]; + qemu_set_irq(s->irq, pending); +} + +static void zynqmp_efuse_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + zynqmp_efuse_update_irq(s); +} + +static uint64_t zynqmp_efuse_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] |= val; + zynqmp_efuse_update_irq(s); + return 0; +} + +static uint64_t zynqmp_efuse_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + uint32_t val = val64; + + s->regs[R_EFUSE_IMR] &= ~val; + zynqmp_efuse_update_irq(s); + return 0; +} + +static void zynqmp_efuse_pgm_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + unsigned bit = val64; + unsigned page = FIELD_EX32(bit, EFUSE_PGM_ADDR, EFUSE); + bool puf_prot = false; + const char *errmsg = NULL; + + /* Allow only valid array, and adjust for skipped array 1 */ + switch (page) { + case 0: + break; + case 2 ... 
3: + bit = FIELD_DP32(bit, EFUSE_PGM_ADDR, EFUSE, page - 1); + puf_prot = xlnx_efuse_get_bit(s->efuse, EFUSE_PUF_SYN_WRLK); + break; + default: + errmsg = "Invalid address"; + goto pgm_done; + } + + if (ARRAY_FIELD_EX32(s->regs, WR_LOCK, LOCK)) { + errmsg = "Array write-locked"; + goto pgm_done; + } + + if (!ARRAY_FIELD_EX32(s->regs, CFG, PGM_EN)) { + errmsg = "Array pgm-disabled"; + goto pgm_done; + } + + if (puf_prot) { + errmsg = "PUF_HD-store write-locked"; + goto pgm_done; + } + + if (ARRAY_FIELD_EX32(s->regs, SEC_CTRL, AES_WRLK) + && bit >= EFUSE_AES_START && bit <= EFUSE_AES_END) { + errmsg = "AES key-store Write-locked"; + goto pgm_done; + } + + if (!xlnx_efuse_set_bit(s->efuse, bit)) { + errmsg = "Write failed"; + } + + pgm_done: + if (!errmsg) { + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_ERROR, 0); + } else { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_ERROR, 1); + qemu_log_mask(LOG_GUEST_ERROR, + "%s - eFuse write error: %s; addr=0x%x\n", + path, errmsg, (unsigned)val64); + } + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, PGM_DONE, 1); + zynqmp_efuse_update_irq(s); +} + +static void zynqmp_efuse_rd_addr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + g_autofree char *path = NULL; + + /* + * Grant reads only to allowed bits; reference sources: + * 1/ XilSKey - XilSKey_ZynqMp_EfusePs_ReadRow() + * 2/ UG1085, v2.0, table 12-13 + * (note: enumerates the masks as per described in + * references to avoid mental translation). + */ +#define COL_MASK(L_, H_) \ + ((uint32_t)MAKE_64BIT_MASK((L_), (1 + (H_) - (L_)))) + + static const uint32_t ary0_col_mask[] = { + /* XilSKey - XSK_ZYNQMP_EFUSEPS_TBITS_ROW */ + [0] = COL_MASK(28, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_USR{0:7}_FUSE_ROW */ + [8] = COL_MASK(0, 31), [9] = COL_MASK(0, 31), + [10] = COL_MASK(0, 31), [11] = COL_MASK(0, 31), + [12] = COL_MASK(0, 31), [13] = COL_MASK(0, 31), + [14] = COL_MASK(0, 31), [15] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_MISC_USR_CTRL_ROW */ + [16] = COL_MASK(0, 7) | COL_MASK(10, 16), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PBR_BOOT_ERR_ROW */ + [17] = COL_MASK(0, 2), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PUF_CHASH_ROW */ + [20] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PUF_AUX_ROW */ + [21] = COL_MASK(0, 23) | COL_MASK(29, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_SEC_CTRL_ROW */ + [22] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_SPK_ID_ROW */ + [23] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PPK0_START_ROW */ + [40] = COL_MASK(0, 31), [41] = COL_MASK(0, 31), + [42] = COL_MASK(0, 31), [43] = COL_MASK(0, 31), + [44] = COL_MASK(0, 31), [45] = COL_MASK(0, 31), + [46] = COL_MASK(0, 31), [47] = COL_MASK(0, 31), + [48] = COL_MASK(0, 31), [49] = COL_MASK(0, 31), + [50] = COL_MASK(0, 31), [51] = COL_MASK(0, 31), + + /* XilSKey - XSK_ZYNQMP_EFUSEPS_PPK1_START_ROW */ + [52] = COL_MASK(0, 31), [53] = COL_MASK(0, 31), + [54] = COL_MASK(0, 31), [55] = COL_MASK(0, 31), + [56] = COL_MASK(0, 31), [57] = COL_MASK(0, 31), + [58] = COL_MASK(0, 31), [59] = COL_MASK(0, 31), + [60] = COL_MASK(0, 31), [61] = COL_MASK(0, 31), + [62] = COL_MASK(0, 31), [63] = COL_MASK(0, 31), + }; + + uint32_t col_mask = COL_MASK(0, 31); +#undef COL_MASK + + uint32_t efuse_idx = s->regs[R_EFUSE_RD_ADDR]; + uint32_t efuse_ary = FIELD_EX32(efuse_idx, EFUSE_RD_ADDR, EFUSE); + uint32_t efuse_row = FIELD_EX32(efuse_idx, EFUSE_RD_ADDR, ROW); + + switch (efuse_ary) { + case 0: /* Various */ + if 
(efuse_row >= ARRAY_SIZE(ary0_col_mask)) { + goto denied; + } + + col_mask = ary0_col_mask[efuse_row]; + if (!col_mask) { + goto denied; + } + break; + case 2: /* PUF helper data, adjust for skipped array 1 */ + case 3: + val64 = FIELD_DP32(efuse_idx, EFUSE_RD_ADDR, EFUSE, efuse_ary - 1); + break; + default: + goto denied; + } + + s->regs[R_EFUSE_RD_DATA] = xlnx_efuse_get_row(s->efuse, val64) & col_mask; + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_ERROR, 0); + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_DONE, 1); + zynqmp_efuse_update_irq(s); + return; + + denied: + path = object_get_canonical_path(OBJECT(s)); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Denied efuse read from array %u, row %u\n", + path, efuse_ary, efuse_row); + + s->regs[R_EFUSE_RD_DATA] = 0; + + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_ERROR, 1); + ARRAY_FIELD_DP32(s->regs, EFUSE_ISR, RD_DONE, 0); + zynqmp_efuse_update_irq(s); +} + +static void zynqmp_efuse_aes_crc_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + bool ok; + + ok = xlnx_efuse_k256_check(s->efuse, (uint32_t)val64, EFUSE_AES_START); + + ARRAY_FIELD_DP32(s->regs, STATUS, AES_CRC_PASS, (ok ? 1 : 0)); + ARRAY_FIELD_DP32(s->regs, STATUS, AES_CRC_DONE, 1); + + s->regs[R_EFUSE_AES_CRC] = 0; /* crc value is write-only */ +} + +static uint64_t zynqmp_efuse_cache_load_prew(RegisterInfo *reg, + uint64_t valu64) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(reg->opaque); + + if (valu64 & R_EFUSE_CACHE_LOAD_LOAD_MASK) { + zynqmp_efuse_sync_cache(s, FBIT_UNKNOWN); + ARRAY_FIELD_DP32(s->regs, STATUS, CACHE_DONE, 1); + zynqmp_efuse_update_irq(s); + } + + return 0; +} + +static uint64_t zynqmp_efuse_wr_lock_prew(RegisterInfo *reg, uint64_t val) +{ + return val == 0xDF0D ? 0 : 1; +} + +static RegisterAccessInfo zynqmp_efuse_regs_info[] = { + { .name = "WR_LOCK", .addr = A_WR_LOCK, + .reset = 0x1, + .pre_write = zynqmp_efuse_wr_lock_prew, + },{ .name = "CFG", .addr = A_CFG, + },{ .name = "STATUS", .addr = A_STATUS, + .rsvd = 0x8, + .ro = 0xff, + },{ .name = "EFUSE_PGM_ADDR", .addr = A_EFUSE_PGM_ADDR, + .post_write = zynqmp_efuse_pgm_addr_postw + },{ .name = "EFUSE_RD_ADDR", .addr = A_EFUSE_RD_ADDR, + .rsvd = 0x1f, + .post_write = zynqmp_efuse_rd_addr_postw, + },{ .name = "EFUSE_RD_DATA", .addr = A_EFUSE_RD_DATA, + .ro = 0xffffffff, + },{ .name = "TPGM", .addr = A_TPGM, + },{ .name = "TRD", .addr = A_TRD, + .reset = 0x1b, + },{ .name = "TSU_H_PS", .addr = A_TSU_H_PS, + .reset = 0xff, + },{ .name = "TSU_H_PS_CS", .addr = A_TSU_H_PS_CS, + .reset = 0xb, + },{ .name = "TSU_H_CS", .addr = A_TSU_H_CS, + .reset = 0x7, + },{ .name = "EFUSE_ISR", .addr = A_EFUSE_ISR, + .rsvd = 0x7fffffe0, + .w1c = 0x8000001f, + .post_write = zynqmp_efuse_isr_postw, + },{ .name = "EFUSE_IMR", .addr = A_EFUSE_IMR, + .reset = 0x8000001f, + .rsvd = 0x7fffffe0, + .ro = 0xffffffff, + },{ .name = "EFUSE_IER", .addr = A_EFUSE_IER, + .rsvd = 0x7fffffe0, + .pre_write = zynqmp_efuse_ier_prew, + },{ .name = "EFUSE_IDR", .addr = A_EFUSE_IDR, + .rsvd = 0x7fffffe0, + .pre_write = zynqmp_efuse_idr_prew, + },{ .name = "EFUSE_CACHE_LOAD", .addr = A_EFUSE_CACHE_LOAD, + .pre_write = zynqmp_efuse_cache_load_prew, + },{ .name = "EFUSE_PGM_LOCK", .addr = A_EFUSE_PGM_LOCK, + },{ .name = "EFUSE_AES_CRC", .addr = A_EFUSE_AES_CRC, + .post_write = zynqmp_efuse_aes_crc_postw, + },{ .name = "EFUSE_TBITS_PRGRMG_EN", .addr = A_EFUSE_TBITS_PRGRMG_EN, + .reset = R_EFUSE_TBITS_PRGRMG_EN_TBITS_PRGRMG_EN_MASK, + },{ .name = "DNA_0", .addr = A_DNA_0, + .ro = 0xffffffff, + },{ .name = "DNA_1", .addr 
= A_DNA_1, + .ro = 0xffffffff, + },{ .name = "DNA_2", .addr = A_DNA_2, + .ro = 0xffffffff, + },{ .name = "IPDISABLE", .addr = A_IPDISABLE, + .ro = 0xffffffff, + },{ .name = "SYSOSC_CTRL", .addr = A_SYSOSC_CTRL, + .ro = 0xffffffff, + },{ .name = "USER_0", .addr = A_USER_0, + .ro = 0xffffffff, + },{ .name = "USER_1", .addr = A_USER_1, + .ro = 0xffffffff, + },{ .name = "USER_2", .addr = A_USER_2, + .ro = 0xffffffff, + },{ .name = "USER_3", .addr = A_USER_3, + .ro = 0xffffffff, + },{ .name = "USER_4", .addr = A_USER_4, + .ro = 0xffffffff, + },{ .name = "USER_5", .addr = A_USER_5, + .ro = 0xffffffff, + },{ .name = "USER_6", .addr = A_USER_6, + .ro = 0xffffffff, + },{ .name = "USER_7", .addr = A_USER_7, + .ro = 0xffffffff, + },{ .name = "MISC_USER_CTRL", .addr = A_MISC_USER_CTRL, + .ro = 0xffffffff, + },{ .name = "ROM_RSVD", .addr = A_ROM_RSVD, + .ro = 0xffffffff, + },{ .name = "PUF_CHASH", .addr = A_PUF_CHASH, + .ro = 0xffffffff, + },{ .name = "PUF_MISC", .addr = A_PUF_MISC, + .ro = 0xffffffff, + },{ .name = "SEC_CTRL", .addr = A_SEC_CTRL, + .ro = 0xffffffff, + },{ .name = "SPK_ID", .addr = A_SPK_ID, + .ro = 0xffffffff, + },{ .name = "PPK0_0", .addr = A_PPK0_0, + .ro = 0xffffffff, + },{ .name = "PPK0_1", .addr = A_PPK0_1, + .ro = 0xffffffff, + },{ .name = "PPK0_2", .addr = A_PPK0_2, + .ro = 0xffffffff, + },{ .name = "PPK0_3", .addr = A_PPK0_3, + .ro = 0xffffffff, + },{ .name = "PPK0_4", .addr = A_PPK0_4, + .ro = 0xffffffff, + },{ .name = "PPK0_5", .addr = A_PPK0_5, + .ro = 0xffffffff, + },{ .name = "PPK0_6", .addr = A_PPK0_6, + .ro = 0xffffffff, + },{ .name = "PPK0_7", .addr = A_PPK0_7, + .ro = 0xffffffff, + },{ .name = "PPK0_8", .addr = A_PPK0_8, + .ro = 0xffffffff, + },{ .name = "PPK0_9", .addr = A_PPK0_9, + .ro = 0xffffffff, + },{ .name = "PPK0_10", .addr = A_PPK0_10, + .ro = 0xffffffff, + },{ .name = "PPK0_11", .addr = A_PPK0_11, + .ro = 0xffffffff, + },{ .name = "PPK1_0", .addr = A_PPK1_0, + .ro = 0xffffffff, + },{ .name = "PPK1_1", .addr = A_PPK1_1, + .ro = 0xffffffff, + },{ .name = "PPK1_2", .addr = A_PPK1_2, + .ro = 0xffffffff, + },{ .name = "PPK1_3", .addr = A_PPK1_3, + .ro = 0xffffffff, + },{ .name = "PPK1_4", .addr = A_PPK1_4, + .ro = 0xffffffff, + },{ .name = "PPK1_5", .addr = A_PPK1_5, + .ro = 0xffffffff, + },{ .name = "PPK1_6", .addr = A_PPK1_6, + .ro = 0xffffffff, + },{ .name = "PPK1_7", .addr = A_PPK1_7, + .ro = 0xffffffff, + },{ .name = "PPK1_8", .addr = A_PPK1_8, + .ro = 0xffffffff, + },{ .name = "PPK1_9", .addr = A_PPK1_9, + .ro = 0xffffffff, + },{ .name = "PPK1_10", .addr = A_PPK1_10, + .ro = 0xffffffff, + },{ .name = "PPK1_11", .addr = A_PPK1_11, + .ro = 0xffffffff, + } +}; + +static void zynqmp_efuse_reg_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + RegisterInfoArray *reg_array = opaque; + XlnxZynqMPEFuse *s; + Object *dev; + + assert(reg_array != NULL); + + dev = reg_array->mem.owner; + assert(dev); + + s = XLNX_ZYNQMP_EFUSE(dev); + + if (addr != A_WR_LOCK && s->regs[R_WR_LOCK]) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + qemu_log_mask(LOG_GUEST_ERROR, + "%s[reg_0x%02lx]: Attempt to write locked register.\n", + path, (long)addr); + } else { + register_write_memory(opaque, addr, data, size); + } +} + +static const MemoryRegionOps zynqmp_efuse_ops = { + .read = register_read_memory, + .write = zynqmp_efuse_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void zynqmp_efuse_register_reset(RegisterInfo *reg) +{ + if (!reg->data || !reg->access) { + 
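+        /* Nothing to do for map entries without a backing register. */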
return; + } + + /* Reset must not trigger some registers' writers */ + switch (reg->access->addr) { + case A_EFUSE_AES_CRC: + *(uint32_t *)reg->data = reg->access->reset; + return; + } + + register_reset(reg); +} + +static void zynqmp_efuse_reset(DeviceState *dev) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + zynqmp_efuse_register_reset(&s->regs_info[i]); + } + + zynqmp_efuse_sync_cache(s, FBIT_UNKNOWN); + ARRAY_FIELD_DP32(s->regs, STATUS, CACHE_DONE, 1); + zynqmp_efuse_update_irq(s); +} + +static void zynqmp_efuse_realize(DeviceState *dev, Error **errp) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(dev); + + if (!s->efuse) { + g_autofree char *path = object_get_canonical_path(OBJECT(s)); + + error_setg(errp, "%s.efuse: link property not connected to XLNX-EFUSE", + path); + return; + } + + s->efuse->dev = dev; +} + +static void zynqmp_efuse_init(Object *obj) +{ + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + reg_array = + register_init_block32(DEVICE(obj), zynqmp_efuse_regs_info, + ARRAY_SIZE(zynqmp_efuse_regs_info), + s->regs_info, s->regs, + &zynqmp_efuse_ops, + ZYNQMP_EFUSE_ERR_DEBUG, + R_MAX * 4); + + sysbus_init_mmio(sbd, &reg_array->mem); + sysbus_init_irq(sbd, &s->irq); +} + +static const VMStateDescription vmstate_efuse = { + .name = TYPE_XLNX_ZYNQMP_EFUSE, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, XlnxZynqMPEFuse, R_MAX), + VMSTATE_END_OF_LIST(), + } +}; + +static Property zynqmp_efuse_props[] = { + DEFINE_PROP_LINK("efuse", + XlnxZynqMPEFuse, efuse, + TYPE_XLNX_EFUSE, XlnxEFuse *), + + DEFINE_PROP_END_OF_LIST(), +}; + +static void zynqmp_efuse_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = zynqmp_efuse_reset; + dc->realize = zynqmp_efuse_realize; + dc->vmsd = &vmstate_efuse; + device_class_set_props(dc, zynqmp_efuse_props); +} + + +static const TypeInfo efuse_info = { + .name = TYPE_XLNX_ZYNQMP_EFUSE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxZynqMPEFuse), + .class_init = zynqmp_efuse_class_init, + .instance_init = zynqmp_efuse_init, +}; + +static void efuse_register_types(void) +{ + type_register_static(&efuse_info); +} + +type_init(efuse_register_types) diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c index 39f1d344ae9..73fe383c2de 100644 --- a/hw/openrisc/openrisc_sim.c +++ b/hw/openrisc/openrisc_sim.c @@ -29,7 +29,6 @@ #include "net/net.h" #include "hw/loader.h" #include "hw/qdev-properties.h" -#include "exec/address-spaces.h" #include "sysemu/sysemu.h" #include "hw/sysbus.h" #include "sysemu/qtest.h" diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index ec9907917eb..20099a8ae31 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -28,6 +28,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(GenPCIERootPort, GEN_PCIE_ROOT_PORT) (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF) #define GEN_PCIE_ROOT_PORT_MSIX_NR_VECTOR 1 +#define GEN_PCIE_ROOT_DEFAULT_IO_RANGE 4096 struct GenPCIERootPort { /*< private >*/ @@ -75,6 +76,7 @@ static bool gen_rp_test_migrate_msix(void *opaque, int version_id) static void gen_rp_realize(DeviceState *dev, Error **errp) { PCIDevice *d = PCI_DEVICE(dev); + PCIESlot *s = PCIE_SLOT(d); GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d); PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d); Error 
*local_err = NULL; @@ -85,6 +87,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp) return; } + if (grp->res_reserve.io == -1 && s->hotplug && !s->native_hotplug) { + grp->res_reserve.io = GEN_PCIE_ROOT_DEFAULT_IO_RANGE; + } int rc = pci_bridge_qemu_reserve_cap_init(d, 0, grp->res_reserve, errp); diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index aedded10642..10e6e7c2ab0 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -57,6 +57,7 @@ struct PXBDev { uint8_t bus_nr; uint16_t numa_node; + bool bypass_iommu; }; static PXBDev *convert_to_pxb(PCIDevice *dev) @@ -244,7 +245,7 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) } else { bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); bds = qdev_new("pci-bridge"); - bds->id = dev_name; + bds->id = g_strdup(dev_name); qdev_prop_set_uint8(bds, PCI_BRIDGE_DEV_PROP_CHASSIS_NR, pxb->bus_nr); qdev_prop_set_bit(bds, PCI_BRIDGE_DEV_PROP_SHPC, false); } @@ -255,6 +256,7 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) bus->map_irq = pxb_map_irq_fn; PCI_HOST_BRIDGE(ds)->bus = bus; + PCI_HOST_BRIDGE(ds)->bypass_iommu = pxb->bypass_iommu; pxb_register_bus(dev, bus, &local_err); if (local_err) { @@ -301,6 +303,7 @@ static Property pxb_dev_properties[] = { /* Note: 0 is not a legal PXB bus number. */ DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), + DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig index 2ccc96f02ce..2b5f7d58cc5 100644 --- a/hw/pci-host/Kconfig +++ b/hw/pci-host/Kconfig @@ -6,7 +6,7 @@ config XEN_IGD_PASSTHROUGH default y depends on XEN && PCI_I440FX -config PREP_PCI +config RAVEN_PCI bool select PCI select OR_IRQ @@ -72,3 +72,8 @@ config REMOTE_PCIHOST config SH_PCI bool select PCI + +config MV64361 + bool + select PCI + select I8259 diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index 2a2db7cea69..a57e81e3a97 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -49,10 +49,10 @@ #include "migration/vmstate.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "exec/address-spaces.h" #include "hw/misc/unimp.h" #include "hw/registerfields.h" #include "qom/object.h" +#include "trace.h" /* #define DEBUG_BONITO */ @@ -186,7 +186,8 @@ FIELD(BONGENCFG, PCIQUEUE, 12, 1) #define BONITO_PCICONF_IDSEL_OFFSET 11 #define BONITO_PCICONF_FUN_MASK 0x700 /* [10:8] */ #define BONITO_PCICONF_FUN_OFFSET 8 -#define BONITO_PCICONF_REG_MASK 0xFC +#define BONITO_PCICONF_REG_MASK_DS (~3) /* Per datasheet */ +#define BONITO_PCICONF_REG_MASK_HW 0xff /* As seen running PMON */ #define BONITO_PCICONF_REG_OFFSET 0 @@ -465,7 +466,7 @@ static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr addr) BONITO_PCICONF_IDSEL_OFFSET; devno = ctz32(idsel); funno = (cfgaddr & BONITO_PCICONF_FUN_MASK) >> BONITO_PCICONF_FUN_OFFSET; - regno = (cfgaddr & BONITO_PCICONF_REG_MASK) >> BONITO_PCICONF_REG_OFFSET; + regno = (cfgaddr & BONITO_PCICONF_REG_MASK_HW) >> BONITO_PCICONF_REG_OFFSET; if (idsel == 0) { error_report("error in bonito pci config address 0x" TARGET_FMT_plx @@ -496,6 +497,9 @@ static void bonito_spciconf_write(void *opaque, hwaddr addr, uint64_t val, if (pciaddr == 0xffffffff) { return; } + if (addr & ~BONITO_PCICONF_REG_MASK_DS) { + trace_bonito_spciconf_small_access(addr, size); + } /* set 
the pci address in s->config_reg */ phb->config_reg = (pciaddr) | (1u << 31); @@ -522,6 +526,9 @@ static uint64_t bonito_spciconf_read(void *opaque, hwaddr addr, unsigned size) if (pciaddr == 0xffffffff) { return MAKE_64BIT_MASK(0, size * 8); } + if (addr & ~BONITO_PCICONF_REG_MASK_DS) { + trace_bonito_spciconf_small_access(addr, size); + } /* set the pci address in s->config_reg */ phb->config_reg = (pciaddr) | (1u << 31); diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c index 0f01f13a6ed..e7e162a00ab 100644 --- a/hw/pci-host/gpex-acpi.c +++ b/hw/pci-host/gpex-acpi.c @@ -112,26 +112,10 @@ static void acpi_dsdt_add_pci_osc(Aml *dev) UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D"); ifctx = aml_if(aml_equal(aml_arg(0), UUID)); ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(0))); - uint8_t byte_list[] = { - 0x1 << 0 /* support for functions other than function 0 */ | - 0x1 << 5 /* support for function 5 */ - }; - buf = aml_buffer(ARRAY_SIZE(byte_list), byte_list); + uint8_t byte_list[1] = {1}; + buf = aml_buffer(1, byte_list); aml_append(ifctx1, aml_return(buf)); aml_append(ifctx, ifctx1); - - /* - * PCI Firmware Specification 3.1 - * 4.6.5. _DSM for Ignoring PCI Boot Configurations - */ - /* Arg2: Function Index: 5 */ - ifctx1 = aml_if(aml_equal(aml_arg(2), aml_int(5))); - /* - * 0 - The operating system must not ignore the PCI configuration that - * firmware has done at boot time. - */ - aml_append(ifctx1, aml_return(aml_int(0))); - aml_append(ifctx, ifctx1); aml_append(method, ifctx); byte_list[0] = 0; diff --git a/hw/pci-host/gpex.c b/hw/pci-host/gpex.c index 2bdbe7b4561..a6752fac5e8 100644 --- a/hw/pci-host/gpex.c +++ b/hw/pci-host/gpex.c @@ -83,12 +83,51 @@ static void gpex_host_realize(DeviceState *dev, Error **errp) int i; pcie_host_mmcfg_init(pex, PCIE_MMCFG_SIZE_MAX); + sysbus_init_mmio(sbd, &pex->mmio); + + /* + * Note that the MemoryRegions io_mmio and io_ioport that we pass + * to pci_register_root_bus() are not the same as the + * MemoryRegions io_mmio_window and io_ioport_window that we + * expose as SysBus MRs. The difference is in the behaviour of + * accesses to addresses where no PCI device has been mapped. + * + * io_mmio and io_ioport are the underlying PCI view of the PCI + * address space, and when a PCI device does a bus master access + * to a bad address this is reported back to it as a transaction + * failure. + * + * io_mmio_window and io_ioport_window implement "unmapped + * addresses read as -1 and ignore writes"; this is traditional + * x86 PC behaviour, which is not mandated by the PCI spec proper + * but expected by much PCI-using guest software, including Linux. + * + * In the interests of not being unnecessarily surprising, we + * implement it in the gpex PCI host controller, by providing the + * _window MRs, which are containers with io ops that implement + * the 'background' behaviour and which hold the real PCI MRs as + * subregions. 
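+     *
+     * (Guest-visible effect, as an illustration: with the default value of
+     * the "allow-unmapped-accesses" property defined further below, a read
+     * from an address in these windows that no PCI device claims returns -1
+     * and a write is discarded; with the property set to false such
+     * accesses abort as before.)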
+ */ memory_region_init(&s->io_mmio, OBJECT(s), "gpex_mmio", UINT64_MAX); memory_region_init(&s->io_ioport, OBJECT(s), "gpex_ioport", 64 * 1024); - sysbus_init_mmio(sbd, &pex->mmio); - sysbus_init_mmio(sbd, &s->io_mmio); - sysbus_init_mmio(sbd, &s->io_ioport); + if (s->allow_unmapped_accesses) { + memory_region_init_io(&s->io_mmio_window, OBJECT(s), + &unassigned_io_ops, OBJECT(s), + "gpex_mmio_window", UINT64_MAX); + memory_region_init_io(&s->io_ioport_window, OBJECT(s), + &unassigned_io_ops, OBJECT(s), + "gpex_ioport_window", 64 * 1024); + + memory_region_add_subregion(&s->io_mmio_window, 0, &s->io_mmio); + memory_region_add_subregion(&s->io_ioport_window, 0, &s->io_ioport); + sysbus_init_mmio(sbd, &s->io_mmio_window); + sysbus_init_mmio(sbd, &s->io_ioport_window); + } else { + sysbus_init_mmio(sbd, &s->io_mmio); + sysbus_init_mmio(sbd, &s->io_ioport); + } + for (i = 0; i < GPEX_NUM_IRQS; i++) { sysbus_init_irq(sbd, &s->irq[i]); s->irq_num[i] = -1; @@ -108,6 +147,16 @@ static const char *gpex_host_root_bus_path(PCIHostState *host_bridge, return "0000:00"; } +static Property gpex_host_properties[] = { + /* + * Permit CPU accesses to unmapped areas of the PIO and MMIO windows + * (discarding writes and returning -1 for reads) rather than aborting. + */ + DEFINE_PROP_BOOL("allow-unmapped-accesses", GPEXHost, + allow_unmapped_accesses, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void gpex_host_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -117,6 +166,7 @@ static void gpex_host_class_init(ObjectClass *klass, void *data) dc->realize = gpex_host_realize; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->fw_name = "pci"; + device_class_set_props(dc, gpex_host_properties); } static void gpex_host_initfn(Object *obj) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 28c9bae8994..e08716142b6 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -314,14 +314,6 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, return b; } -PCIBus *find_i440fx(void) -{ - PCIHostState *s = OBJECT_CHECK(PCIHostState, - object_resolve_path("/machine/i440fx", NULL), - TYPE_PCI_HOST_BRIDGE); - return s ? 
s->bus : NULL; -} - static void i440fx_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build index 87a896973e7..4c4f39c15c6 100644 --- a/hw/pci-host/meson.build +++ b/hw/pci-host/meson.build @@ -3,7 +3,7 @@ pci_ss.add(when: 'CONFIG_PAM', if_true: files('pam.c')) pci_ss.add(when: 'CONFIG_PCI_BONITO', if_true: files('bonito.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_DESIGNWARE', if_true: files('designware.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_GENERIC_BRIDGE', if_true: files('gpex.c')) -pci_ss.add(when: 'CONFIG_ACPI', if_true: files('gpex-acpi.c')) +pci_ss.add(when: ['CONFIG_PCI_EXPRESS_GENERIC_BRIDGE', 'CONFIG_ACPI'], if_true: files('gpex-acpi.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_Q35', if_true: files('q35.c')) pci_ss.add(when: 'CONFIG_PCI_EXPRESS_XILINX', if_true: files('xilinx-pcie.c')) pci_ss.add(when: 'CONFIG_PCI_I440FX', if_true: files('i440fx.c')) @@ -13,12 +13,14 @@ pci_ss.add(when: 'CONFIG_REMOTE_PCIHOST', if_true: files('remote.c')) pci_ss.add(when: 'CONFIG_SH_PCI', if_true: files('sh_pci.c')) # PPC devices -pci_ss.add(when: 'CONFIG_PREP_PCI', if_true: files('prep.c')) +pci_ss.add(when: 'CONFIG_RAVEN_PCI', if_true: files('raven.c')) pci_ss.add(when: 'CONFIG_GRACKLE_PCI', if_true: files('grackle.c')) # NewWorld PowerMac pci_ss.add(when: 'CONFIG_UNIN_PCI', if_true: files('uninorth.c')) # PowerPC E500 boards pci_ss.add(when: 'CONFIG_PPCE500_PCI', if_true: files('ppce500.c')) +# Pegasos2 +pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c')) # ARM devices pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c')) diff --git a/hw/pci-host/mv64361.c b/hw/pci-host/mv64361.c new file mode 100644 index 00000000000..00b3ff7d909 --- /dev/null +++ b/hw/pci-host/mv64361.c @@ -0,0 +1,951 @@ +/* + * Marvell Discovery II MV64361 System Controller for + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * + * Copyright (c) 2018-2020 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/irq.h" +#include "hw/intc/i8259.h" +#include "hw/qdev-properties.h" +#include "exec/address-spaces.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "hw/pci-host/mv64361.h" +#include "mv643xx.h" + +#define TYPE_MV64361_PCI_BRIDGE "mv64361-pcibridge" + +static void mv64361_pcibridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->vendor_id = PCI_VENDOR_ID_MARVELL; + k->device_id = PCI_DEVICE_ID_MARVELL_MV6436X; + k->class_id = PCI_CLASS_BRIDGE_HOST; + /* + * PCI-facing part of the host bridge, + * not usable without the host-facing part + */ + dc->user_creatable = false; +} + +static const TypeInfo mv64361_pcibridge_info = { + .name = TYPE_MV64361_PCI_BRIDGE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = mv64361_pcibridge_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + + +#define TYPE_MV64361_PCI "mv64361-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(MV64361PCIState, MV64361_PCI) + +struct MV64361PCIState { + PCIHostState parent_obj; + + uint8_t index; + MemoryRegion io; + MemoryRegion mem; + qemu_irq irq[PCI_NUM_PINS]; + + uint32_t io_base; + uint32_t io_size; + uint32_t mem_base[4]; + uint32_t mem_size[4]; + uint64_t remap[5]; +}; + +static int mv64361_pcihost_map_irq(PCIDevice *pci_dev, int n) +{ + return (n + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS; +} + +static void mv64361_pcihost_set_irq(void *opaque, int n, int level) +{ + MV64361PCIState *s = opaque; + qemu_set_irq(s->irq[n], level); +} + +static void mv64361_pcihost_realize(DeviceState *dev, Error **errp) +{ + MV64361PCIState *s = MV64361_PCI(dev); + PCIHostState *h = PCI_HOST_BRIDGE(dev); + char *name; + + name = g_strdup_printf("pci%d-io", s->index); + memory_region_init(&s->io, OBJECT(dev), name, 0x10000); + g_free(name); + name = g_strdup_printf("pci%d-mem", s->index); + memory_region_init(&s->mem, OBJECT(dev), name, 1ULL << 32); + g_free(name); + name = g_strdup_printf("pci.%d", s->index); + h->bus = pci_register_root_bus(dev, name, mv64361_pcihost_set_irq, + mv64361_pcihost_map_irq, dev, + &s->mem, &s->io, 0, 4, TYPE_PCI_BUS); + g_free(name); + pci_create_simple(h->bus, 0, TYPE_MV64361_PCI_BRIDGE); +} + +static Property mv64361_pcihost_props[] = { + DEFINE_PROP_UINT8("index", MV64361PCIState, index, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void mv64361_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = mv64361_pcihost_realize; + device_class_set_props(dc, mv64361_pcihost_props); + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); +} + +static const TypeInfo mv64361_pcihost_info = { + .name = TYPE_MV64361_PCI, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(MV64361PCIState), + .class_init = mv64361_pcihost_class_init, +}; + +static void mv64361_pci_register_types(void) +{ + type_register_static(&mv64361_pcihost_info); + type_register_static(&mv64361_pcibridge_info); +} + +type_init(mv64361_pci_register_types) + + +OBJECT_DECLARE_SIMPLE_TYPE(MV64361State, MV64361) + +struct MV64361State { + SysBusDevice parent_obj; + + MemoryRegion regs; + MV64361PCIState pci[2]; + MemoryRegion cpu_win[19]; + qemu_irq cpu_irq; + + 
/* registers state */ + uint32_t cpu_conf; + uint32_t regs_base; + uint32_t base_addr_enable; + uint64_t main_int_cr; + uint64_t cpu0_int_mask; + uint32_t gpp_io; + uint32_t gpp_level; + uint32_t gpp_value; + uint32_t gpp_int_cr; + uint32_t gpp_int_mask; + bool gpp_int_level; +}; + +enum mv64361_irq_cause { + MV64361_IRQ_DEVERR = 1, + MV64361_IRQ_DMAERR = 2, + MV64361_IRQ_CPUERR = 3, + MV64361_IRQ_IDMA0 = 4, + MV64361_IRQ_IDMA1 = 5, + MV64361_IRQ_IDMA2 = 6, + MV64361_IRQ_IDMA3 = 7, + MV64361_IRQ_TIMER0 = 8, + MV64361_IRQ_TIMER1 = 9, + MV64361_IRQ_TIMER2 = 10, + MV64361_IRQ_TIMER3 = 11, + MV64361_IRQ_PCI0 = 12, + MV64361_IRQ_SRAMERR = 13, + MV64361_IRQ_GBEERR = 14, + MV64361_IRQ_CERR = 15, + MV64361_IRQ_PCI1 = 16, + MV64361_IRQ_DRAMERR = 17, + MV64361_IRQ_WDNMI = 18, + MV64361_IRQ_WDE = 19, + MV64361_IRQ_PCI0IN = 20, + MV64361_IRQ_PCI0OUT = 21, + MV64361_IRQ_PCI1IN = 22, + MV64361_IRQ_PCI1OUT = 23, + MV64361_IRQ_P1_GPP0_7 = 24, + MV64361_IRQ_P1_GPP8_15 = 25, + MV64361_IRQ_P1_GPP16_23 = 26, + MV64361_IRQ_P1_GPP24_31 = 27, + MV64361_IRQ_P1_CPU_DB = 28, + /* 29-31: reserved */ + MV64361_IRQ_GBE0 = 32, + MV64361_IRQ_GBE1 = 33, + MV64361_IRQ_GBE2 = 34, + /* 35: reserved */ + MV64361_IRQ_SDMA0 = 36, + MV64361_IRQ_TWSI = 37, + MV64361_IRQ_SDMA1 = 38, + MV64361_IRQ_BRG = 39, + MV64361_IRQ_MPSC0 = 40, + MV64361_IRQ_MPSC1 = 41, + MV64361_IRQ_G0RX = 42, + MV64361_IRQ_G0TX = 43, + MV64361_IRQ_G0MISC = 44, + MV64361_IRQ_G1RX = 45, + MV64361_IRQ_G1TX = 46, + MV64361_IRQ_G1MISC = 47, + MV64361_IRQ_G2RX = 48, + MV64361_IRQ_G2TX = 49, + MV64361_IRQ_G2MISC = 50, + /* 51-55: reserved */ + MV64361_IRQ_P0_GPP0_7 = 56, + MV64361_IRQ_P0_GPP8_15 = 57, + MV64361_IRQ_P0_GPP16_23 = 58, + MV64361_IRQ_P0_GPP24_31 = 59, + MV64361_IRQ_P0_CPU_DB = 60, + /* 61-63: reserved */ +}; + +PCIBus *mv64361_get_pci_bus(DeviceState *dev, int n) +{ + MV64361State *mv = MV64361(dev); + return PCI_HOST_BRIDGE(&mv->pci[n])->bus; +} + +static void unmap_region(MemoryRegion *mr) +{ + if (memory_region_is_mapped(mr)) { + memory_region_del_subregion(get_system_memory(), mr); + object_unparent(OBJECT(mr)); + } +} + +static void map_pci_region(MemoryRegion *mr, MemoryRegion *parent, + struct Object *owner, const char *name, + hwaddr poffs, uint64_t size, hwaddr moffs) +{ + memory_region_init_alias(mr, owner, name, parent, poffs, size); + memory_region_add_subregion(get_system_memory(), moffs, mr); + trace_mv64361_region_map(name, poffs, size, moffs); +} + +static void set_mem_windows(MV64361State *s, uint32_t val) +{ + MV64361PCIState *p; + MemoryRegion *mr; + uint32_t mask; + int i; + + val &= 0x1fffff; + for (mask = 1, i = 0; i < 21; i++, mask <<= 1) { + if ((val & mask) != (s->base_addr_enable & mask)) { + trace_mv64361_region_enable(!(val & mask) ? 
"enable" : "disable", i); + /* + * 0-3 are SDRAM chip selects but we map all RAM directly + * 4-7 are device chip selects (not sure what those are) + * 8 is Boot device (ROM) chip select but we map that directly too + */ + if (i == 9) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->io, OBJECT(s), "pci0-io-win", + p->remap[4], (p->io_size + 1) << 16, + (p->io_base & 0xfffff) << 16); + } + } else if (i == 10) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem0-win", + p->remap[0], (p->mem_size[0] + 1) << 16, + (p->mem_base[0] & 0xfffff) << 16); + } + } else if (i == 11) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem1-win", + p->remap[1], (p->mem_size[1] + 1) << 16, + (p->mem_base[1] & 0xfffff) << 16); + } + } else if (i == 12) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem2-win", + p->remap[2], (p->mem_size[2] + 1) << 16, + (p->mem_base[2] & 0xfffff) << 16); + } + } else if (i == 13) { + p = &s->pci[0]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci0-mem3-win", + p->remap[3], (p->mem_size[3] + 1) << 16, + (p->mem_base[3] & 0xfffff) << 16); + } + } else if (i == 14) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->io, OBJECT(s), "pci1-io-win", + p->remap[4], (p->io_size + 1) << 16, + (p->io_base & 0xfffff) << 16); + } + } else if (i == 15) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem0-win", + p->remap[0], (p->mem_size[0] + 1) << 16, + (p->mem_base[0] & 0xfffff) << 16); + } + } else if (i == 16) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem1-win", + p->remap[1], (p->mem_size[1] + 1) << 16, + (p->mem_base[1] & 0xfffff) << 16); + } + } else if (i == 17) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem2-win", + p->remap[2], (p->mem_size[2] + 1) << 16, + (p->mem_base[2] & 0xfffff) << 16); + } + } else if (i == 18) { + p = &s->pci[1]; + mr = &s->cpu_win[i]; + unmap_region(mr); + if (!(val & mask)) { + map_pci_region(mr, &p->mem, OBJECT(s), "pci1-mem3-win", + p->remap[3], (p->mem_size[3] + 1) << 16, + (p->mem_base[3] & 0xfffff) << 16); + } + /* 19 is integrated SRAM */ + } else if (i == 20) { + mr = &s->regs; + unmap_region(mr); + if (!(val & mask)) { + memory_region_add_subregion(get_system_memory(), + (s->regs_base & 0xfffff) << 16, mr); + } + } + } + } + s->base_addr_enable = val; +} + +static void mv64361_update_irq(void *opaque, int n, int level) +{ + MV64361State *s = opaque; + uint64_t val = s->main_int_cr; + + if (level) { + val |= BIT_ULL(n); + } else { + val &= ~BIT_ULL(n); + } + if ((s->main_int_cr & s->cpu0_int_mask) != (val & s->cpu0_int_mask)) { + qemu_set_irq(s->cpu_irq, level); + } + s->main_int_cr = val; +} + +static uint64_t mv64361_read(void *opaque, hwaddr addr, unsigned int size) +{ + MV64361State *s = MV64361(opaque); + uint32_t ret = 0; + + switch (addr) { + case MV64340_CPU_CONFIG: + ret = s->cpu_conf; + break; + case MV64340_PCI_0_IO_BASE_ADDR: + ret = 
s->pci[0].io_base; + break; + case MV64340_PCI_0_IO_SIZE: + ret = s->pci[0].io_size; + break; + case MV64340_PCI_0_IO_ADDR_REMAP: + ret = s->pci[0].remap[4] >> 16; + break; + case MV64340_PCI_0_MEMORY0_BASE_ADDR: + ret = s->pci[0].mem_base[0]; + break; + case MV64340_PCI_0_MEMORY0_SIZE: + ret = s->pci[0].mem_size[0]; + break; + case MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[0] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[0] >> 32; + break; + case MV64340_PCI_0_MEMORY1_BASE_ADDR: + ret = s->pci[0].mem_base[1]; + break; + case MV64340_PCI_0_MEMORY1_SIZE: + ret = s->pci[0].mem_size[1]; + break; + case MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[1] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[1] >> 32; + break; + case MV64340_PCI_0_MEMORY2_BASE_ADDR: + ret = s->pci[0].mem_base[2]; + break; + case MV64340_PCI_0_MEMORY2_SIZE: + ret = s->pci[0].mem_size[2]; + break; + case MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[2] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[2] >> 32; + break; + case MV64340_PCI_0_MEMORY3_BASE_ADDR: + ret = s->pci[0].mem_base[3]; + break; + case MV64340_PCI_0_MEMORY3_SIZE: + ret = s->pci[0].mem_size[3]; + break; + case MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP: + ret = (s->pci[0].remap[3] & 0xffff0000) >> 16; + break; + case MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP: + ret = s->pci[0].remap[3] >> 32; + break; + case MV64340_PCI_1_IO_BASE_ADDR: + ret = s->pci[1].io_base; + break; + case MV64340_PCI_1_IO_SIZE: + ret = s->pci[1].io_size; + break; + case MV64340_PCI_1_IO_ADDR_REMAP: + ret = s->pci[1].remap[4] >> 16; + break; + case MV64340_PCI_1_MEMORY0_BASE_ADDR: + ret = s->pci[1].mem_base[0]; + break; + case MV64340_PCI_1_MEMORY0_SIZE: + ret = s->pci[1].mem_size[0]; + break; + case MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[0] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[0] >> 32; + break; + case MV64340_PCI_1_MEMORY1_BASE_ADDR: + ret = s->pci[1].mem_base[1]; + break; + case MV64340_PCI_1_MEMORY1_SIZE: + ret = s->pci[1].mem_size[1]; + break; + case MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[1] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[1] >> 32; + break; + case MV64340_PCI_1_MEMORY2_BASE_ADDR: + ret = s->pci[1].mem_base[2]; + break; + case MV64340_PCI_1_MEMORY2_SIZE: + ret = s->pci[1].mem_size[2]; + break; + case MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[2] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[2] >> 32; + break; + case MV64340_PCI_1_MEMORY3_BASE_ADDR: + ret = s->pci[1].mem_base[3]; + break; + case MV64340_PCI_1_MEMORY3_SIZE: + ret = s->pci[1].mem_size[3]; + break; + case MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP: + ret = (s->pci[1].remap[3] & 0xffff0000) >> 16; + break; + case MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP: + ret = s->pci[1].remap[3] >> 32; + break; + case MV64340_INTERNAL_SPACE_BASE_ADDR: + ret = s->regs_base; + break; + case MV64340_BASE_ADDR_ENABLE: + ret = s->base_addr_enable; + break; + case MV64340_PCI_0_CONFIG_ADDR: + ret = pci_host_conf_le_ops.read(PCI_HOST_BRIDGE(&s->pci[0]), 0, size); + break; + case MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG ... 
+ MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG + 3: + ret = pci_host_data_le_ops.read(PCI_HOST_BRIDGE(&s->pci[0]), + addr - MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG, size); + break; + case MV64340_PCI_1_CONFIG_ADDR: + ret = pci_host_conf_le_ops.read(PCI_HOST_BRIDGE(&s->pci[1]), 0, size); + break; + case MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG ... + MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG + 3: + ret = pci_host_data_le_ops.read(PCI_HOST_BRIDGE(&s->pci[1]), + addr - MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG, size); + break; + case MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG: + /* FIXME: Should this be sent via the PCI bus somehow? */ + if (s->gpp_int_level && (s->gpp_value & BIT(31))) { + ret = pic_read_irq(isa_pic); + } + break; + case MV64340_MAIN_INTERRUPT_CAUSE_LOW: + ret = s->main_int_cr; + break; + case MV64340_MAIN_INTERRUPT_CAUSE_HIGH: + ret = s->main_int_cr >> 32; + break; + case MV64340_CPU_INTERRUPT0_MASK_LOW: + ret = s->cpu0_int_mask; + break; + case MV64340_CPU_INTERRUPT0_MASK_HIGH: + ret = s->cpu0_int_mask >> 32; + break; + case MV64340_CPU_INTERRUPT0_SELECT_CAUSE: + ret = s->main_int_cr; + if (s->main_int_cr & s->cpu0_int_mask) { + if (!(s->main_int_cr & s->cpu0_int_mask & 0xffffffff)) { + ret = s->main_int_cr >> 32 | BIT(30); + } else if ((s->main_int_cr & s->cpu0_int_mask) >> 32) { + ret |= BIT(31); + } + } + break; + case MV64340_CUNIT_ARBITER_CONTROL_REG: + ret = 0x11ff0000 | (s->gpp_int_level << 10); + break; + case MV64340_GPP_IO_CONTROL: + ret = s->gpp_io; + break; + case MV64340_GPP_LEVEL_CONTROL: + ret = s->gpp_level; + break; + case MV64340_GPP_VALUE: + ret = s->gpp_value; + break; + case MV64340_GPP_VALUE_SET: + case MV64340_GPP_VALUE_CLEAR: + ret = 0; + break; + case MV64340_GPP_INTERRUPT_CAUSE: + ret = s->gpp_int_cr; + break; + case MV64340_GPP_INTERRUPT_MASK0: + case MV64340_GPP_INTERRUPT_MASK1: + ret = s->gpp_int_mask; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: Unimplemented register read 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } + if (addr != MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG) { + trace_mv64361_reg_read(addr, ret); + } + return ret; +} + +static void warn_swap_bit(uint64_t val) +{ + if ((val & 0x3000000ULL) >> 24 != 1) { + qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__); + } +} + +static void mv64361_set_pci_mem_remap(MV64361State *s, int bus, int idx, + uint64_t val, bool high) +{ + if (high) { + s->pci[bus].remap[idx] = val; + } else { + s->pci[bus].remap[idx] &= 0xffffffff00000000ULL; + s->pci[bus].remap[idx] |= (val & 0xffffULL) << 16; + } +} + +static void mv64361_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int size) +{ + MV64361State *s = MV64361(opaque); + + trace_mv64361_reg_write(addr, val); + switch (addr) { + case MV64340_CPU_CONFIG: + s->cpu_conf = val & 0xe4e3bffULL; + s->cpu_conf |= BIT(23); + break; + case MV64340_PCI_0_IO_BASE_ADDR: + s->pci[0].io_base = val & 0x30fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + s->pci[0].remap[4] = (val & 0xffffULL) << 16; + } + break; + case MV64340_PCI_0_IO_SIZE: + s->pci[0].io_size = val & 0xffffULL; + break; + case MV64340_PCI_0_IO_ADDR_REMAP: + s->pci[0].remap[4] = (val & 0xffffULL) << 16; + break; + case MV64340_PCI_0_MEMORY0_BASE_ADDR: + s->pci[0].mem_base[0] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 0, val, false); + } + break; + case MV64340_PCI_0_MEMORY0_SIZE: + s->pci[0].mem_size[0] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP: + 
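+    /* LOW and HIGH remap writes share one body via mv64361_set_pci_mem_remap() */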
case MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 0, val, + (addr == MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_0_MEMORY1_BASE_ADDR: + s->pci[0].mem_base[1] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 1, val, false); + } + break; + case MV64340_PCI_0_MEMORY1_SIZE: + s->pci[0].mem_size[1] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP: + case MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 1, val, + (addr == MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_0_MEMORY2_BASE_ADDR: + s->pci[0].mem_base[2] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 2, val, false); + } + break; + case MV64340_PCI_0_MEMORY2_SIZE: + s->pci[0].mem_size[2] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP: + case MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 2, val, + (addr == MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_0_MEMORY3_BASE_ADDR: + s->pci[0].mem_base[3] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 0, 3, val, false); + } + break; + case MV64340_PCI_0_MEMORY3_SIZE: + s->pci[0].mem_size[3] = val & 0xffffULL; + break; + case MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP: + case MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 0, 3, val, + (addr == MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_IO_BASE_ADDR: + s->pci[1].io_base = val & 0x30fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + s->pci[1].remap[4] = (val & 0xffffULL) << 16; + } + break; + case MV64340_PCI_1_IO_SIZE: + s->pci[1].io_size = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY0_BASE_ADDR: + s->pci[1].mem_base[0] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 0, val, false); + } + break; + case MV64340_PCI_1_MEMORY0_SIZE: + s->pci[1].mem_size[0] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 0, val, + (addr == MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_MEMORY1_BASE_ADDR: + s->pci[1].mem_base[1] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 1, val, false); + } + break; + case MV64340_PCI_1_MEMORY1_SIZE: + s->pci[1].mem_size[1] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 1, val, + (addr == MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_MEMORY2_BASE_ADDR: + s->pci[1].mem_base[2] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 2, val, false); + } + break; + case MV64340_PCI_1_MEMORY2_SIZE: + s->pci[1].mem_size[2] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 2, val, + (addr == MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP)); + break; + case MV64340_PCI_1_MEMORY3_BASE_ADDR: + s->pci[1].mem_base[3] = val & 0x70fffffULL; + warn_swap_bit(val); + if (!(s->cpu_conf & BIT(27))) { + mv64361_set_pci_mem_remap(s, 1, 3, val, false); + } + break; + case 
MV64340_PCI_1_MEMORY3_SIZE: + s->pci[1].mem_size[3] = val & 0xffffULL; + break; + case MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP: + case MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP: + mv64361_set_pci_mem_remap(s, 1, 3, val, + (addr == MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP)); + break; + case MV64340_INTERNAL_SPACE_BASE_ADDR: + s->regs_base = val & 0xfffffULL; + break; + case MV64340_BASE_ADDR_ENABLE: + set_mem_windows(s, val); + break; + case MV64340_PCI_0_CONFIG_ADDR: + pci_host_conf_le_ops.write(PCI_HOST_BRIDGE(&s->pci[0]), 0, val, size); + break; + case MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG ... + MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG + 3: + pci_host_data_le_ops.write(PCI_HOST_BRIDGE(&s->pci[0]), + addr - MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG, val, size); + break; + case MV64340_PCI_1_CONFIG_ADDR: + pci_host_conf_le_ops.write(PCI_HOST_BRIDGE(&s->pci[1]), 0, val, size); + break; + case MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG ... + MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG + 3: + pci_host_data_le_ops.write(PCI_HOST_BRIDGE(&s->pci[1]), + addr - MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG, val, size); + break; + case MV64340_CPU_INTERRUPT0_MASK_LOW: + s->cpu0_int_mask &= 0xffffffff00000000ULL; + s->cpu0_int_mask |= val & 0xffffffffULL; + break; + case MV64340_CPU_INTERRUPT0_MASK_HIGH: + s->cpu0_int_mask &= 0xffffffffULL; + s->cpu0_int_mask |= val << 32; + break; + case MV64340_CUNIT_ARBITER_CONTROL_REG: + s->gpp_int_level = !!(val & BIT(10)); + break; + case MV64340_GPP_IO_CONTROL: + s->gpp_io = val; + break; + case MV64340_GPP_LEVEL_CONTROL: + s->gpp_level = val; + break; + case MV64340_GPP_VALUE: + s->gpp_value &= ~s->gpp_io; + s->gpp_value |= val & s->gpp_io; + break; + case MV64340_GPP_VALUE_SET: + s->gpp_value |= val & s->gpp_io; + break; + case MV64340_GPP_VALUE_CLEAR: + s->gpp_value &= ~(val & s->gpp_io); + break; + case MV64340_GPP_INTERRUPT_CAUSE: + if (!s->gpp_int_level && val != s->gpp_int_cr) { + int i; + uint32_t ch = s->gpp_int_cr ^ val; + s->gpp_int_cr = val; + for (i = 0; i < 4; i++) { + if ((ch & 0xff << i) && !(val & 0xff << i)) { + mv64361_update_irq(opaque, MV64361_IRQ_P0_GPP0_7 + i, 0); + } + } + } else { + s->gpp_int_cr = val; + } + break; + case MV64340_GPP_INTERRUPT_MASK0: + case MV64340_GPP_INTERRUPT_MASK1: + s->gpp_int_mask = val; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: Unimplemented register write 0x%" + HWADDR_PRIx " = %"PRIx64"\n", __func__, addr, val); + break; + } +} + +static const MemoryRegionOps mv64361_ops = { + .read = mv64361_read, + .write = mv64361_write, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void mv64361_gpp_irq(void *opaque, int n, int level) +{ + MV64361State *s = opaque; + uint32_t mask = BIT(n); + uint32_t val = s->gpp_value & ~mask; + + if (s->gpp_level & mask) { + level = !level; + } + val |= level << n; + if (val > s->gpp_value) { + s->gpp_value = val; + s->gpp_int_cr |= mask; + if (s->gpp_int_mask & mask) { + mv64361_update_irq(opaque, MV64361_IRQ_P0_GPP0_7 + n / 8, 1); + } + } else if (val < s->gpp_value) { + int b = n / 8; + s->gpp_value = val; + if (s->gpp_int_level && !(val & 0xff << b)) { + mv64361_update_irq(opaque, MV64361_IRQ_P0_GPP0_7 + b, 0); + } + } +} + +static void mv64361_realize(DeviceState *dev, Error **errp) +{ + MV64361State *s = MV64361(dev); + int i; + + s->base_addr_enable = 0x1fffff; + memory_region_init_io(&s->regs, OBJECT(s), &mv64361_ops, s, + TYPE_MV64361, 0x10000); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->regs); + for (i = 0; i < 2; i++) { + g_autofree 
char *name = g_strdup_printf("pcihost%d", i); + object_initialize_child(OBJECT(dev), name, &s->pci[i], + TYPE_MV64361_PCI); + DeviceState *pci = DEVICE(&s->pci[i]); + qdev_prop_set_uint8(pci, "index", i); + sysbus_realize_and_unref(SYS_BUS_DEVICE(pci), &error_fatal); + } + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->cpu_irq); + qdev_init_gpio_in_named(dev, mv64361_gpp_irq, "gpp", 32); + /* FIXME: PCI IRQ connections may be board specific */ + for (i = 0; i < PCI_NUM_PINS; i++) { + s->pci[1].irq[i] = qdev_get_gpio_in_named(dev, "gpp", 12 + i); + } +} + +static void mv64361_reset(DeviceState *dev) +{ + MV64361State *s = MV64361(dev); + int i, j; + + /* + * These values may be board specific + * Real chip supports init from an eprom but that's not modelled + */ + set_mem_windows(s, 0x1fffff); + s->cpu_conf = 0x28000ff; + s->regs_base = 0x100f100; + s->pci[0].io_base = 0x100f800; + s->pci[0].io_size = 0xff; + s->pci[0].mem_base[0] = 0x100c000; + s->pci[0].mem_size[0] = 0x1fff; + s->pci[0].mem_base[1] = 0x100f900; + s->pci[0].mem_size[1] = 0xff; + s->pci[0].mem_base[2] = 0x100f400; + s->pci[0].mem_size[2] = 0x1ff; + s->pci[0].mem_base[3] = 0x100f600; + s->pci[0].mem_size[3] = 0x1ff; + s->pci[1].io_base = 0x100fe00; + s->pci[1].io_size = 0xff; + s->pci[1].mem_base[0] = 0x1008000; + s->pci[1].mem_size[0] = 0x3fff; + s->pci[1].mem_base[1] = 0x100fd00; + s->pci[1].mem_size[1] = 0xff; + s->pci[1].mem_base[2] = 0x1002600; + s->pci[1].mem_size[2] = 0x1ff; + s->pci[1].mem_base[3] = 0x100ff80; + s->pci[1].mem_size[3] = 0x7f; + for (i = 0; i < 2; i++) { + for (j = 0; j < 4; j++) { + s->pci[i].remap[j] = s->pci[i].mem_base[j] << 16; + } + } + s->pci[0].remap[1] = 0; + s->pci[1].remap[1] = 0; + set_mem_windows(s, 0xfbfff); +} + +static void mv64361_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = mv64361_realize; + dc->reset = mv64361_reset; +} + +static const TypeInfo mv64361_type_info = { + .name = TYPE_MV64361, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(MV64361State), + .class_init = mv64361_class_init, +}; + +static void mv64361_register_types(void) +{ + type_register_static(&mv64361_type_info); +} + +type_init(mv64361_register_types) diff --git a/hw/pci-host/mv643xx.h b/hw/pci-host/mv643xx.h new file mode 100644 index 00000000000..cd26a43f188 --- /dev/null +++ b/hw/pci-host/mv643xx.h @@ -0,0 +1,918 @@ +/* + * mv643xx.h - MV-643XX Internal registers definition file. + * + * Copyright 2002 Momentum Computer, Inc. + * Author: Matthew Dharm + * Copyright 2002 GALILEO TECHNOLOGY, LTD. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#ifndef ASM_MV643XX_H +#define ASM_MV643XX_H + +/****************************************/ +/* Processor Address Space */ +/****************************************/ + +/* DDR SDRAM BAR and size registers */ + +#define MV64340_CS_0_BASE_ADDR 0x008 +#define MV64340_CS_0_SIZE 0x010 +#define MV64340_CS_1_BASE_ADDR 0x208 +#define MV64340_CS_1_SIZE 0x210 +#define MV64340_CS_2_BASE_ADDR 0x018 +#define MV64340_CS_2_SIZE 0x020 +#define MV64340_CS_3_BASE_ADDR 0x218 +#define MV64340_CS_3_SIZE 0x220 + +/* Devices BAR and size registers */ + +#define MV64340_DEV_CS0_BASE_ADDR 0x028 +#define MV64340_DEV_CS0_SIZE 0x030 +#define MV64340_DEV_CS1_BASE_ADDR 0x228 +#define MV64340_DEV_CS1_SIZE 0x230 +#define MV64340_DEV_CS2_BASE_ADDR 0x248 +#define MV64340_DEV_CS2_SIZE 0x250 +#define MV64340_DEV_CS3_BASE_ADDR 0x038 +#define MV64340_DEV_CS3_SIZE 0x040 +#define MV64340_BOOTCS_BASE_ADDR 0x238 +#define MV64340_BOOTCS_SIZE 0x240 + +/* PCI 0 BAR and size registers */ + +#define MV64340_PCI_0_IO_BASE_ADDR 0x048 +#define MV64340_PCI_0_IO_SIZE 0x050 +#define MV64340_PCI_0_MEMORY0_BASE_ADDR 0x058 +#define MV64340_PCI_0_MEMORY0_SIZE 0x060 +#define MV64340_PCI_0_MEMORY1_BASE_ADDR 0x080 +#define MV64340_PCI_0_MEMORY1_SIZE 0x088 +#define MV64340_PCI_0_MEMORY2_BASE_ADDR 0x258 +#define MV64340_PCI_0_MEMORY2_SIZE 0x260 +#define MV64340_PCI_0_MEMORY3_BASE_ADDR 0x280 +#define MV64340_PCI_0_MEMORY3_SIZE 0x288 + +/* PCI 1 BAR and size registers */ +#define MV64340_PCI_1_IO_BASE_ADDR 0x090 +#define MV64340_PCI_1_IO_SIZE 0x098 +#define MV64340_PCI_1_MEMORY0_BASE_ADDR 0x0a0 +#define MV64340_PCI_1_MEMORY0_SIZE 0x0a8 +#define MV64340_PCI_1_MEMORY1_BASE_ADDR 0x0b0 +#define MV64340_PCI_1_MEMORY1_SIZE 0x0b8 +#define MV64340_PCI_1_MEMORY2_BASE_ADDR 0x2a0 +#define MV64340_PCI_1_MEMORY2_SIZE 0x2a8 +#define MV64340_PCI_1_MEMORY3_BASE_ADDR 0x2b0 +#define MV64340_PCI_1_MEMORY3_SIZE 0x2b8 + +/* SRAM base address */ +#define MV64340_INTEGRATED_SRAM_BASE_ADDR 0x268 + +/* internal registers space base address */ +#define MV64340_INTERNAL_SPACE_BASE_ADDR 0x068 + +/* Enables the CS , DEV_CS , PCI 0 and PCI 1 windows above */ +#define MV64340_BASE_ADDR_ENABLE 0x278 + +/****************************************/ +/* PCI remap registers */ +/****************************************/ + + /* PCI 0 */ +#define MV64340_PCI_0_IO_ADDR_REMAP 0x0f0 +#define MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP 0x0f8 +#define MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP 0x320 +#define MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP 0x100 +#define MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP 0x328 +#define MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP 0x2f8 +#define MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP 0x330 +#define MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP 0x300 +#define MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP 0x338 + /* PCI 1 */ +#define MV64340_PCI_1_IO_ADDR_REMAP 0x108 +#define MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP 0x110 +#define MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP 0x340 +#define MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP 0x118 +#define MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP 0x348 +#define MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP 0x310 +#define MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP 0x350 +#define MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP 0x318 +#define MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP 0x358 + +#define MV64340_CPU_PCI_0_HEADERS_RETARGET_CONTROL 0x3b0 +#define MV64340_CPU_PCI_0_HEADERS_RETARGET_BASE 0x3b8 +#define MV64340_CPU_PCI_1_HEADERS_RETARGET_CONTROL 0x3c0 +#define MV64340_CPU_PCI_1_HEADERS_RETARGET_BASE 0x3c8 +#define MV64340_CPU_GE_HEADERS_RETARGET_CONTROL 0x3d0 +#define MV64340_CPU_GE_HEADERS_RETARGET_BASE 
0x3d8 +#define MV64340_CPU_IDMA_HEADERS_RETARGET_CONTROL 0x3e0 +#define MV64340_CPU_IDMA_HEADERS_RETARGET_BASE 0x3e8 + +/****************************************/ +/* CPU Control Registers */ +/****************************************/ + +#define MV64340_CPU_CONFIG 0x000 +#define MV64340_CPU_MODE 0x120 +#define MV64340_CPU_MASTER_CONTROL 0x160 +#define MV64340_CPU_CROSS_BAR_CONTROL_LOW 0x150 +#define MV64340_CPU_CROSS_BAR_CONTROL_HIGH 0x158 +#define MV64340_CPU_CROSS_BAR_TIMEOUT 0x168 + +/****************************************/ +/* SMP RegisterS */ +/****************************************/ + +#define MV64340_SMP_WHO_AM_I 0x200 +#define MV64340_SMP_CPU0_DOORBELL 0x214 +#define MV64340_SMP_CPU0_DOORBELL_CLEAR 0x21C +#define MV64340_SMP_CPU1_DOORBELL 0x224 +#define MV64340_SMP_CPU1_DOORBELL_CLEAR 0x22C +#define MV64340_SMP_CPU0_DOORBELL_MASK 0x234 +#define MV64340_SMP_CPU1_DOORBELL_MASK 0x23C +#define MV64340_SMP_SEMAPHOR0 0x244 +#define MV64340_SMP_SEMAPHOR1 0x24c +#define MV64340_SMP_SEMAPHOR2 0x254 +#define MV64340_SMP_SEMAPHOR3 0x25c +#define MV64340_SMP_SEMAPHOR4 0x264 +#define MV64340_SMP_SEMAPHOR5 0x26c +#define MV64340_SMP_SEMAPHOR6 0x274 +#define MV64340_SMP_SEMAPHOR7 0x27c + +/****************************************/ +/* CPU Sync Barrier Register */ +/****************************************/ + +#define MV64340_CPU_0_SYNC_BARRIER_TRIGGER 0x0c0 +#define MV64340_CPU_0_SYNC_BARRIER_VIRTUAL 0x0c8 +#define MV64340_CPU_1_SYNC_BARRIER_TRIGGER 0x0d0 +#define MV64340_CPU_1_SYNC_BARRIER_VIRTUAL 0x0d8 + +/****************************************/ +/* CPU Access Protect */ +/****************************************/ + +#define MV64340_CPU_PROTECT_WINDOW_0_BASE_ADDR 0x180 +#define MV64340_CPU_PROTECT_WINDOW_0_SIZE 0x188 +#define MV64340_CPU_PROTECT_WINDOW_1_BASE_ADDR 0x190 +#define MV64340_CPU_PROTECT_WINDOW_1_SIZE 0x198 +#define MV64340_CPU_PROTECT_WINDOW_2_BASE_ADDR 0x1a0 +#define MV64340_CPU_PROTECT_WINDOW_2_SIZE 0x1a8 +#define MV64340_CPU_PROTECT_WINDOW_3_BASE_ADDR 0x1b0 +#define MV64340_CPU_PROTECT_WINDOW_3_SIZE 0x1b8 + + +/****************************************/ +/* CPU Error Report */ +/****************************************/ + +#define MV64340_CPU_ERROR_ADDR_LOW 0x070 +#define MV64340_CPU_ERROR_ADDR_HIGH 0x078 +#define MV64340_CPU_ERROR_DATA_LOW 0x128 +#define MV64340_CPU_ERROR_DATA_HIGH 0x130 +#define MV64340_CPU_ERROR_PARITY 0x138 +#define MV64340_CPU_ERROR_CAUSE 0x140 +#define MV64340_CPU_ERROR_MASK 0x148 + +/****************************************/ +/* CPU Interface Debug Registers */ +/****************************************/ + +#define MV64340_PUNIT_SLAVE_DEBUG_LOW 0x360 +#define MV64340_PUNIT_SLAVE_DEBUG_HIGH 0x368 +#define MV64340_PUNIT_MASTER_DEBUG_LOW 0x370 +#define MV64340_PUNIT_MASTER_DEBUG_HIGH 0x378 +#define MV64340_PUNIT_MMASK 0x3e4 + +/****************************************/ +/* Integrated SRAM Registers */ +/****************************************/ + +#define MV64340_SRAM_CONFIG 0x380 +#define MV64340_SRAM_TEST_MODE 0X3F4 +#define MV64340_SRAM_ERROR_CAUSE 0x388 +#define MV64340_SRAM_ERROR_ADDR 0x390 +#define MV64340_SRAM_ERROR_ADDR_HIGH 0X3F8 +#define MV64340_SRAM_ERROR_DATA_LOW 0x398 +#define MV64340_SRAM_ERROR_DATA_HIGH 0x3a0 +#define MV64340_SRAM_ERROR_DATA_PARITY 0x3a8 + +/****************************************/ +/* SDRAM Configuration */ +/****************************************/ + +#define MV64340_SDRAM_CONFIG 0x1400 +#define MV64340_D_UNIT_CONTROL_LOW 0x1404 +#define MV64340_D_UNIT_CONTROL_HIGH 0x1424 +#define MV64340_SDRAM_TIMING_CONTROL_LOW 
0x1408 +#define MV64340_SDRAM_TIMING_CONTROL_HIGH 0x140c +#define MV64340_SDRAM_ADDR_CONTROL 0x1410 +#define MV64340_SDRAM_OPEN_PAGES_CONTROL 0x1414 +#define MV64340_SDRAM_OPERATION 0x1418 +#define MV64340_SDRAM_MODE 0x141c +#define MV64340_EXTENDED_DRAM_MODE 0x1420 +#define MV64340_SDRAM_CROSS_BAR_CONTROL_LOW 0x1430 +#define MV64340_SDRAM_CROSS_BAR_CONTROL_HIGH 0x1434 +#define MV64340_SDRAM_CROSS_BAR_TIMEOUT 0x1438 +#define MV64340_SDRAM_ADDR_CTRL_PADS_CALIBRATION 0x14c0 +#define MV64340_SDRAM_DATA_PADS_CALIBRATION 0x14c4 + +/****************************************/ +/* SDRAM Error Report */ +/****************************************/ + +#define MV64340_SDRAM_ERROR_DATA_LOW 0x1444 +#define MV64340_SDRAM_ERROR_DATA_HIGH 0x1440 +#define MV64340_SDRAM_ERROR_ADDR 0x1450 +#define MV64340_SDRAM_RECEIVED_ECC 0x1448 +#define MV64340_SDRAM_CALCULATED_ECC 0x144c +#define MV64340_SDRAM_ECC_CONTROL 0x1454 +#define MV64340_SDRAM_ECC_ERROR_COUNTER 0x1458 + +/******************************************/ +/* Controlled Delay Line (CDL) Registers */ +/******************************************/ + +#define MV64340_DFCDL_CONFIG0 0x1480 +#define MV64340_DFCDL_CONFIG1 0x1484 +#define MV64340_DLL_WRITE 0x1488 +#define MV64340_DLL_READ 0x148c +#define MV64340_SRAM_ADDR 0x1490 +#define MV64340_SRAM_DATA0 0x1494 +#define MV64340_SRAM_DATA1 0x1498 +#define MV64340_SRAM_DATA2 0x149c +#define MV64340_DFCL_PROBE 0x14a0 + +/******************************************/ +/* Debug Registers */ +/******************************************/ + +#define MV64340_DUNIT_DEBUG_LOW 0x1460 +#define MV64340_DUNIT_DEBUG_HIGH 0x1464 +#define MV64340_DUNIT_MMASK 0X1b40 + +/****************************************/ +/* Device Parameters */ +/****************************************/ + +#define MV64340_DEVICE_BANK0_PARAMETERS 0x45c +#define MV64340_DEVICE_BANK1_PARAMETERS 0x460 +#define MV64340_DEVICE_BANK2_PARAMETERS 0x464 +#define MV64340_DEVICE_BANK3_PARAMETERS 0x468 +#define MV64340_DEVICE_BOOT_BANK_PARAMETERS 0x46c +#define MV64340_DEVICE_INTERFACE_CONTROL 0x4c0 +#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_LOW 0x4c8 +#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_HIGH 0x4cc +#define MV64340_DEVICE_INTERFACE_CROSS_BAR_TIMEOUT 0x4c4 + +/****************************************/ +/* Device interrupt registers */ +/****************************************/ + +#define MV64340_DEVICE_INTERRUPT_CAUSE 0x4d0 +#define MV64340_DEVICE_INTERRUPT_MASK 0x4d4 +#define MV64340_DEVICE_ERROR_ADDR 0x4d8 +#define MV64340_DEVICE_ERROR_DATA 0x4dc +#define MV64340_DEVICE_ERROR_PARITY 0x4e0 + +/****************************************/ +/* Device debug registers */ +/****************************************/ + +#define MV64340_DEVICE_DEBUG_LOW 0x4e4 +#define MV64340_DEVICE_DEBUG_HIGH 0x4e8 +#define MV64340_RUNIT_MMASK 0x4f0 + +/****************************************/ +/* PCI Slave Address Decoding registers */ +/****************************************/ + +#define MV64340_PCI_0_CS_0_BANK_SIZE 0xc08 +#define MV64340_PCI_1_CS_0_BANK_SIZE 0xc88 +#define MV64340_PCI_0_CS_1_BANK_SIZE 0xd08 +#define MV64340_PCI_1_CS_1_BANK_SIZE 0xd88 +#define MV64340_PCI_0_CS_2_BANK_SIZE 0xc0c +#define MV64340_PCI_1_CS_2_BANK_SIZE 0xc8c +#define MV64340_PCI_0_CS_3_BANK_SIZE 0xd0c +#define MV64340_PCI_1_CS_3_BANK_SIZE 0xd8c +#define MV64340_PCI_0_DEVCS_0_BANK_SIZE 0xc10 +#define MV64340_PCI_1_DEVCS_0_BANK_SIZE 0xc90 +#define MV64340_PCI_0_DEVCS_1_BANK_SIZE 0xd10 +#define MV64340_PCI_1_DEVCS_1_BANK_SIZE 0xd90 +#define MV64340_PCI_0_DEVCS_2_BANK_SIZE 0xd18 +#define 
MV64340_PCI_1_DEVCS_2_BANK_SIZE 0xd98 +#define MV64340_PCI_0_DEVCS_3_BANK_SIZE 0xc14 +#define MV64340_PCI_1_DEVCS_3_BANK_SIZE 0xc94 +#define MV64340_PCI_0_DEVCS_BOOT_BANK_SIZE 0xd14 +#define MV64340_PCI_1_DEVCS_BOOT_BANK_SIZE 0xd94 +#define MV64340_PCI_0_P2P_MEM0_BAR_SIZE 0xd1c +#define MV64340_PCI_1_P2P_MEM0_BAR_SIZE 0xd9c +#define MV64340_PCI_0_P2P_MEM1_BAR_SIZE 0xd20 +#define MV64340_PCI_1_P2P_MEM1_BAR_SIZE 0xda0 +#define MV64340_PCI_0_P2P_I_O_BAR_SIZE 0xd24 +#define MV64340_PCI_1_P2P_I_O_BAR_SIZE 0xda4 +#define MV64340_PCI_0_CPU_BAR_SIZE 0xd28 +#define MV64340_PCI_1_CPU_BAR_SIZE 0xda8 +#define MV64340_PCI_0_INTERNAL_SRAM_BAR_SIZE 0xe00 +#define MV64340_PCI_1_INTERNAL_SRAM_BAR_SIZE 0xe80 +#define MV64340_PCI_0_EXPANSION_ROM_BAR_SIZE 0xd2c +#define MV64340_PCI_1_EXPANSION_ROM_BAR_SIZE 0xd9c +#define MV64340_PCI_0_BASE_ADDR_REG_ENABLE 0xc3c +#define MV64340_PCI_1_BASE_ADDR_REG_ENABLE 0xcbc +#define MV64340_PCI_0_CS_0_BASE_ADDR_REMAP 0xc48 +#define MV64340_PCI_1_CS_0_BASE_ADDR_REMAP 0xcc8 +#define MV64340_PCI_0_CS_1_BASE_ADDR_REMAP 0xd48 +#define MV64340_PCI_1_CS_1_BASE_ADDR_REMAP 0xdc8 +#define MV64340_PCI_0_CS_2_BASE_ADDR_REMAP 0xc4c +#define MV64340_PCI_1_CS_2_BASE_ADDR_REMAP 0xccc +#define MV64340_PCI_0_CS_3_BASE_ADDR_REMAP 0xd4c +#define MV64340_PCI_1_CS_3_BASE_ADDR_REMAP 0xdcc +#define MV64340_PCI_0_CS_0_BASE_HIGH_ADDR_REMAP 0xF04 +#define MV64340_PCI_1_CS_0_BASE_HIGH_ADDR_REMAP 0xF84 +#define MV64340_PCI_0_CS_1_BASE_HIGH_ADDR_REMAP 0xF08 +#define MV64340_PCI_1_CS_1_BASE_HIGH_ADDR_REMAP 0xF88 +#define MV64340_PCI_0_CS_2_BASE_HIGH_ADDR_REMAP 0xF0C +#define MV64340_PCI_1_CS_2_BASE_HIGH_ADDR_REMAP 0xF8C +#define MV64340_PCI_0_CS_3_BASE_HIGH_ADDR_REMAP 0xF10 +#define MV64340_PCI_1_CS_3_BASE_HIGH_ADDR_REMAP 0xF90 +#define MV64340_PCI_0_DEVCS_0_BASE_ADDR_REMAP 0xc50 +#define MV64340_PCI_1_DEVCS_0_BASE_ADDR_REMAP 0xcd0 +#define MV64340_PCI_0_DEVCS_1_BASE_ADDR_REMAP 0xd50 +#define MV64340_PCI_1_DEVCS_1_BASE_ADDR_REMAP 0xdd0 +#define MV64340_PCI_0_DEVCS_2_BASE_ADDR_REMAP 0xd58 +#define MV64340_PCI_1_DEVCS_2_BASE_ADDR_REMAP 0xdd8 +#define MV64340_PCI_0_DEVCS_3_BASE_ADDR_REMAP 0xc54 +#define MV64340_PCI_1_DEVCS_3_BASE_ADDR_REMAP 0xcd4 +#define MV64340_PCI_0_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xd54 +#define MV64340_PCI_1_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xdd4 +#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xd5c +#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xddc +#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xd60 +#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xde0 +#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xd64 +#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xde4 +#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xd68 +#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xde8 +#define MV64340_PCI_0_P2P_I_O_BASE_ADDR_REMAP 0xd6c +#define MV64340_PCI_1_P2P_I_O_BASE_ADDR_REMAP 0xdec +#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_LOW 0xd70 +#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_LOW 0xdf0 +#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_HIGH 0xd74 +#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_HIGH 0xdf4 +#define MV64340_PCI_0_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf00 +#define MV64340_PCI_1_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf80 +#define MV64340_PCI_0_EXPANSION_ROM_BASE_ADDR_REMAP 0xf38 +#define MV64340_PCI_1_EXPANSION_ROM_BASE_ADDR_REMAP 0xfb8 +#define MV64340_PCI_0_ADDR_DECODE_CONTROL 0xd3c +#define MV64340_PCI_1_ADDR_DECODE_CONTROL 0xdbc +#define MV64340_PCI_0_HEADERS_RETARGET_CONTROL 0xF40 +#define MV64340_PCI_1_HEADERS_RETARGET_CONTROL 0xFc0 +#define 
MV64340_PCI_0_HEADERS_RETARGET_BASE 0xF44 +#define MV64340_PCI_1_HEADERS_RETARGET_BASE 0xFc4 +#define MV64340_PCI_0_HEADERS_RETARGET_HIGH 0xF48 +#define MV64340_PCI_1_HEADERS_RETARGET_HIGH 0xFc8 + +/***********************************/ +/* PCI Control Register Map */ +/***********************************/ + +#define MV64340_PCI_0_DLL_STATUS_AND_COMMAND 0x1d20 +#define MV64340_PCI_1_DLL_STATUS_AND_COMMAND 0x1da0 +#define MV64340_PCI_0_MPP_PADS_DRIVE_CONTROL 0x1d1C +#define MV64340_PCI_1_MPP_PADS_DRIVE_CONTROL 0x1d9C +#define MV64340_PCI_0_COMMAND 0xc00 +#define MV64340_PCI_1_COMMAND 0xc80 +#define MV64340_PCI_0_MODE 0xd00 +#define MV64340_PCI_1_MODE 0xd80 +#define MV64340_PCI_0_RETRY 0xc04 +#define MV64340_PCI_1_RETRY 0xc84 +#define MV64340_PCI_0_READ_BUFFER_DISCARD_TIMER 0xd04 +#define MV64340_PCI_1_READ_BUFFER_DISCARD_TIMER 0xd84 +#define MV64340_PCI_0_MSI_TRIGGER_TIMER 0xc38 +#define MV64340_PCI_1_MSI_TRIGGER_TIMER 0xcb8 +#define MV64340_PCI_0_ARBITER_CONTROL 0x1d00 +#define MV64340_PCI_1_ARBITER_CONTROL 0x1d80 +#define MV64340_PCI_0_CROSS_BAR_CONTROL_LOW 0x1d08 +#define MV64340_PCI_1_CROSS_BAR_CONTROL_LOW 0x1d88 +#define MV64340_PCI_0_CROSS_BAR_CONTROL_HIGH 0x1d0c +#define MV64340_PCI_1_CROSS_BAR_CONTROL_HIGH 0x1d8c +#define MV64340_PCI_0_CROSS_BAR_TIMEOUT 0x1d04 +#define MV64340_PCI_1_CROSS_BAR_TIMEOUT 0x1d84 +#define MV64340_PCI_0_SYNC_BARRIER_TRIGGER_REG 0x1D18 +#define MV64340_PCI_1_SYNC_BARRIER_TRIGGER_REG 0x1D98 +#define MV64340_PCI_0_SYNC_BARRIER_VIRTUAL_REG 0x1d10 +#define MV64340_PCI_1_SYNC_BARRIER_VIRTUAL_REG 0x1d90 +#define MV64340_PCI_0_P2P_CONFIG 0x1d14 +#define MV64340_PCI_1_P2P_CONFIG 0x1d94 + +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_LOW 0x1e00 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_HIGH 0x1e04 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_0 0x1e08 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_LOW 0x1e10 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_HIGH 0x1e14 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_1 0x1e18 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_LOW 0x1e20 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_HIGH 0x1e24 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_2 0x1e28 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_LOW 0x1e30 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_HIGH 0x1e34 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_3 0x1e38 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_LOW 0x1e40 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_HIGH 0x1e44 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_4 0x1e48 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_LOW 0x1e50 +#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_HIGH 0x1e54 +#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_5 0x1e58 + +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_LOW 0x1e80 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_HIGH 0x1e84 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_0 0x1e88 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_LOW 0x1e90 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_HIGH 0x1e94 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_1 0x1e98 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_LOW 0x1ea0 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_HIGH 0x1ea4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_2 0x1ea8 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_LOW 0x1eb0 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_HIGH 0x1eb4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_3 0x1eb8 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_LOW 0x1ec0 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_HIGH 0x1ec4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_4 0x1ec8 +#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_LOW 0x1ed0 +#define 
MV64340_PCI_1_ACCESS_CONTROL_BASE_5_HIGH 0x1ed4 +#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_5 0x1ed8 + +/****************************************/ +/* PCI Configuration Access Registers */ +/****************************************/ + +#define MV64340_PCI_0_CONFIG_ADDR 0xcf8 +#define MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG 0xcfc +#define MV64340_PCI_1_CONFIG_ADDR 0xc78 +#define MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG 0xc7c +#define MV64340_PCI_0_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xc34 +#define MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xcb4 + +/****************************************/ +/* PCI Error Report Registers */ +/****************************************/ + +#define MV64340_PCI_0_SERR_MASK 0xc28 +#define MV64340_PCI_1_SERR_MASK 0xca8 +#define MV64340_PCI_0_ERROR_ADDR_LOW 0x1d40 +#define MV64340_PCI_1_ERROR_ADDR_LOW 0x1dc0 +#define MV64340_PCI_0_ERROR_ADDR_HIGH 0x1d44 +#define MV64340_PCI_1_ERROR_ADDR_HIGH 0x1dc4 +#define MV64340_PCI_0_ERROR_ATTRIBUTE 0x1d48 +#define MV64340_PCI_1_ERROR_ATTRIBUTE 0x1dc8 +#define MV64340_PCI_0_ERROR_COMMAND 0x1d50 +#define MV64340_PCI_1_ERROR_COMMAND 0x1dd0 +#define MV64340_PCI_0_ERROR_CAUSE 0x1d58 +#define MV64340_PCI_1_ERROR_CAUSE 0x1dd8 +#define MV64340_PCI_0_ERROR_MASK 0x1d5c +#define MV64340_PCI_1_ERROR_MASK 0x1ddc + +/****************************************/ +/* PCI Debug Registers */ +/****************************************/ + +#define MV64340_PCI_0_MMASK 0X1D24 +#define MV64340_PCI_1_MMASK 0X1DA4 + +/*********************************************/ +/* PCI Configuration, Function 0, Registers */ +/*********************************************/ + +#define MV64340_PCI_DEVICE_AND_VENDOR_ID 0x000 +#define MV64340_PCI_STATUS_AND_COMMAND 0x004 +#define MV64340_PCI_CLASS_CODE_AND_REVISION_ID 0x008 +#define MV64340_PCI_BIST_HEADER_TYPE_LATENCY_TIMER_CACHE_LINE 0x00C + +#define MV64340_PCI_SCS_0_BASE_ADDR_LOW 0x010 +#define MV64340_PCI_SCS_0_BASE_ADDR_HIGH 0x014 +#define MV64340_PCI_SCS_1_BASE_ADDR_LOW 0x018 +#define MV64340_PCI_SCS_1_BASE_ADDR_HIGH 0x01C +#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_LOW 0x020 +#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_HIGH 0x024 +#define MV64340_PCI_SUBSYSTEM_ID_AND_SUBSYSTEM_VENDOR_ID 0x02c +#define MV64340_PCI_EXPANSION_ROM_BASE_ADDR_REG 0x030 +#define MV64340_PCI_CAPABILTY_LIST_POINTER 0x034 +#define MV64340_PCI_INTERRUPT_PIN_AND_LINE 0x03C + /* capability list */ +#define MV64340_PCI_POWER_MANAGEMENT_CAPABILITY 0x040 +#define MV64340_PCI_POWER_MANAGEMENT_STATUS_AND_CONTROL 0x044 +#define MV64340_PCI_VPD_ADDR 0x048 +#define MV64340_PCI_VPD_DATA 0x04c +#define MV64340_PCI_MSI_MESSAGE_CONTROL 0x050 +#define MV64340_PCI_MSI_MESSAGE_ADDR 0x054 +#define MV64340_PCI_MSI_MESSAGE_UPPER_ADDR 0x058 +#define MV64340_PCI_MSI_MESSAGE_DATA 0x05c +#define MV64340_PCI_X_COMMAND 0x060 +#define MV64340_PCI_X_STATUS 0x064 +#define MV64340_PCI_COMPACT_PCI_HOT_SWAP 0x068 + +/***********************************************/ +/* PCI Configuration, Function 1, Registers */ +/***********************************************/ + +#define MV64340_PCI_SCS_2_BASE_ADDR_LOW 0x110 +#define MV64340_PCI_SCS_2_BASE_ADDR_HIGH 0x114 +#define MV64340_PCI_SCS_3_BASE_ADDR_LOW 0x118 +#define MV64340_PCI_SCS_3_BASE_ADDR_HIGH 0x11c +#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_LOW 0x120 +#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_HIGH 0x124 + +/***********************************************/ +/* PCI Configuration, Function 2, Registers */ +/***********************************************/ + +#define MV64340_PCI_DEVCS_0_BASE_ADDR_LOW 
0x210 +#define MV64340_PCI_DEVCS_0_BASE_ADDR_HIGH 0x214 +#define MV64340_PCI_DEVCS_1_BASE_ADDR_LOW 0x218 +#define MV64340_PCI_DEVCS_1_BASE_ADDR_HIGH 0x21c +#define MV64340_PCI_DEVCS_2_BASE_ADDR_LOW 0x220 +#define MV64340_PCI_DEVCS_2_BASE_ADDR_HIGH 0x224 + +/***********************************************/ +/* PCI Configuration, Function 3, Registers */ +/***********************************************/ + +#define MV64340_PCI_DEVCS_3_BASE_ADDR_LOW 0x310 +#define MV64340_PCI_DEVCS_3_BASE_ADDR_HIGH 0x314 +#define MV64340_PCI_BOOT_CS_BASE_ADDR_LOW 0x318 +#define MV64340_PCI_BOOT_CS_BASE_ADDR_HIGH 0x31c +#define MV64340_PCI_CPU_BASE_ADDR_LOW 0x220 +#define MV64340_PCI_CPU_BASE_ADDR_HIGH 0x224 + +/***********************************************/ +/* PCI Configuration, Function 4, Registers */ +/***********************************************/ + +#define MV64340_PCI_P2P_MEM0_BASE_ADDR_LOW 0x410 +#define MV64340_PCI_P2P_MEM0_BASE_ADDR_HIGH 0x414 +#define MV64340_PCI_P2P_MEM1_BASE_ADDR_LOW 0x418 +#define MV64340_PCI_P2P_MEM1_BASE_ADDR_HIGH 0x41c +#define MV64340_PCI_P2P_I_O_BASE_ADDR 0x420 +#define MV64340_PCI_INTERNAL_REGS_I_O_MAPPED_BASE_ADDR 0x424 + +/****************************************/ +/* Messaging Unit Registers (I20) */ +/****************************************/ + +#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_0_SIDE 0x010 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_0_SIDE 0x014 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_0_SIDE 0x018 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_0_SIDE 0x01C +#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_0_SIDE 0x020 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x024 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x028 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_0_SIDE 0x02C +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x030 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x034 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x040 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x044 +#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_0_SIDE 0x050 +#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_0_SIDE 0x054 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x060 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x064 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x068 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x06C +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x070 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x074 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x0F8 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x0FC + +#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_1_SIDE 0x090 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_1_SIDE 0x094 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_1_SIDE 0x098 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_1_SIDE 0x09C +#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_1_SIDE 0x0A0 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0A4 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0A8 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_1_SIDE 0x0AC +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0B0 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0B4 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C0 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C4 +#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_1_SIDE 0x0D0 +#define 
MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_1_SIDE 0x0D4 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0E0 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0E4 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x0E8 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x0EC +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0F0 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0F4 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x078 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x07C + +#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C10 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C14 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C18 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C1C +#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU0_SIDE 0x1C20 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C24 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C28 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU0_SIDE 0x1C2C +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C30 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C34 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C40 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C44 +#define MV64340_I2O_QUEUE_CONTROL_REG_CPU0_SIDE 0x1C50 +#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU0_SIDE 0x1C54 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C60 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C64 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1C68 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1C6C +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C70 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C74 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1CF8 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1CFC +#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C90 +#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C94 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C98 +#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C9C +#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU1_SIDE 0x1CA0 +#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CA4 +#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CA8 +#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU1_SIDE 0x1CAC +#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CB0 +#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CB4 +#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC0 +#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC4 +#define MV64340_I2O_QUEUE_CONTROL_REG_CPU1_SIDE 0x1CD0 +#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU1_SIDE 0x1CD4 +#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CE0 +#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CE4 +#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1CE8 +#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1CEC +#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CF0 +#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CF4 +#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1C78 +#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1C7C + +/****************************************/ +/* Ethernet Unit Registers */ 
+/****************************************/ + +/*******************************************/ +/* CUNIT Registers */ +/*******************************************/ + + /* Address Decoding Register Map */ + +#define MV64340_CUNIT_BASE_ADDR_REG0 0xf200 +#define MV64340_CUNIT_BASE_ADDR_REG1 0xf208 +#define MV64340_CUNIT_BASE_ADDR_REG2 0xf210 +#define MV64340_CUNIT_BASE_ADDR_REG3 0xf218 +#define MV64340_CUNIT_SIZE0 0xf204 +#define MV64340_CUNIT_SIZE1 0xf20c +#define MV64340_CUNIT_SIZE2 0xf214 +#define MV64340_CUNIT_SIZE3 0xf21c +#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG0 0xf240 +#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG1 0xf244 +#define MV64340_CUNIT_BASE_ADDR_ENABLE_REG 0xf250 +#define MV64340_MPSC0_ACCESS_PROTECTION_REG 0xf254 +#define MV64340_MPSC1_ACCESS_PROTECTION_REG 0xf258 +#define MV64340_CUNIT_INTERNAL_SPACE_BASE_ADDR_REG 0xf25C + + /* Error Report Registers */ + +#define MV64340_CUNIT_INTERRUPT_CAUSE_REG 0xf310 +#define MV64340_CUNIT_INTERRUPT_MASK_REG 0xf314 +#define MV64340_CUNIT_ERROR_ADDR 0xf318 + + /* Cunit Control Registers */ + +#define MV64340_CUNIT_ARBITER_CONTROL_REG 0xf300 +#define MV64340_CUNIT_CONFIG_REG 0xb40c +#define MV64340_CUNIT_CRROSBAR_TIMEOUT_REG 0xf304 + + /* Cunit Debug Registers */ + +#define MV64340_CUNIT_DEBUG_LOW 0xf340 +#define MV64340_CUNIT_DEBUG_HIGH 0xf344 +#define MV64340_CUNIT_MMASK 0xf380 + + /* MPSCs Clocks Routing Registers */ + +#define MV64340_MPSC_ROUTING_REG 0xb400 +#define MV64340_MPSC_RX_CLOCK_ROUTING_REG 0xb404 +#define MV64340_MPSC_TX_CLOCK_ROUTING_REG 0xb408 + + /* MPSCs Interrupts Registers */ + +#define MV64340_MPSC_CAUSE_REG(port) (0xb804 + (port << 3)) +#define MV64340_MPSC_MASK_REG(port) (0xb884 + (port << 3)) + +#define MV64340_MPSC_MAIN_CONFIG_LOW(port) (0x8000 + (port << 12)) +#define MV64340_MPSC_MAIN_CONFIG_HIGH(port) (0x8004 + (port << 12)) +#define MV64340_MPSC_PROTOCOL_CONFIG(port) (0x8008 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG1(port) (0x800c + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG2(port) (0x8010 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG3(port) (0x8014 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG4(port) (0x8018 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG5(port) (0x801c + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG6(port) (0x8020 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG7(port) (0x8024 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG8(port) (0x8028 + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG9(port) (0x802c + (port << 12)) +#define MV64340_MPSC_CHANNEL_REG10(port) (0x8030 + (port << 12)) + + /* MPSC0 Registers */ + + +/***************************************/ +/* SDMA Registers */ +/***************************************/ + +#define MV64340_SDMA_CONFIG_REG(channel) (0x4000 + (channel << 13)) +#define MV64340_SDMA_COMMAND_REG(channel) (0x4008 + (channel << 13)) +#define MV64340_SDMA_CURRENT_RX_DESCRIPTOR_POINTER(channel) (0x4810 + (channel << 13)) +#define MV64340_SDMA_CURRENT_TX_DESCRIPTOR_POINTER(channel) (0x4c10 + (channel << 13)) +#define MV64340_SDMA_FIRST_TX_DESCRIPTOR_POINTER(channel) (0x4c14 + (channel << 13)) + +#define MV64340_SDMA_CAUSE_REG 0xb800 +#define MV64340_SDMA_MASK_REG 0xb880 + +/* BRG Interrupts */ + +#define MV64340_BRG_CONFIG_REG(brg) (0xb200 + (brg << 3)) +#define MV64340_BRG_BAUDE_TUNING_REG(brg) (0xb208 + (brg << 3)) +#define MV64340_BRG_CAUSE_REG 0xb834 +#define MV64340_BRG_MASK_REG 0xb8b4 + +/****************************************/ +/* DMA Channel Control */ +/****************************************/ + +#define MV64340_DMA_CHANNEL0_CONTROL 
0x840 +#define MV64340_DMA_CHANNEL0_CONTROL_HIGH 0x880 +#define MV64340_DMA_CHANNEL1_CONTROL 0x844 +#define MV64340_DMA_CHANNEL1_CONTROL_HIGH 0x884 +#define MV64340_DMA_CHANNEL2_CONTROL 0x848 +#define MV64340_DMA_CHANNEL2_CONTROL_HIGH 0x888 +#define MV64340_DMA_CHANNEL3_CONTROL 0x84C +#define MV64340_DMA_CHANNEL3_CONTROL_HIGH 0x88C + + +/****************************************/ +/* IDMA Registers */ +/****************************************/ + +#define MV64340_DMA_CHANNEL0_BYTE_COUNT 0x800 +#define MV64340_DMA_CHANNEL1_BYTE_COUNT 0x804 +#define MV64340_DMA_CHANNEL2_BYTE_COUNT 0x808 +#define MV64340_DMA_CHANNEL3_BYTE_COUNT 0x80C +#define MV64340_DMA_CHANNEL0_SOURCE_ADDR 0x810 +#define MV64340_DMA_CHANNEL1_SOURCE_ADDR 0x814 +#define MV64340_DMA_CHANNEL2_SOURCE_ADDR 0x818 +#define MV64340_DMA_CHANNEL3_SOURCE_ADDR 0x81c +#define MV64340_DMA_CHANNEL0_DESTINATION_ADDR 0x820 +#define MV64340_DMA_CHANNEL1_DESTINATION_ADDR 0x824 +#define MV64340_DMA_CHANNEL2_DESTINATION_ADDR 0x828 +#define MV64340_DMA_CHANNEL3_DESTINATION_ADDR 0x82C +#define MV64340_DMA_CHANNEL0_NEXT_DESCRIPTOR_POINTER 0x830 +#define MV64340_DMA_CHANNEL1_NEXT_DESCRIPTOR_POINTER 0x834 +#define MV64340_DMA_CHANNEL2_NEXT_DESCRIPTOR_POINTER 0x838 +#define MV64340_DMA_CHANNEL3_NEXT_DESCRIPTOR_POINTER 0x83C +#define MV64340_DMA_CHANNEL0_CURRENT_DESCRIPTOR_POINTER 0x870 +#define MV64340_DMA_CHANNEL1_CURRENT_DESCRIPTOR_POINTER 0x874 +#define MV64340_DMA_CHANNEL2_CURRENT_DESCRIPTOR_POINTER 0x878 +#define MV64340_DMA_CHANNEL3_CURRENT_DESCRIPTOR_POINTER 0x87C + + /* IDMA Address Decoding Base Address Registers */ + +#define MV64340_DMA_BASE_ADDR_REG0 0xa00 +#define MV64340_DMA_BASE_ADDR_REG1 0xa08 +#define MV64340_DMA_BASE_ADDR_REG2 0xa10 +#define MV64340_DMA_BASE_ADDR_REG3 0xa18 +#define MV64340_DMA_BASE_ADDR_REG4 0xa20 +#define MV64340_DMA_BASE_ADDR_REG5 0xa28 +#define MV64340_DMA_BASE_ADDR_REG6 0xa30 +#define MV64340_DMA_BASE_ADDR_REG7 0xa38 + + /* IDMA Address Decoding Size Address Register */ + +#define MV64340_DMA_SIZE_REG0 0xa04 +#define MV64340_DMA_SIZE_REG1 0xa0c +#define MV64340_DMA_SIZE_REG2 0xa14 +#define MV64340_DMA_SIZE_REG3 0xa1c +#define MV64340_DMA_SIZE_REG4 0xa24 +#define MV64340_DMA_SIZE_REG5 0xa2c +#define MV64340_DMA_SIZE_REG6 0xa34 +#define MV64340_DMA_SIZE_REG7 0xa3C + + /* IDMA Address Decoding High Address Remap and Access Protection Registers */ + +#define MV64340_DMA_HIGH_ADDR_REMAP_REG0 0xa60 +#define MV64340_DMA_HIGH_ADDR_REMAP_REG1 0xa64 +#define MV64340_DMA_HIGH_ADDR_REMAP_REG2 0xa68 +#define MV64340_DMA_HIGH_ADDR_REMAP_REG3 0xa6C +#define MV64340_DMA_BASE_ADDR_ENABLE_REG 0xa80 +#define MV64340_DMA_CHANNEL0_ACCESS_PROTECTION_REG 0xa70 +#define MV64340_DMA_CHANNEL1_ACCESS_PROTECTION_REG 0xa74 +#define MV64340_DMA_CHANNEL2_ACCESS_PROTECTION_REG 0xa78 +#define MV64340_DMA_CHANNEL3_ACCESS_PROTECTION_REG 0xa7c +#define MV64340_DMA_ARBITER_CONTROL 0x860 +#define MV64340_DMA_CROSS_BAR_TIMEOUT 0x8d0 + + /* IDMA Headers Retarget Registers */ + +#define MV64340_DMA_HEADERS_RETARGET_CONTROL 0xa84 +#define MV64340_DMA_HEADERS_RETARGET_BASE 0xa88 + + /* IDMA Interrupt Register */ + +#define MV64340_DMA_INTERRUPT_CAUSE_REG 0x8c0 +#define MV64340_DMA_INTERRUPT_CAUSE_MASK 0x8c4 +#define MV64340_DMA_ERROR_ADDR 0x8c8 +#define MV64340_DMA_ERROR_SELECT 0x8cc + + /* IDMA Debug Register ( for internal use ) */ + +#define MV64340_DMA_DEBUG_LOW 0x8e0 +#define MV64340_DMA_DEBUG_HIGH 0x8e4 +#define MV64340_DMA_SPARE 0xA8C + +/****************************************/ +/* Timer_Counter */ +/****************************************/ + 
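As an aside on the SDMA, MPSC and BRG macros a few sections above: each takes a channel or port index and folds it into the register offset with a shift, so consecutive channels get register blocks at a fixed stride. The standalone sketch below is illustrative only and not part of this header; the unprefixed macro names are local to the example, but the arithmetic mirrors the definitions above.

#include <stdio.h>

/* Local copies of the addressing pattern used by the header above:
 * each SDMA channel occupies a 0x2000-byte block (channel << 13),
 * each MPSC port a 0x1000-byte block (port << 12). */
#define SDMA_CONFIG_REG(channel)    (0x4000 + ((channel) << 13))
#define SDMA_COMMAND_REG(channel)   (0x4008 + ((channel) << 13))
#define MPSC_MAIN_CONFIG_LOW(port)  (0x8000 + ((port) << 12))

int main(void)
{
    for (int i = 0; i < 2; i++) {
        printf("channel/port %d: SDMA config 0x%04x, SDMA command 0x%04x, "
               "MPSC main config low 0x%04x\n",
               i, SDMA_CONFIG_REG(i), SDMA_COMMAND_REG(i),
               MPSC_MAIN_CONFIG_LOW(i));
    }
    return 0;
}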
+#define MV64340_TIMER_COUNTER0 0x850 +#define MV64340_TIMER_COUNTER1 0x854 +#define MV64340_TIMER_COUNTER2 0x858 +#define MV64340_TIMER_COUNTER3 0x85C +#define MV64340_TIMER_COUNTER_0_3_CONTROL 0x864 +#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_CAUSE 0x868 +#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_MASK 0x86c + +/****************************************/ +/* Watchdog registers */ +/****************************************/ + +#define MV64340_WATCHDOG_CONFIG_REG 0xb410 +#define MV64340_WATCHDOG_VALUE_REG 0xb414 + +/****************************************/ +/* I2C Registers */ +/****************************************/ + +#define MV64XXX_I2C_OFFSET 0xc000 +#define MV64XXX_I2C_REG_BLOCK_SIZE 0x0020 + +/****************************************/ +/* GPP Interface Registers */ +/****************************************/ + +#define MV64340_GPP_IO_CONTROL 0xf100 +#define MV64340_GPP_LEVEL_CONTROL 0xf110 +#define MV64340_GPP_VALUE 0xf104 +#define MV64340_GPP_INTERRUPT_CAUSE 0xf108 +#define MV64340_GPP_INTERRUPT_MASK0 0xf10c +#define MV64340_GPP_INTERRUPT_MASK1 0xf114 +#define MV64340_GPP_VALUE_SET 0xf118 +#define MV64340_GPP_VALUE_CLEAR 0xf11c + +/****************************************/ +/* Interrupt Controller Registers */ +/****************************************/ + +/****************************************/ +/* Interrupts */ +/****************************************/ + +#define MV64340_MAIN_INTERRUPT_CAUSE_LOW 0x004 +#define MV64340_MAIN_INTERRUPT_CAUSE_HIGH 0x00c +#define MV64340_CPU_INTERRUPT0_MASK_LOW 0x014 +#define MV64340_CPU_INTERRUPT0_MASK_HIGH 0x01c +#define MV64340_CPU_INTERRUPT0_SELECT_CAUSE 0x024 +#define MV64340_CPU_INTERRUPT1_MASK_LOW 0x034 +#define MV64340_CPU_INTERRUPT1_MASK_HIGH 0x03c +#define MV64340_CPU_INTERRUPT1_SELECT_CAUSE 0x044 +#define MV64340_INTERRUPT0_MASK_0_LOW 0x054 +#define MV64340_INTERRUPT0_MASK_0_HIGH 0x05c +#define MV64340_INTERRUPT0_SELECT_CAUSE 0x064 +#define MV64340_INTERRUPT1_MASK_0_LOW 0x074 +#define MV64340_INTERRUPT1_MASK_0_HIGH 0x07c +#define MV64340_INTERRUPT1_SELECT_CAUSE 0x084 + +/****************************************/ +/* MPP Interface Registers */ +/****************************************/ + +#define MV64340_MPP_CONTROL0 0xf000 +#define MV64340_MPP_CONTROL1 0xf004 +#define MV64340_MPP_CONTROL2 0xf008 +#define MV64340_MPP_CONTROL3 0xf00c + +/****************************************/ +/* Serial Initialization registers */ +/****************************************/ + +#define MV64340_SERIAL_INIT_LAST_DATA 0xf324 +#define MV64340_SERIAL_INIT_CONTROL 0xf328 +#define MV64340_SERIAL_INIT_STATUS 0xf32c + +#endif /* ASM_MV643XX_H */ diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 54f57c660a9..5c375a9f285 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -392,7 +392,7 @@ static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val) v &= 0xffffffffffff0000ull; v |= 0x000000000000cfffull & val; } - *tptr = val; + *tptr = v; break; } case IODA3_TBL_MBT: diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index 5ad1424b31a..89c1b53dd72 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -415,7 +415,6 @@ static const VMStateDescription vmstate_ppce500_pci = { } }; -#include "exec/address-spaces.h" static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp) { diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c deleted file mode 100644 index 0a9162fba97..00000000000 --- a/hw/pci-host/prep.c +++ /dev/null @@ -1,443 +0,0 @@ -/* - * QEMU PREP PCI host - * - * Copyright (c) 2006 Fabrice 
Bellard - * Copyright (c) 2011-2013 Andreas Färber - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qemu/datadir.h" -#include "qemu/units.h" -#include "qemu/log.h" -#include "qapi/error.h" -#include "hw/pci/pci.h" -#include "hw/pci/pci_bus.h" -#include "hw/pci/pci_host.h" -#include "hw/qdev-properties.h" -#include "migration/vmstate.h" -#include "hw/intc/i8259.h" -#include "hw/irq.h" -#include "hw/loader.h" -#include "hw/or-irq.h" -#include "exec/address-spaces.h" -#include "elf.h" -#include "qom/object.h" - -#define TYPE_RAVEN_PCI_DEVICE "raven" -#define TYPE_RAVEN_PCI_HOST_BRIDGE "raven-pcihost" - -OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE) - -struct RavenPCIState { - PCIDevice dev; - - uint32_t elf_machine; - char *bios_name; - MemoryRegion bios; -}; - -typedef struct PRePPCIState PREPPCIState; -DECLARE_INSTANCE_CHECKER(PREPPCIState, RAVEN_PCI_HOST_BRIDGE, - TYPE_RAVEN_PCI_HOST_BRIDGE) - -struct PRePPCIState { - PCIHostState parent_obj; - - qemu_or_irq *or_irq; - qemu_irq pci_irqs[PCI_NUM_PINS]; - PCIBus pci_bus; - AddressSpace pci_io_as; - MemoryRegion pci_io; - MemoryRegion pci_io_non_contiguous; - MemoryRegion pci_memory; - MemoryRegion pci_intack; - MemoryRegion bm; - MemoryRegion bm_ram_alias; - MemoryRegion bm_pci_memory_alias; - AddressSpace bm_as; - RavenPCIState pci_dev; - - int contiguous_map; - bool is_legacy_prep; -}; - -#define BIOS_SIZE (1 * MiB) - -static inline uint32_t raven_pci_io_config(hwaddr addr) -{ - int i; - - for (i = 0; i < 11; i++) { - if ((addr & (1 << (11 + i))) != 0) { - break; - } - } - return (addr & 0x7ff) | (i << 11); -} - -static void raven_pci_io_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - PREPPCIState *s = opaque; - PCIHostState *phb = PCI_HOST_BRIDGE(s); - pci_data_write(phb->bus, raven_pci_io_config(addr), val, size); -} - -static uint64_t raven_pci_io_read(void *opaque, hwaddr addr, - unsigned int size) -{ - PREPPCIState *s = opaque; - PCIHostState *phb = PCI_HOST_BRIDGE(s); - return pci_data_read(phb->bus, raven_pci_io_config(addr), size); -} - -static const MemoryRegionOps raven_pci_io_ops = { - .read = raven_pci_io_read, - .write = raven_pci_io_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static uint64_t raven_intack_read(void *opaque, hwaddr addr, - unsigned int size) -{ - return pic_read_irq(isa_pic); -} - -static void raven_intack_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ 
- qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); -} - -static const MemoryRegionOps raven_intack_ops = { - .read = raven_intack_read, - .write = raven_intack_write, - .valid = { - .max_access_size = 1, - }, -}; - -static inline hwaddr raven_io_address(PREPPCIState *s, - hwaddr addr) -{ - if (s->contiguous_map == 0) { - /* 64 KB contiguous space for IOs */ - addr &= 0xFFFF; - } else { - /* 8 MB non-contiguous space for IOs */ - addr = (addr & 0x1F) | ((addr & 0x007FFF000) >> 7); - } - - /* FIXME: handle endianness switch */ - - return addr; -} - -static uint64_t raven_io_read(void *opaque, hwaddr addr, - unsigned int size) -{ - PREPPCIState *s = opaque; - uint8_t buf[4]; - - addr = raven_io_address(s, addr); - address_space_read(&s->pci_io_as, addr + 0x80000000, - MEMTXATTRS_UNSPECIFIED, buf, size); - - if (size == 1) { - return buf[0]; - } else if (size == 2) { - return lduw_le_p(buf); - } else if (size == 4) { - return ldl_le_p(buf); - } else { - g_assert_not_reached(); - } -} - -static void raven_io_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - PREPPCIState *s = opaque; - uint8_t buf[4]; - - addr = raven_io_address(s, addr); - - if (size == 1) { - buf[0] = val; - } else if (size == 2) { - stw_le_p(buf, val); - } else if (size == 4) { - stl_le_p(buf, val); - } else { - g_assert_not_reached(); - } - - address_space_write(&s->pci_io_as, addr + 0x80000000, - MEMTXATTRS_UNSPECIFIED, buf, size); -} - -static const MemoryRegionOps raven_io_ops = { - .read = raven_io_read, - .write = raven_io_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl.max_access_size = 4, - .valid.unaligned = true, -}; - -static int raven_map_irq(PCIDevice *pci_dev, int irq_num) -{ - return (irq_num + (pci_dev->devfn >> 3)) & 1; -} - -static void raven_set_irq(void *opaque, int irq_num, int level) -{ - PREPPCIState *s = opaque; - - qemu_set_irq(s->pci_irqs[irq_num], level); -} - -static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque, - int devfn) -{ - PREPPCIState *s = opaque; - - return &s->bm_as; -} - -static void raven_change_gpio(void *opaque, int n, int level) -{ - PREPPCIState *s = opaque; - - s->contiguous_map = level; -} - -static void raven_pcihost_realizefn(DeviceState *d, Error **errp) -{ - SysBusDevice *dev = SYS_BUS_DEVICE(d); - PCIHostState *h = PCI_HOST_BRIDGE(dev); - PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(dev); - MemoryRegion *address_space_mem = get_system_memory(); - int i; - - if (s->is_legacy_prep) { - for (i = 0; i < PCI_NUM_PINS; i++) { - sysbus_init_irq(dev, &s->pci_irqs[i]); - } - } else { - /* According to PReP specification section 6.1.6 "System Interrupt - * Assignments", all PCI interrupts are routed via IRQ 15 */ - s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); - object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, - &error_fatal); - qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); - sysbus_init_irq(dev, &s->or_irq->out_irq); - - for (i = 0; i < PCI_NUM_PINS; i++) { - s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); - } - } - - qdev_init_gpio_in(d, raven_change_gpio, 1); - - pci_bus_irqs(&s->pci_bus, raven_set_irq, raven_map_irq, s, PCI_NUM_PINS); - - memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops, s, - "pci-conf-idx", 4); - memory_region_add_subregion(&s->pci_io, 0xcf8, &h->conf_mem); - - memory_region_init_io(&h->data_mem, OBJECT(h), &pci_host_data_le_ops, s, - "pci-conf-data", 4); - memory_region_add_subregion(&s->pci_io, 0xcfc, &h->data_mem); - - memory_region_init_io(&h->mmcfg, 
OBJECT(s), &raven_pci_io_ops, s, - "pciio", 0x00400000); - memory_region_add_subregion(address_space_mem, 0x80800000, &h->mmcfg); - - memory_region_init_io(&s->pci_intack, OBJECT(s), &raven_intack_ops, s, - "pci-intack", 1); - memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->pci_intack); - - /* TODO Remove once realize propagates to child devices. */ - qdev_realize(DEVICE(&s->pci_dev), BUS(&s->pci_bus), errp); -} - -static void raven_pcihost_initfn(Object *obj) -{ - PCIHostState *h = PCI_HOST_BRIDGE(obj); - PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(obj); - MemoryRegion *address_space_mem = get_system_memory(); - DeviceState *pci_dev; - - memory_region_init(&s->pci_io, obj, "pci-io", 0x3f800000); - memory_region_init_io(&s->pci_io_non_contiguous, obj, &raven_io_ops, s, - "pci-io-non-contiguous", 0x00800000); - memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); - address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); - - /* CPU address space */ - memory_region_add_subregion(address_space_mem, 0x80000000, &s->pci_io); - memory_region_add_subregion_overlap(address_space_mem, 0x80000000, - &s->pci_io_non_contiguous, 1); - memory_region_add_subregion(address_space_mem, 0xc0000000, &s->pci_memory); - pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), NULL, - &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS); - - /* Bus master address space */ - memory_region_init(&s->bm, obj, "bm-raven", 4 * GiB); - memory_region_init_alias(&s->bm_pci_memory_alias, obj, "bm-pci-memory", - &s->pci_memory, 0, - memory_region_size(&s->pci_memory)); - memory_region_init_alias(&s->bm_ram_alias, obj, "bm-system", - get_system_memory(), 0, 0x80000000); - memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias); - memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias); - address_space_init(&s->bm_as, &s->bm, "raven-bm"); - pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); - - h->bus = &s->pci_bus; - - object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_RAVEN_PCI_DEVICE); - pci_dev = DEVICE(&s->pci_dev); - object_property_set_int(OBJECT(&s->pci_dev), "addr", PCI_DEVFN(0, 0), - NULL); - qdev_prop_set_bit(pci_dev, "multifunction", false); -} - -static void raven_realize(PCIDevice *d, Error **errp) -{ - RavenPCIState *s = RAVEN_PCI_DEVICE(d); - char *filename; - int bios_size = -1; - - d->config[0x0C] = 0x08; // cache_line_size - d->config[0x0D] = 0x10; // latency_timer - d->config[0x34] = 0x00; // capabilities_pointer - - memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE, - &error_fatal); - memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), - &s->bios); - if (s->bios_name) { - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); - if (filename) { - if (s->elf_machine != EM_NONE) { - bios_size = load_elf(filename, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, 1, s->elf_machine, - 0, 0); - } - if (bios_size < 0) { - bios_size = get_image_size(filename); - if (bios_size > 0 && bios_size <= BIOS_SIZE) { - hwaddr bios_addr; - bios_size = (bios_size + 0xfff) & ~0xfff; - bios_addr = (uint32_t)(-BIOS_SIZE); - bios_size = load_image_targphys(filename, bios_addr, - bios_size); - } - } - } - g_free(filename); - if (bios_size < 0 || bios_size > BIOS_SIZE) { - memory_region_del_subregion(get_system_memory(), &s->bios); - error_setg(errp, "Could not load bios image '%s'", s->bios_name); - return; - } - } - - vmstate_register_ram_global(&s->bios); -} - -static const VMStateDescription vmstate_raven = { - .name = 
"raven", - .version_id = 0, - .minimum_version_id = 0, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, RavenPCIState), - VMSTATE_END_OF_LIST() - }, -}; - -static void raven_class_init(ObjectClass *klass, void *data) -{ - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass *dc = DEVICE_CLASS(klass); - - k->realize = raven_realize; - k->vendor_id = PCI_VENDOR_ID_MOTOROLA; - k->device_id = PCI_DEVICE_ID_MOTOROLA_RAVEN; - k->revision = 0x00; - k->class_id = PCI_CLASS_BRIDGE_HOST; - dc->desc = "PReP Host Bridge - Motorola Raven"; - dc->vmsd = &vmstate_raven; - /* - * Reason: PCI-facing part of the host bridge, not usable without - * the host-facing part, which can't be device_add'ed, yet. - */ - dc->user_creatable = false; -} - -static const TypeInfo raven_info = { - .name = TYPE_RAVEN_PCI_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(RavenPCIState), - .class_init = raven_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, -}; - -static Property raven_pcihost_properties[] = { - DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine, - EM_NONE), - DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name), - /* Temporary workaround until legacy prep machine is removed */ - DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep, - false), - DEFINE_PROP_END_OF_LIST() -}; - -static void raven_pcihost_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->realize = raven_pcihost_realizefn; - device_class_set_props(dc, raven_pcihost_properties); - dc->fw_name = "pci"; -} - -static const TypeInfo raven_pcihost_info = { - .name = TYPE_RAVEN_PCI_HOST_BRIDGE, - .parent = TYPE_PCI_HOST_BRIDGE, - .instance_size = sizeof(PREPPCIState), - .instance_init = raven_pcihost_initfn, - .class_init = raven_pcihost_class_init, -}; - -static void raven_register_types(void) -{ - type_register_static(&raven_pcihost_info); - type_register_static(&raven_info); -} - -type_init(raven_register_types) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 2eb729dff58..ab5a47aff56 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -29,6 +29,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/i386/pc.h" #include "hw/pci-host/q35.h" #include "hw/qdev-properties.h" @@ -64,6 +65,8 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.address_space_io, 0, TYPE_PCIE_BUS); PC_MACHINE(qdev_get_machine())->bus = pci->bus; + pci->bypass_iommu = + PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); } @@ -318,6 +321,8 @@ static void mch_update_pciexbar(MCHPCIState *mch) addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK; break; case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_RVD: + qemu_log_mask(LOG_GUEST_ERROR, "Q35: Reserved PCIEXBAR LENGTH\n"); + return; default: abort(); } diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c new file mode 100644 index 00000000000..6e514f75eb8 --- /dev/null +++ b/hw/pci-host/raven.c @@ -0,0 +1,445 @@ +/* + * QEMU PREP PCI host + * + * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2011-2013 Andreas Färber + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_host.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/intc/i8259.h" +#include "hw/irq.h" +#include "hw/loader.h" +#include "hw/or-irq.h" +#include "elf.h" +#include "qom/object.h" + +#define TYPE_RAVEN_PCI_DEVICE "raven" +#define TYPE_RAVEN_PCI_HOST_BRIDGE "raven-pcihost" + +OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE) + +struct RavenPCIState { + PCIDevice dev; + + uint32_t elf_machine; + char *bios_name; + MemoryRegion bios; +}; + +typedef struct PRePPCIState PREPPCIState; +DECLARE_INSTANCE_CHECKER(PREPPCIState, RAVEN_PCI_HOST_BRIDGE, + TYPE_RAVEN_PCI_HOST_BRIDGE) + +struct PRePPCIState { + PCIHostState parent_obj; + + qemu_or_irq *or_irq; + qemu_irq pci_irqs[PCI_NUM_PINS]; + PCIBus pci_bus; + AddressSpace pci_io_as; + MemoryRegion pci_io; + MemoryRegion pci_io_non_contiguous; + MemoryRegion pci_memory; + MemoryRegion pci_intack; + MemoryRegion bm; + MemoryRegion bm_ram_alias; + MemoryRegion bm_pci_memory_alias; + AddressSpace bm_as; + RavenPCIState pci_dev; + + int contiguous_map; + bool is_legacy_prep; +}; + +#define BIOS_SIZE (1 * MiB) + +#define PCI_IO_BASE_ADDR 0x80000000 /* Physical address on main bus */ + +static inline uint32_t raven_pci_io_config(hwaddr addr) +{ + int i; + + for (i = 0; i < 11; i++) { + if ((addr & (1 << (11 + i))) != 0) { + break; + } + } + return (addr & 0x7ff) | (i << 11); +} + +static void raven_pci_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + PREPPCIState *s = opaque; + PCIHostState *phb = PCI_HOST_BRIDGE(s); + pci_data_write(phb->bus, raven_pci_io_config(addr), val, size); +} + +static uint64_t raven_pci_io_read(void *opaque, hwaddr addr, + unsigned int size) +{ + PREPPCIState *s = opaque; + PCIHostState *phb = PCI_HOST_BRIDGE(s); + return pci_data_read(phb->bus, raven_pci_io_config(addr), size); +} + +static const MemoryRegionOps raven_pci_io_ops = { + .read = raven_pci_io_read, + .write = raven_pci_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t raven_intack_read(void *opaque, hwaddr addr, + unsigned int size) +{ + return pic_read_irq(isa_pic); +} + +static void raven_intack_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); +} + +static const MemoryRegionOps raven_intack_ops = { + .read = raven_intack_read, + .write = raven_intack_write, + .valid = { + .max_access_size = 1, + }, +}; + +static inline hwaddr raven_io_address(PREPPCIState *s, + hwaddr addr) +{ + if 
(s->contiguous_map == 0) { + /* 64 KB contiguous space for IOs */ + addr &= 0xFFFF; + } else { + /* 8 MB non-contiguous space for IOs */ + addr = (addr & 0x1F) | ((addr & 0x007FFF000) >> 7); + } + + /* FIXME: handle endianness switch */ + + return addr; +} + +static uint64_t raven_io_read(void *opaque, hwaddr addr, + unsigned int size) +{ + PREPPCIState *s = opaque; + uint8_t buf[4]; + + addr = raven_io_address(s, addr); + address_space_read(&s->pci_io_as, addr + PCI_IO_BASE_ADDR, + MEMTXATTRS_UNSPECIFIED, buf, size); + + if (size == 1) { + return buf[0]; + } else if (size == 2) { + return lduw_le_p(buf); + } else if (size == 4) { + return ldl_le_p(buf); + } else { + g_assert_not_reached(); + } +} + +static void raven_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + PREPPCIState *s = opaque; + uint8_t buf[4]; + + addr = raven_io_address(s, addr); + + if (size == 1) { + buf[0] = val; + } else if (size == 2) { + stw_le_p(buf, val); + } else if (size == 4) { + stl_le_p(buf, val); + } else { + g_assert_not_reached(); + } + + address_space_write(&s->pci_io_as, addr + PCI_IO_BASE_ADDR, + MEMTXATTRS_UNSPECIFIED, buf, size); +} + +static const MemoryRegionOps raven_io_ops = { + .read = raven_io_read, + .write = raven_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl.max_access_size = 4, + .valid.unaligned = true, +}; + +static int raven_map_irq(PCIDevice *pci_dev, int irq_num) +{ + return (irq_num + (pci_dev->devfn >> 3)) & 1; +} + +static void raven_set_irq(void *opaque, int irq_num, int level) +{ + PREPPCIState *s = opaque; + + qemu_set_irq(s->pci_irqs[irq_num], level); +} + +static AddressSpace *raven_pcihost_set_iommu(PCIBus *bus, void *opaque, + int devfn) +{ + PREPPCIState *s = opaque; + + return &s->bm_as; +} + +static void raven_change_gpio(void *opaque, int n, int level) +{ + PREPPCIState *s = opaque; + + s->contiguous_map = level; +} + +static void raven_pcihost_realizefn(DeviceState *d, Error **errp) +{ + SysBusDevice *dev = SYS_BUS_DEVICE(d); + PCIHostState *h = PCI_HOST_BRIDGE(dev); + PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(dev); + MemoryRegion *address_space_mem = get_system_memory(); + int i; + + if (s->is_legacy_prep) { + for (i = 0; i < PCI_NUM_PINS; i++) { + sysbus_init_irq(dev, &s->pci_irqs[i]); + } + } else { + /* According to PReP specification section 6.1.6 "System Interrupt + * Assignments", all PCI interrupts are routed via IRQ 15 */ + s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); + object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, + &error_fatal); + qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); + sysbus_init_irq(dev, &s->or_irq->out_irq); + + for (i = 0; i < PCI_NUM_PINS; i++) { + s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); + } + } + + qdev_init_gpio_in(d, raven_change_gpio, 1); + + pci_bus_irqs(&s->pci_bus, raven_set_irq, raven_map_irq, s, PCI_NUM_PINS); + + memory_region_init_io(&h->conf_mem, OBJECT(h), &pci_host_conf_le_ops, s, + "pci-conf-idx", 4); + memory_region_add_subregion(&s->pci_io, 0xcf8, &h->conf_mem); + + memory_region_init_io(&h->data_mem, OBJECT(h), &pci_host_data_le_ops, s, + "pci-conf-data", 4); + memory_region_add_subregion(&s->pci_io, 0xcfc, &h->data_mem); + + memory_region_init_io(&h->mmcfg, OBJECT(s), &raven_pci_io_ops, s, + "pciio", 0x00400000); + memory_region_add_subregion(address_space_mem, 0x80800000, &h->mmcfg); + + memory_region_init_io(&s->pci_intack, OBJECT(s), &raven_intack_ops, s, + "pci-intack", 1); + memory_region_add_subregion(address_space_mem, 0xbffffff0, 
&s->pci_intack); + + /* TODO Remove once realize propagates to child devices. */ + qdev_realize(DEVICE(&s->pci_dev), BUS(&s->pci_bus), errp); +} + +static void raven_pcihost_initfn(Object *obj) +{ + PCIHostState *h = PCI_HOST_BRIDGE(obj); + PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(obj); + MemoryRegion *address_space_mem = get_system_memory(); + DeviceState *pci_dev; + + memory_region_init(&s->pci_io, obj, "pci-io", 0x3f800000); + memory_region_init_io(&s->pci_io_non_contiguous, obj, &raven_io_ops, s, + "pci-io-non-contiguous", 0x00800000); + memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); + address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + + /* CPU address space */ + memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io); + memory_region_add_subregion_overlap(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io_non_contiguous, 1); + memory_region_add_subregion(address_space_mem, 0xc0000000, &s->pci_memory); + pci_root_bus_init(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), NULL, + &s->pci_memory, &s->pci_io, 0, TYPE_PCI_BUS); + + /* Bus master address space */ + memory_region_init(&s->bm, obj, "bm-raven", 4 * GiB); + memory_region_init_alias(&s->bm_pci_memory_alias, obj, "bm-pci-memory", + &s->pci_memory, 0, + memory_region_size(&s->pci_memory)); + memory_region_init_alias(&s->bm_ram_alias, obj, "bm-system", + get_system_memory(), 0, 0x80000000); + memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias); + memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias); + address_space_init(&s->bm_as, &s->bm, "raven-bm"); + pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); + + h->bus = &s->pci_bus; + + object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_RAVEN_PCI_DEVICE); + pci_dev = DEVICE(&s->pci_dev); + object_property_set_int(OBJECT(&s->pci_dev), "addr", PCI_DEVFN(0, 0), + NULL); + qdev_prop_set_bit(pci_dev, "multifunction", false); +} + +static void raven_realize(PCIDevice *d, Error **errp) +{ + RavenPCIState *s = RAVEN_PCI_DEVICE(d); + char *filename; + int bios_size = -1; + + d->config[0x0C] = 0x08; // cache_line_size + d->config[0x0D] = 0x10; // latency_timer + d->config[0x34] = 0x00; // capabilities_pointer + + memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE, + &error_fatal); + memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), + &s->bios); + if (s->bios_name) { + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); + if (filename) { + if (s->elf_machine != EM_NONE) { + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, 1, s->elf_machine, + 0, 0); + } + if (bios_size < 0) { + bios_size = get_image_size(filename); + if (bios_size > 0 && bios_size <= BIOS_SIZE) { + hwaddr bios_addr; + bios_size = (bios_size + 0xfff) & ~0xfff; + bios_addr = (uint32_t)(-BIOS_SIZE); + bios_size = load_image_targphys(filename, bios_addr, + bios_size); + } + } + } + g_free(filename); + if (bios_size < 0 || bios_size > BIOS_SIZE) { + memory_region_del_subregion(get_system_memory(), &s->bios); + error_setg(errp, "Could not load bios image '%s'", s->bios_name); + return; + } + } + + vmstate_register_ram_global(&s->bios); +} + +static const VMStateDescription vmstate_raven = { + .name = "raven", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, RavenPCIState), + VMSTATE_END_OF_LIST() + }, +}; + +static void raven_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + 
DeviceClass *dc = DEVICE_CLASS(klass); + + k->realize = raven_realize; + k->vendor_id = PCI_VENDOR_ID_MOTOROLA; + k->device_id = PCI_DEVICE_ID_MOTOROLA_RAVEN; + k->revision = 0x00; + k->class_id = PCI_CLASS_BRIDGE_HOST; + dc->desc = "PReP Host Bridge - Motorola Raven"; + dc->vmsd = &vmstate_raven; + /* + * Reason: PCI-facing part of the host bridge, not usable without + * the host-facing part, which can't be device_add'ed, yet. + */ + dc->user_creatable = false; +} + +static const TypeInfo raven_info = { + .name = TYPE_RAVEN_PCI_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(RavenPCIState), + .class_init = raven_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static Property raven_pcihost_properties[] = { + DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine, + EM_NONE), + DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name), + /* Temporary workaround until legacy prep machine is removed */ + DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep, + false), + DEFINE_PROP_END_OF_LIST() +}; + +static void raven_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->realize = raven_pcihost_realizefn; + device_class_set_props(dc, raven_pcihost_properties); + dc->fw_name = "pci"; +} + +static const TypeInfo raven_pcihost_info = { + .name = TYPE_RAVEN_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(PREPPCIState), + .instance_init = raven_pcihost_initfn, + .class_init = raven_pcihost_class_init, +}; + +static void raven_register_types(void) +{ + type_register_static(&raven_pcihost_info); + type_register_static(&raven_info); +} + +type_init(raven_register_types) diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c index f41a0cc3013..949ecc21f2b 100644 --- a/hw/pci-host/sabre.c +++ b/hw/pci-host/sabre.c @@ -34,7 +34,6 @@ #include "hw/irq.h" #include "hw/pci-bridge/simba.h" #include "hw/pci-host/sabre.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" diff --git a/hw/pci-host/sh_pci.c b/hw/pci-host/sh_pci.c index 734892f47c7..719d6ca2a6d 100644 --- a/hw/pci-host/sh_pci.c +++ b/hw/pci-host/sh_pci.c @@ -30,7 +30,6 @@ #include "hw/pci/pci_host.h" #include "qemu/bswap.h" #include "qemu/module.h" -#include "exec/address-spaces.h" #include "qom/object.h" #define TYPE_SH_PCI_HOST_BRIDGE "sh_pci" @@ -50,13 +49,12 @@ struct SHPCIState { uint32_t iobr; }; -static void sh_pci_reg_write (void *p, hwaddr addr, uint64_t val, - unsigned size) +static void sh_pci_reg_write(void *p, hwaddr addr, uint64_t val, unsigned size) { SHPCIState *pcic = p; PCIHostState *phb = PCI_HOST_BRIDGE(pcic); - switch(addr) { + switch (addr) { case 0 ... 0xfc: stl_le_p(pcic->dev->config + addr, val); break; @@ -76,13 +74,12 @@ static void sh_pci_reg_write (void *p, hwaddr addr, uint64_t val, } } -static uint64_t sh_pci_reg_read (void *p, hwaddr addr, - unsigned size) +static uint64_t sh_pci_reg_read(void *p, hwaddr addr, unsigned size) { SHPCIState *pcic = p; PCIHostState *phb = PCI_HOST_BRIDGE(pcic); - switch(addr) { + switch (addr) { case 0 ... 0xfc: return ldl_le_p(pcic->dev->config + addr); case 0x1c0: diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events index 7d8063ac421..630e9fcc5e7 100644 --- a/hw/pci-host/trace-events +++ b/hw/pci-host/trace-events @@ -1,8 +1,17 @@ -# See docs/devel/tracing.txt for syntax documentation. 
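The trace points added below follow the standard trace-events pattern: each entry declares an event name, its typed arguments, and a printf-style format string, and the build generates a matching trace_<name>() helper for device code to call. A minimal sketch of how the bonito event declared below could be emitted from C (the wrapper function and the size check are illustrative, not part of this patch):

/* illustrative caller for the bonito_spciconf_small_access trace event */
#include "qemu/osdep.h"
#include "trace.h"   /* generated from this directory's trace-events file */

static void spciconf_log_small_access(uint64_t addr, unsigned size)
{
    if (size < 4) {
        /* a no-op or an emitted event, depending on the trace backend */
        trace_bonito_spciconf_small_access(addr, size);
    }
}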
+# See docs/devel/tracing.rst for syntax documentation. + +# bonito.c +bonito_spciconf_small_access(uint64_t addr, unsigned size) "PCI config address is smaller then 32-bit, addr: 0x%"PRIx64", size: %u" # grackle.c grackle_set_irq(int irq_num, int level) "set_irq num %d level %d" +# mv64361.c +mv64361_region_map(const char *name, uint64_t poffs, uint64_t size, uint64_t moffs) "Mapping %s 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 +mv64361_region_enable(const char *op, int num) "Should %s region %d" +mv64361_reg_read(uint64_t addr, uint32_t val) "0x%"PRIx64" -> 0x%x" +mv64361_reg_write(uint64_t addr, uint64_t val) "0x%"PRIx64" <- 0x%"PRIx64 + # sabre.c sabre_set_request(int irq_num) "request irq %d" sabre_clear_request(int irq_num) "clear request irq %d" diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 3553277f941..f66384fa02d 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -405,9 +405,9 @@ static void pci_vpb_realize(DeviceState *dev, Error **errp) memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 4 * GiB); memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 4 * GiB); - pci_root_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", - &s->pci_mem_space, &s->pci_io_space, - PCI_DEVFN(11, 0), TYPE_PCI_BUS); + pci_root_bus_init(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", + &s->pci_mem_space, &s->pci_io_space, + PCI_DEVFN(11, 0), TYPE_PCI_BUS); h->bus = &s->pci_bus; object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 8f35e13a0cb..e5993c1ef52 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -45,7 +45,6 @@ #include "trace.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" -#include "exec/address-spaces.h" #include "hw/hotplug.h" #include "hw/boards.h" #include "qapi/error.h" @@ -417,10 +416,26 @@ const char *pci_root_bus_path(PCIDevice *dev) return rootbus->qbus.name; } -static void pci_root_bus_init(PCIBus *bus, DeviceState *parent, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min) +bool pci_bus_bypass_iommu(PCIBus *bus) +{ + PCIBus *rootbus = bus; + PCIHostState *host_bridge; + + if (!pci_bus_is_root(bus)) { + rootbus = pci_device_root_bus(bus->parent_dev); + } + + host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent); + + assert(host_bridge->bus == rootbus); + + return host_bridge->bypass_iommu; +} + +static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min) { assert(PCI_FUNC(devfn_min) == 0); bus->devfn_min = devfn_min; @@ -445,15 +460,15 @@ bool pci_bus_is_express(PCIBus *bus) return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS); } -void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, - const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min, const char *typename) +void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename) { - qbus_create_inplace(bus, bus_size, typename, parent, name); - pci_root_bus_init(bus, parent, address_space_mem, address_space_io, - devfn_min); + qbus_init(bus, bus_size, typename, parent, name); + pci_root_bus_internal_init(bus, parent, address_space_mem, + address_space_io, devfn_min); } PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, @@ -463,9 +478,9 @@ 
PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, { PCIBus *bus; - bus = PCI_BUS(qbus_create(typename, parent, name)); - pci_root_bus_init(bus, parent, address_space_mem, address_space_io, - devfn_min); + bus = PCI_BUS(qbus_new(typename, parent, name)); + pci_root_bus_internal_init(bus, parent, address_space_mem, + address_space_io, devfn_min); return bus; } @@ -522,6 +537,22 @@ int pci_bus_num(PCIBus *s) return PCI_BUS_GET_CLASS(s)->bus_num(s); } +/* Returns the min and max bus numbers of a PCI bus hierarchy */ +void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus) +{ + int i; + *min_bus = *max_bus = pci_bus_num(bus); + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDevice *dev = bus->devices[i]; + + if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + *min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]); + *max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]); + } + } +} + int pci_bus_numa_node(PCIBus *bus) { return PCI_BUS_GET_CLASS(bus)->numa_node(bus); @@ -1349,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); + if (!d->has_power) { + new_addr = PCI_BAR_UNMAPPED; + } /* This bar isn't changed */ if (new_addr == r->addr) @@ -1433,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (range_covers_byte(addr, l, PCI_COMMAND)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -1623,11 +1657,9 @@ static const pci_class_desc pci_class_descriptions[] = { 0, NULL} }; -static void pci_for_each_device_under_bus_reverse(PCIBus *bus, - void (*fn)(PCIBus *b, - PCIDevice *d, - void *opaque), - void *opaque) +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque) { PCIDevice *d; int devfn; @@ -1641,8 +1673,7 @@ static void pci_for_each_device_under_bus_reverse(PCIBus *bus, } void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -1651,10 +1682,8 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num, } } -static void pci_for_each_device_under_bus(PCIBus *bus, - void (*fn)(PCIBus *b, PCIDevice *d, - void *opaque), - void *opaque) +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque) { PCIDevice *d; int devfn; @@ -1668,8 +1697,7 @@ static void pci_for_each_device_under_bus(PCIBus *bus, } void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -2047,10 +2075,8 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) return NULL; } -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state) +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state) { PCIBus *sec; void *state; @@ -2159,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + + pci_set_power(pci_dev, true); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, @@ 
-2719,7 +2747,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (iommu_bus && iommu_bus->iommu_fn) { + if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) { return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); } return &address_space_memory; @@ -2830,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ + if (d->has_power == state) { + return; + } + + d->has_power = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { + pci_device_reset(d); + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 3789c17edc2..da34c8ebcd1 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -374,8 +374,8 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) br->bus_name = dev->qdev.id; } - qbus_create_inplace(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), - br->bus_name); + qbus_init(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), + br->bus_name); sec_bus->parent_dev = dev; sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn; sec_bus->address_space_mem = &br->address_space_mem; @@ -448,11 +448,11 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, PCIBridgeQemuCap cap = { .len = cap_len, .type = REDHAT_PCI_CAP_RESOURCE_RESERVE, - .bus_res = res_reserve.bus, - .io = res_reserve.io, - .mem = res_reserve.mem_non_pref, - .mem_pref_32 = res_reserve.mem_pref_32, - .mem_pref_64 = res_reserve.mem_pref_64 + .bus_res = cpu_to_le32(res_reserve.bus), + .io = cpu_to_le64(res_reserve.io), + .mem = cpu_to_le32(res_reserve.mem_non_pref), + .mem_pref_32 = cpu_to_le32(res_reserve.mem_pref_32), + .mem_pref_64 = cpu_to_le64(res_reserve.mem_pref_64) }; int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 8ca5fadcbd4..7beafd40a8e 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return; } @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. 
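 * A function whose slot power has been switched off (see pci_set_power()) is
 * treated the same way below: configuration writes are discarded and the
 * matching read path returns all ones, so the device appears absent to the
 * guest until power is restored.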
*/ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return ~0x0; } @@ -222,6 +224,7 @@ const VMStateDescription vmstate_pcihost = { static Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), + DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index fd0fa157e81..d7d73a31e4c 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev) } } +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + bool *power = opaque; + + pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ + uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + bool power = true; + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + } + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_set_power_device, &power); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } + pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + pcie_cap_update_power(hotplug_pdev); } } @@ -472,6 +497,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || + (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -481,6 +525,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -496,7 +541,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + dev->pending_deleted_event = true; + dev->pending_deleted_expires_ms = + 
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, @@ -509,6 +562,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && + ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + /* slot is powered off -> unplug without round-trip to the guest */ + pcie_cap_slot_do_unplug(hotplug_pdev); + hotplug_event_notify(hotplug_pdev); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); + return; + } + pcie_cap_slot_push_attention_button(hotplug_pdev); } @@ -529,7 +592,13 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCAP_PIP | PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_ABP); - if (s->hotplug) { + + /* + * Enable native hot-plug on all hot-plugged bridges unless + * hot-plug is disabled on the slot. + */ + if (s->hotplug && + (s->native_hotplug || DEVICE(dev)->hotplugged)) { pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, PCI_EXP_SLTCAP_HPS | PCI_EXP_SLTCAP_HPC); @@ -619,6 +688,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); + pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -637,7 +707,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); - uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { /* @@ -687,20 +756,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - pcie_unplug_device, NULL); - - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || - (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); + pcie_cap_slot_do_unplug(dev); } + pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -727,6 +785,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); + pcie_cap_update_power(dev); return 0; } @@ -870,8 +929,8 @@ void pcie_add_capability(PCIDevice *dev, uint16_t offset, uint16_t size) { assert(offset >= PCI_CONFIG_SPACE_SIZE); - assert(offset < offset + size); - assert(offset + size <= PCIE_CONFIG_SPACE_SIZE); + assert(offset < (uint16_t)(offset + size)); + assert((uint16_t)(offset + size) <= PCIE_CONFIG_SPACE_SIZE); assert(size >= 8); assert(pci_is_express(dev)); diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c index 3534006f993..5abbe832202 100644 --- a/hw/pci/pcie_host.c +++ b/hw/pci/pcie_host.c @@ -23,7 +23,6 @@ #include "hw/pci/pci.h" #include "hw/pci/pcie_host.h" #include "qemu/module.h" -#include "exec/address-spaces.h" /* a helper function to get a PCIDevice for a given mmconfig address */ static inline PCIDevice *pcie_dev_find_by_mmcfg_addr(PCIBus *s, diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index eb563ad4354..e95c1e5519c 100644 --- a/hw/pci/pcie_port.c +++ 
b/hw/pci/pcie_port.c @@ -148,6 +148,7 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), + DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/trace-events b/hw/pci/trace-events index def4b3926d4..fc777d0b5e6 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pci.c pci_update_mappings_del(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 diff --git a/hw/pcmcia/meson.build b/hw/pcmcia/meson.build index ab50bd325d6..51f2512b8ed 100644 --- a/hw/pcmcia/meson.build +++ b/hw/pcmcia/meson.build @@ -1,2 +1,2 @@ -softmmu_ss.add(files('pcmcia.c')) +softmmu_ss.add(when: 'CONFIG_PCMCIA', if_true: files('pcmcia.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c')) diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig index d11dc30509d..400511c6b70 100644 --- a/hw/ppc/Kconfig +++ b/hw/ppc/Kconfig @@ -3,6 +3,7 @@ config PSERIES imply PCI_DEVICES imply TEST_DEVICES imply VIRTIO_VGA + imply NVDIMM select DIMM select PCI select SPAPR_VSCSI @@ -12,6 +13,7 @@ config PSERIES select MSI_NONBROKEN select FDT_PPC select CHRP_NVRAM + select VOF config SPAPR_RNG bool @@ -68,12 +70,23 @@ config SAM460EX select USB_OHCI select FDT_PPC +config PEGASOS2 + bool + select MV64361 + select VT82C686 + select IDE_VIA + select SMBUS_EEPROM + select VOF +# This should come with VT82C686 + select ACPI_X86 + imply ATI_VGA + config PREP bool imply PCI_DEVICES imply TEST_DEVICES select CS4231A - select PREP_PCI + select RAVEN_PCI select I82378 select LSI_SCSI_PCI select M48T59 @@ -118,6 +131,7 @@ config E500 select SERIAL select MPC_I2C select FDT_PPC + select DS1338 config VIRTEX bool @@ -134,3 +148,6 @@ config FW_CFG_PPC config FDT_PPC bool + +config VOF + bool diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 79467ac5123..960e7efcd31 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -25,7 +25,6 @@ #include "qemu/config-file.h" #include "hw/char/serial.h" #include "hw/pci/pci.h" -#include "hw/boards.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" @@ -39,7 +38,6 @@ #include "hw/loader.h" #include "elf.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "qemu/host-utils.h" #include "qemu/option.h" #include "hw/pci-host/ppce500.h" @@ -1008,7 +1006,7 @@ void ppce500_init(MachineState *machine) /* Platform Bus Device */ if (pmc->has_platform_bus) { dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); - dev->id = TYPE_PLATFORM_BUS_DEVICE; + dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs); qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 21759628466..7bb7ac39975 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -58,7 +58,6 @@ #include "hw/pci/pci.h" #include "net/net.h" #include "sysemu/sysemu.h" -#include "hw/boards.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" #include "hw/misc/macio/macio.h" @@ -71,7 +70,6 @@ #include "sysemu/reset.h" #include "kvm_ppc.h" #include "hw/usb.h" -#include "exec/address-spaces.h" #include "hw/sysbus.h" 
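In the e500.c hunk above, the platform bus device's id is switched from pointing at a string literal to a g_strdup() copy. A plausible reading, not stated in the patch itself, is that the qdev core frees DeviceState::id when the device is finalized, so the field must own heap-allocated memory; a minimal sketch of the resulting pattern (the function name is illustrative):

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/platform-bus.h"

static void create_platform_bus_sketch(void)
{
    DeviceState *dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE);

    /* the id string is owned (and later freed) by the device, not by us */
    dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
}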
#include "trace.h" @@ -157,6 +155,10 @@ static void ppc_core99_init(MachineState *machine) } /* allocate RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } memory_region_add_subregion(get_system_memory(), 0, machine->ram); /* allocate and load firmware ROM */ diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 963d247f5f5..de2be960e6c 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -38,7 +38,6 @@ #include "hw/isa/isa.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" -#include "hw/boards.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" #include "hw/misc/macio/macio.h" @@ -49,7 +48,6 @@ #include "sysemu/kvm.h" #include "sysemu/reset.h" #include "kvm_ppc.h" -#include "exec/address-spaces.h" #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 218631c883b..aa4c8e6a2ea 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -29,6 +29,9 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_numa.c', 'pef.c', )) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files( + 'spapr_softmmu.c', +)) ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c')) ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files( 'spapr_pci_vfio.c', @@ -78,5 +81,10 @@ ppc_ss.add(when: 'CONFIG_E500', if_true: files( )) # PowerPC 440 Xilinx ML507 reference board. ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c')) +# Pegasos2 +ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c')) + +ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c')) +ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c')) hw_arch += {'ppc': ppc_ss} diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index 573be3ed79b..cc44d5e3396 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -41,7 +41,7 @@ struct PefGuest { ConfidentialGuestSupport parent_obj; }; -static int kvmppc_svm_init(Error **errp) +static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) { #ifdef CONFIG_KVM static Error *pef_mig_blocker; @@ -65,6 +65,8 @@ static int kvmppc_svm_init(Error **errp) /* NB: This can fail if --only-migratable is used */ migrate_add_blocker(pef_mig_blocker, &error_fatal); + cgs->ready = true; + return 0; #else g_assert_not_reached(); @@ -102,7 +104,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -1; } - return kvmppc_svm_init(errp); + return kvmppc_svm_init(cgs, errp); } int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c new file mode 100644 index 00000000000..298e6b93e2d --- /dev/null +++ b/hw/ppc/pegasos2.c @@ -0,0 +1,952 @@ +/* + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * + * Copyright (c) 2018-2021 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/ppc/ppc.h" +#include "hw/sysbus.h" +#include "hw/pci/pci_host.h" +#include "hw/irq.h" +#include "hw/pci-host/mv64361.h" +#include "hw/isa/vt82c686.h" +#include "hw/ide/pci.h" +#include "hw/i2c/smbus_eeprom.h" +#include "hw/qdev-properties.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/qtest.h" +#include "hw/boards.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "exec/address-spaces.h" +#include "qom/qom-qobject.h" +#include "qapi/qmp/qdict.h" +#include "trace.h" +#include "qemu/datadir.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/vof.h" + +#include + +#define PROM_FILENAME "vof.bin" +#define PROM_ADDR 0xfff00000 +#define PROM_SIZE 0x80000 + +#define KVMPPC_HCALL_BASE 0xf000 +#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0) +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) + +#define H_SUCCESS 0 +#define H_PRIVILEGE -3 /* Caller not privileged */ +#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ + +#define BUS_FREQ_HZ 133333333 + +#define PCI0_CFG_ADDR 0xcf8 +#define PCI0_MEM_BASE 0xc0000000 +#define PCI0_MEM_SIZE 0x20000000 +#define PCI0_IO_BASE 0xf8000000 +#define PCI0_IO_SIZE 0x10000 + +#define PCI1_CFG_ADDR 0xc78 +#define PCI1_MEM_BASE 0x80000000 +#define PCI1_MEM_SIZE 0x40000000 +#define PCI1_IO_BASE 0xfe000000 +#define PCI1_IO_SIZE 0x10000 + +#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2") +OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE) + +struct Pegasos2MachineState { + MachineState parent_obj; + PowerPCCPU *cpu; + DeviceState *mv; + Vof *vof; + void *fdt_blob; + uint64_t kernel_addr; + uint64_t kernel_entry; + uint64_t kernel_size; +}; + +static void *build_fdt(MachineState *machine, int *fdt_size); + +static void pegasos2_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine); + + cpu_reset(CPU(cpu)); + cpu->env.spr[SPR_HID1] = 7ULL << 28; + if (pm->vof) { + cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; + cpu->env.nip = 0x100; + } +} + +static void pegasos2_init(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + CPUPPCState *env; + MemoryRegion *rom = g_new(MemoryRegion, 1); + PCIBus *pci_bus; + PCIDevice *dev; + I2CBus *i2c_bus; + const char *fwname = machine->firmware ?: PROM_FILENAME; + char *filename; + int sz; + uint8_t *spd_data; + + /* init CPU */ + pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &pm->cpu->env; + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { + error_report("Incompatible CPU, only 6xx bus supported"); + exit(1); + } + + /* Set time-base frequency */ + cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4); + qemu_register_reset(pegasos2_cpu_reset, pm->cpu); + + /* RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, fwname); + if (!filename) { + error_report("Could not find firmware '%s'", fwname); + exit(1); + } + if (!machine->firmware && !pm->vof) { + pm->vof = g_malloc0(sizeof(*pm->vof)); + } + memory_region_init_rom(rom, NULL, "pegasos2.rom", PROM_SIZE, 
&error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom); + sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE); + } + if (sz <= 0 || sz > PROM_SIZE) { + error_report("Could not load firmware '%s'", filename); + exit(1); + } + g_free(filename); + if (pm->vof) { + pm->vof->fw_size = sz; + } + + /* Marvell Discovery II system controller */ + pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT])); + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + + /* VIA VT8231 South Bridge (multifunction PCI device) */ + /* VT8231 function 0: PCI-to-ISA Bridge */ + dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true, + TYPE_VT8231_ISA); + qdev_connect_gpio_out(DEVICE(dev), 0, + qdev_get_gpio_in_named(pm->mv, "gpp", 31)); + + /* VT8231 function 1: IDE Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide"); + pci_ide_create_devs(dev); + + /* VT8231 function 2-3: USB Ports */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 2), "vt82c686b-usb-uhci"); + pci_create_simple(pci_bus, PCI_DEVFN(12, 3), "vt82c686b-usb-uhci"); + + /* VT8231 function 4: Power Management Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 4), TYPE_VT8231_PM); + i2c_bus = I2C_BUS(qdev_get_child_bus(DEVICE(dev), "i2c")); + spd_data = spd_data_generate(DDR, machine->ram_size); + smbus_eeprom_init_one(i2c_bus, 0x57, spd_data); + + /* VT8231 function 5-6: AC97 Audio & Modem */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 5), TYPE_VIA_AC97); + pci_create_simple(pci_bus, PCI_DEVFN(12, 6), TYPE_VIA_MC97); + + /* other PC hardware */ + pci_vga_init(pci_bus); + + if (machine->kernel_filename) { + sz = load_elf(machine->kernel_filename, NULL, NULL, NULL, + &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + error_report("Could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + pm->kernel_size = sz; + if (!pm->vof) { + warn_report("Option -kernel may be ineffective with -bios."); + } + } else if (pm->vof && !qtest_enabled()) { + warn_report("Using Virtual OpenFirmware but no -kernel option."); + } + + if (!pm->vof && machine->kernel_cmdline && machine->kernel_cmdline[0]) { + warn_report("Option -append may be ineffective with -bios."); + } +} + +static uint32_t pegasos2_mv_reg_read(Pegasos2MachineState *pm, + uint32_t addr, uint32_t len) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + uint64_t val = 0xffffffffULL; + memory_region_dispatch_read(r, addr, &val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); + return val; +} + +static void pegasos2_mv_reg_write(Pegasos2MachineState *pm, uint32_t addr, + uint32_t len, uint32_t val) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + memory_region_dispatch_write(r, addr, val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); +} + +static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len) +{ + hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR; + uint64_t val = 0xffffffffULL; + + if (len <= 4) { + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + val = pegasos2_mv_reg_read(pm, pcicfg + 4, len); + } + return val; +} + +static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len, uint32_t val) +{ + hwaddr pcicfg = bus ? 
PCI1_CFG_ADDR : PCI0_CFG_ADDR; + + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); +} + +static void pegasos2_machine_reset(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + void *fdt; + uint64_t d[2]; + int sz; + + qemu_devices_reset(); + if (!pm->vof) { + return; /* Firmware should set up machine so nothing to do */ + } + + /* Otherwise, set up devices that board firmware would normally do */ + pegasos2_mv_reg_write(pm, 0, 4, 0x28020ff); + pegasos2_mv_reg_write(pm, 0x278, 4, 0xa31fc); + pegasos2_mv_reg_write(pm, 0xf300, 4, 0x11ff0400); + pegasos2_mv_reg_write(pm, 0xf10c, 4, 0x80000000); + pegasos2_mv_reg_write(pm, 0x1c, 4, 0x8000000); + pegasos2_pci_config_write(pm, 0, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pegasos2_pci_config_write(pm, 1, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x50, 1, 0x2); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_INTERRUPT_LINE, 2, 0x109); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_CLASS_PROG, 1, 0xf); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x40, 1, 0xb); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x50, 4, 0x17171717); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_COMMAND, 2, 0x87); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x48, 4, 0xf00); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x40, 4, 0x558020); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x90, 4, 0xd00); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 5) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 6) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + /* Device tree and VOF set up */ + vof_init(pm->vof, machine->ram_size, &error_fatal); + if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) { + error_report("Memory allocation for stack failed"); + exit(1); + } + if (pm->kernel_size && + vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) { + error_report("Memory for kernel is in use"); + exit(1); + } + fdt = build_fdt(machine, &sz); + /* FIXME: VOF assumes entry is same as load address */ + d[0] = cpu_to_be64(pm->kernel_entry); + d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr)); + qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d)); + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + g_free(pm->fdt_blob); + pm->fdt_blob = fdt; + + vof_build_dt(fdt, pm->vof); + vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe"); + pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); +} + +enum pegasos2_rtas_tokens { + RTAS_RESTART_RTAS = 0, + RTAS_NVRAM_FETCH = 1, + RTAS_NVRAM_STORE = 2, + RTAS_GET_TIME_OF_DAY = 3, + RTAS_SET_TIME_OF_DAY = 4, + RTAS_EVENT_SCAN = 6, + RTAS_CHECK_EXCEPTION = 7, + RTAS_READ_PCI_CONFIG = 8, + RTAS_WRITE_PCI_CONFIG = 9, + RTAS_DISPLAY_CHARACTER = 10, + RTAS_SET_INDICATOR = 11, + RTAS_POWER_OFF = 17, + 
RTAS_SUSPEND = 18, + RTAS_HIBERNATE = 19, + RTAS_SYSTEM_REBOOT = 20, +}; + +static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, + target_ulong args_real) +{ + AddressSpace *as = CPU(cpu)->as; + uint32_t token = ldl_be_phys(as, args_real); + uint32_t nargs = ldl_be_phys(as, args_real + 4); + uint32_t nrets = ldl_be_phys(as, args_real + 8); + uint32_t args = args_real + 12; + uint32_t rets = args_real + 12 + nargs * 4; + + if (nrets < 1) { + qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n"); + return H_PARAMETER; + } + switch (token) { + case RTAS_GET_TIME_OF_DAY: + { + QObject *qo = object_property_get_qobject(qdev_get_machine(), + "rtc-time", &error_fatal); + QDict *qd = qobject_to(QDict, qo); + + if (nargs != 0 || nrets != 8 || !qd) { + stl_be_phys(as, rets, -1); + qobject_unref(qo); + return H_PARAMETER; + } + + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, qdict_get_int(qd, "tm_year") + 1900); + stl_be_phys(as, rets + 8, qdict_get_int(qd, "tm_mon") + 1); + stl_be_phys(as, rets + 12, qdict_get_int(qd, "tm_mday")); + stl_be_phys(as, rets + 16, qdict_get_int(qd, "tm_hour")); + stl_be_phys(as, rets + 20, qdict_get_int(qd, "tm_min")); + stl_be_phys(as, rets + 24, qdict_get_int(qd, "tm_sec")); + stl_be_phys(as, rets + 28, 0); + qobject_unref(qo); + return H_SUCCESS; + } + case RTAS_READ_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 2 || nrets != 2) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = pegasos2_pci_config_read(pm, !(addr >> 24), + addr & 0x0fffffff, len); + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, val); + return H_SUCCESS; + } + case RTAS_WRITE_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 3 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = ldl_be_phys(as, args + 8); + pegasos2_pci_config_write(pm, !(addr >> 24), + addr & 0x0fffffff, len, val); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + case RTAS_DISPLAY_CHARACTER: + if (nargs != 1 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args)); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + case RTAS_POWER_OFF: + { + if (nargs != 2 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + default: + qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n", + token, nargs, nrets); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } +} + +static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); + CPUPPCState *env = &cpu->env; + + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + + if (msr_pr) { + qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else if (env->gpr[3] == KVMPPC_H_RTAS) { + env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]); + } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) { + int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob, + env->gpr[4]); + env->gpr[3] = (ret ? 
H_PARAMETER : H_SUCCESS); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx + "\n", env->gpr[3]); + env->gpr[3] = -1; + } +} + +static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ +} + +static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp) +{ + return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1]; +} + +static bool pegasos2_setprop(MachineState *ms, const char *path, + const char *propname, void *val, int vallen) +{ + return true; +} + +static void pegasos2_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc); + VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc); + + mc->desc = "Genesi/bPlan Pegasos II"; + mc->init = pegasos2_init; + mc->reset = pegasos2_machine_reset; + mc->block_default_type = IF_IDE; + mc->default_boot_order = "cd"; + mc->default_display = "std"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9"); + mc->default_ram_id = "pegasos2.ram"; + mc->default_ram_size = 512 * MiB; + + vhc->hypercall = pegasos2_hypercall; + vhc->cpu_exec_enter = vhyp_nop; + vhc->cpu_exec_exit = vhyp_nop; + vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr; + + vmc->setprop = pegasos2_setprop; +} + +static const TypeInfo pegasos2_machine_info = { + .name = TYPE_PEGASOS2_MACHINE, + .parent = TYPE_MACHINE, + .class_init = pegasos2_machine_class_init, + .instance_size = sizeof(Pegasos2MachineState), + .interfaces = (InterfaceInfo[]) { + { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_VOF_MACHINE_IF }, + { } + }, +}; + +static void pegasos2_machine_register_types(void) +{ + type_register_static(&pegasos2_machine_info); +} + +type_init(pegasos2_machine_register_types) + +/* FDT creation for passing to firmware */ + +typedef struct { + void *fdt; + const char *path; +} FDTInfo; + +/* We do everything in reverse order so it comes out right in the tree */ + +static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi) +{ + qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi"); +} + +static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi) +{ + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0); + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1); + qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb"); +} + +static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi) +{ + GString *name = g_string_sized_new(64); + uint32_t cells[3]; + + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1); + qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2); + qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa"); + qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa"); + + /* addional devices */ + g_string_printf(name, "%s/lpt@i3bc", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(7); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x3bc); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt"); + + g_string_printf(name, "%s/fdc@i3f0", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(6); + cells[1] = 
0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x3f0); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc"); + + g_string_printf(name, "%s/timer@i40", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x40); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer"); + + g_string_printf(name, "%s/rtc@i70", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc"); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(8); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x70); + cells[2] = cpu_to_be32(2); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc"); + + g_string_printf(name, "%s/keyboard@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + cells[0] = cpu_to_be32(1); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard"); + + g_string_printf(name, "%s/8042@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1); + qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", ""); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", ""); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042"); + + g_string_printf(name, "%s/serial@i2f8", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(3); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x2f8); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial"); + + g_string_free(name, TRUE); +} + +static struct { + const char *id; + const char *name; + void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi); +} device_map[] = { + { 
"pci11ab,6460", "host", NULL }, + { "pci1106,8231", "isa", dt_isa }, + { "pci1106,571", "ide", dt_ide }, + { "pci1106,3044", "firewire", NULL }, + { "pci1106,3038", "usb", dt_usb }, + { "pci1106,8235", "other", NULL }, + { "pci1106,3058", "sound", NULL }, + { NULL, NULL } +}; + +static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) +{ + FDTInfo *fi = opaque; + GString *node = g_string_new(NULL); + uint32_t cells[(PCI_NUM_REGIONS + 1) * 5]; + int i, j; + const char *name = NULL; + g_autofree const gchar *pn = g_strdup_printf("pci%x,%x", + pci_get_word(&d->config[PCI_VENDOR_ID]), + pci_get_word(&d->config[PCI_DEVICE_ID])); + + for (i = 0; device_map[i].id; i++) { + if (!strcmp(pn, device_map[i].id)) { + name = device_map[i].name; + break; + } + } + g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn), + PCI_SLOT(d->devfn)); + if (PCI_FUNC(d->devfn)) { + g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn)); + } + + qemu_fdt_add_subnode(fi->fdt, node->str); + if (device_map[i].dtf) { + FDTInfo cfi = { fi->fdt, node->str }; + device_map[i].dtf(bus, d, &cfi); + } + cells[0] = cpu_to_be32(d->devfn << 8); + cells[1] = 0; + cells[2] = 0; + cells[3] = 0; + cells[4] = 0; + j = 5; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4)); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + cells[j] |= cpu_to_be32(1 << 24); + } else { + cells[j] |= cpu_to_be32(2 << 24); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) { + cells[j] |= cpu_to_be32(4 << 28); + } + } + cells[j + 1] = 0; + cells[j + 2] = 0; + cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32); + cells[j + 4] = cpu_to_be32(d->io_regions[i].size); + j += 5; + } + qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn); + if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) { + qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts", + pci_get_byte(&d->config[PCI_INTERRUPT_PIN])); + } + /* Pegasos2 firmware has subsystem-id amd subsystem-vendor-id swapped */ + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID])); + cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]); + qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8); + qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff); + qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id", + pci_get_word(&d->config[PCI_DEVICE_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id", + pci_get_word(&d->config[PCI_VENDOR_ID])); + + g_string_free(node, TRUE); +} + +static void *build_fdt(MachineState *machine, int *fdt_size) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + PowerPCCPU *cpu = pm->cpu; + PCIBus *pci_bus; + FDTInfo fi; + uint32_t cells[16]; + void *fdt = create_device_tree(fdt_size); + + fi.fdt = fdt; + + /* root node */ + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description", + "Pegasos CHRP PowerPC System"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH"); + qemu_fdt_setprop_string(fdt, "/", "revision", "2B"); + qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp"); + 
qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1); + qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2"); + + /* pci@c0000000 */ + qemu_fdt_add_subnode(fdt, "/pci@c0000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1); + cells[0] = cpu_to_be32(PCI0_MEM_BASE); + cells[1] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI0_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI0_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI0_MEM_BASE); + cells[9] = cpu_to_be32(PCI0_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci"); + + fi.path = "/pci@c0000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 0); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + /* pci@80000000 */ + qemu_fdt_add_subnode(fdt, "/pci@80000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0); + cells[0] = cpu_to_be32(PCI1_MEM_BASE); + cells[1] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge", + 0xf1000cb4); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI1_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI1_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI1_MEM_BASE); + cells[9] = cpu_to_be32(PCI1_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci"); + + fi.path = "/pci@80000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + qemu_fdt_add_subnode(fdt, "/failsafe"); + qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial"); + qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe"); + + qemu_fdt_add_subnode(fdt, "/rtas"); + qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT); + qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE); + qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND); + qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR); + qemu_fdt_setprop_cell(fdt, "/rtas", "display-character", + RTAS_DISPLAY_CHARACTER); + qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config", + RTAS_WRITE_PCI_CONFIG); + qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config", + 
RTAS_READ_PCI_CONFIG); + /* Pegasos2 firmware misspells check-exception and guests use that */ + qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption", + RTAS_CHECK_EXCEPTION); + qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day", + RTAS_SET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day", + RTAS_GET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH); + qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1); + + /* cpus */ + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); + qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus"); + + /* FIXME Get CPU name from CPU object */ + const char *cp = "/cpus/PowerPC,G4"; + qemu_fdt_add_subnode(fdt, cp); + qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size", + cpu->env.icache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size", + cpu->env.icache_line_size); + if (cpu->env.id_tlbs) { + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_string(fdt, cp, "tlb-split", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb); + qemu_fdt_setprop_string(fdt, cp, "state", "running"); + if (cpu->env.insns_flags & PPC_ALTIVEC) { + qemu_fdt_setprop_string(fdt, cp, "altivec", ""); + qemu_fdt_setprop_string(fdt, cp, "data-streams", ""); + } + /* + * FIXME What flags do data-streams, external-control and + * performance-monitor depend on? 
+ */ + qemu_fdt_setprop_string(fdt, cp, "external-control", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) { + qemu_fdt_setprop_string(fdt, cp, "general-purpose", ""); + } + qemu_fdt_setprop_string(fdt, cp, "performance-monitor", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FRES) { + qemu_fdt_setprop_string(fdt, cp, "graphics", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4); + qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency", + cpu->env.tb_env->tb_freq); + qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ); + qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5); + qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu"); + qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1); + + /* memory */ + qemu_fdt_add_subnode(fdt, "/memory@0"); + cells[0] = 0; + cells[1] = cpu_to_be32(machine->ram_size); + qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory"); + qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory"); + + qemu_fdt_add_subnode(fdt, "/chosen"); + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline ?: ""); + qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen"); + + qemu_fdt_add_subnode(fdt, "/openprom"); + qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1"); + + return fdt; +} diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 77af846cdfe..71e45515f13 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -32,14 +32,12 @@ #include "sysemu/device_tree.h" #include "sysemu/hw_accel.h" #include "target/ppc/cpu.h" -#include "qemu/log.h" #include "hw/ppc/fdt.h" #include "hw/ppc/ppc.h" #include "hw/ppc/pnv.h" #include "hw/ppc/pnv_core.h" #include "hw/loader.h" #include "hw/nmi.h" -#include "exec/address-spaces.h" #include "qapi/visitor.h" #include "monitor/monitor.h" #include "hw/intc/intc.h" @@ -53,7 +51,6 @@ #include "hw/ppc/pnv_pnor.h" #include "hw/isa/isa.h" -#include "hw/boards.h" #include "hw/char/serial.h" #include "hw/rtc/mc146818rtc.h" @@ -199,7 +196,7 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); - if (env->spr_cb[SPR_PURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); } @@ -713,6 +710,25 @@ static void pnv_chip_power10_pic_print_info(PnvChip *chip, Monitor *mon) pnv_psi_pic_print_info(&chip10->psi, mon); } +/* Always give the first 1GB to chip 0 else we won't boot */ +static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id) +{ + MachineState *machine = MACHINE(pnv); + uint64_t ram_per_chip; + + assert(machine->ram_size >= 1 * GiB); + + ram_per_chip = machine->ram_size / pnv->num_chips; + if (ram_per_chip >= 1 * GiB) { + return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); + } + + assert(pnv->num_chips > 1); + + ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1); + return chip_id == 0 ? 
1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); +} + static void pnv_init(MachineState *machine) { const char *bios_name = machine->firmware ?: FW_FILE_NAME; @@ -720,6 +736,7 @@ static void pnv_init(MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); char *fw_filename; long fw_size; + uint64_t chip_ram_start = 0; int i; char *chip_typename; DriveInfo *pnor = drive_get(IF_MTD, 0, 0); @@ -812,9 +829,10 @@ static void pnv_init(MachineState *machine) * TODO: should we decide on how many chips we can create based * on #cores and Venice vs. Murano vs. Naples chip type etc..., */ - if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 4) { + if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 16) { error_report("invalid number of chips: '%d'", pnv->num_chips); - error_printf("Try '-smp sockets=N'. Valid values are : 1, 2 or 4.\n"); + error_printf( + "Try '-smp sockets=N'. Valid values are : 1, 2, 4, 8 and 16.\n"); exit(1); } @@ -822,22 +840,20 @@ static void pnv_init(MachineState *machine) for (i = 0; i < pnv->num_chips; i++) { char chip_name[32]; Object *chip = OBJECT(qdev_new(chip_typename)); + uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i); pnv->chips[i] = PNV_CHIP(chip); - /* - * TODO: put all the memory in one node on chip 0 until we find a - * way to specify different ranges for each chip - */ - if (i == 0) { - object_property_set_int(chip, "ram-size", machine->ram_size, - &error_fatal); - } + /* Distribute RAM among the chips */ + object_property_set_int(chip, "ram-start", chip_ram_start, + &error_fatal); + object_property_set_int(chip, "ram-size", chip_ram_size, + &error_fatal); + chip_ram_start += chip_ram_size; - snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i)); + snprintf(chip_name, sizeof(chip_name), "chip[%d]", i); object_property_add_child(OBJECT(pnv), chip_name, chip); - object_property_set_int(chip, "chip-id", PNV_CHIP_HWID(i), - &error_fatal); + object_property_set_int(chip, "chip-id", i, &error_fatal); object_property_set_int(chip, "nr-cores", machine->smp.cores, &error_fatal); object_property_set_int(chip, "nr-threads", machine->smp.threads, @@ -1354,10 +1370,10 @@ static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) sizeof(*eq), TYPE_PNV_QUAD, &error_fatal, NULL); - object_property_set_int(OBJECT(eq), "id", core_id, &error_fatal); + object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal); qdev_realize(DEVICE(eq), NULL, &error_fatal); - pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->id), + pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id), &eq->xscom_regs); } } @@ -1919,7 +1935,7 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) static const char compat[] = "qemu,powernv10\0ibm,powernv"; mc->desc = "IBM PowerNV (Non-Virtualized) POWER10"; - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v1.0"); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0"); pmc->compat = compat; pmc->compat_size = sizeof(compat); diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index bd2bf2e0447..19e8eb885f7 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -29,6 +29,7 @@ #include "hw/ppc/pnv_xscom.h" #include "hw/ppc/xics.h" #include "hw/qdev-properties.h" +#include "helper_regs.h" static const char *pnv_core_cpu_typename(PnvCore *pc) { @@ -55,8 +56,8 @@ static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) env->gpr[3] = PNV_FDT_ADDR; env->nip = 0x10; env->msr |= MSR_HVB; /* Hypervisor mode */ - env->spr[SPR_HRMOR] = pc->hrmor; + 
hreg_compute_hflags(env); pcc->intc_reset(pc->chip, cpu); } @@ -346,7 +347,7 @@ static const TypeInfo pnv_core_infos[] = { DEFINE_PNV_CORE_TYPE(power8, "power8_v2.0"), DEFINE_PNV_CORE_TYPE(power8, "power8nvl_v1.0"), DEFINE_PNV_CORE_TYPE(power9, "power9_v2.0"), - DEFINE_PNV_CORE_TYPE(power10, "power10_v1.0"), + DEFINE_PNV_CORE_TYPE(power10, "power10_v2.0"), }; DEFINE_TYPES(pnv_core_infos) @@ -406,13 +407,13 @@ static void pnv_quad_realize(DeviceState *dev, Error **errp) PnvQuad *eq = PNV_QUAD(dev); char name[32]; - snprintf(name, sizeof(name), "xscom-quad.%d", eq->id); + snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id); pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops, eq, name, PNV9_XSCOM_EQ_SIZE); } static Property pnv_quad_properties[] = { - DEFINE_PROP_UINT32("id", PnvQuad, id, 0), + DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c index 4b455de1ea7..83ecccca28d 100644 --- a/hw/ppc/pnv_pnor.c +++ b/hw/ppc/pnv_pnor.c @@ -10,7 +10,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qemu/units.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" @@ -37,7 +36,7 @@ static void pnv_pnor_update(PnvPnor *s, int offset, int size) int offset_end; int ret; - if (s->blk) { + if (!s->blk || !blk_is_writable(s->blk)) { return; } diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c index 3e868c8c8da..cd9a2c5952a 100644 --- a/hw/ppc/pnv_psi.c +++ b/hw/ppc/pnv_psi.c @@ -26,7 +26,6 @@ #include "qapi/error.h" #include "monitor/monitor.h" -#include "exec/address-spaces.h" #include "hw/ppc/fdt.h" #include "hw/ppc/pnv.h" @@ -466,7 +465,7 @@ static void pnv_psi_reset(DeviceState *dev) static void pnv_psi_reset_handler(void *dev) { - device_legacy_reset(DEVICE(dev)); + device_cold_reset(DEVICE(dev)); } static void pnv_psi_realize(DeviceState *dev, Error **errp) @@ -710,7 +709,7 @@ static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr, break; case PSIHB9_INTERRUPT_CONTROL: if (val & PSIHB9_IRQ_RESET) { - device_legacy_reset(DEVICE(&psi9->source)); + device_cold_reset(DEVICE(&psi9->source)); } psi->regs[reg] = val; break; diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index be7018e8ac5..9ce018dbc27 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -284,6 +284,12 @@ int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, _FDT(xscom_offset); g_free(name); _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + /* + * On P10, the xscom bus id has been deprecated and the chip id is + * calculated from the "Primary topology table index". See skiboot. + */ + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index", + chip->chip_id))); _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1))); _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg)))); diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index bf28d6bfc8d..e8127599c90 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/irq.h" #include "hw/ppc/ppc.h" #include "hw/ppc/ppc_e500.h" @@ -38,22 +37,6 @@ #include "migration/vmstate.h" #include "trace.h" -//#define PPC_DEBUG_IRQ -//#define PPC_DEBUG_TB - -#ifdef PPC_DEBUG_IRQ -# define LOG_IRQ(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__) -#else -# define LOG_IRQ(...) 
do { } while (0) -#endif - - -#ifdef PPC_DEBUG_TB -# define LOG_TB(...) qemu_log(__VA_ARGS__) -#else -# define LOG_TB(...) do { } while (0) -#endif - static void cpu_ppc_tb_stop (CPUPPCState *env); static void cpu_ppc_tb_start (CPUPPCState *env); @@ -87,9 +70,8 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) } - LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32 - "req %08x\n", __func__, env, n_IRQ, level, - env->pending_interrupts, CPU(cpu)->interrupt_request); + trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts, + CPU(cpu)->interrupt_request); if (locked) { qemu_mutex_unlock_iothread(); @@ -103,8 +85,8 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -113,8 +95,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC6xx_INPUT_TBEN: /* Level sensitive - active high */ - LOG_IRQ("%s: %s the time base\n", - __func__, level ? "start" : "stop"); + trace_ppc_irq_set_state("time base", level); if (level) { cpu_ppc_tb_start(env); } else { @@ -123,14 +104,12 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) break; case PPC6xx_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC6xx_INPUT_SMI: /* Level sensitive - active high */ - LOG_IRQ("%s: set the SMI IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("SMI IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level); break; case PPC6xx_INPUT_MCP: @@ -139,8 +118,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) * 603/604/740/750: check HID0[EMCP] */ if (cur_level == 1 && level == 0) { - LOG_IRQ("%s: raise machine check state\n", - __func__); + trace_ppc_irq_set_state("machine check", 1); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); } break; @@ -149,26 +127,23 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level) /* XXX: TODO: relay the signal to CKSTP_OUT pin */ /* XXX: Note that the only way to restart the CPU is to reset it */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } break; case PPC6xx_INPUT_HRESET: /* Level sensitive - active low */ if (level) { - LOG_IRQ("%s: reset the CPU\n", __func__); + trace_ppc_irq_reset("CPU"); cpu_interrupt(cs, CPU_INTERRUPT_RESET); } break; case PPC6xx_INPUT_SRESET: - LOG_IRQ("%s: set the RESET IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("RESET IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -193,8 +168,8 @@ static void ppc970_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -203,14 +178,12 @@ static void ppc970_set_irq(void 
*opaque, int pin, int level) switch (pin) { case PPC970_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC970_INPUT_THINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the SMI IRQ state to %d\n", __func__, - level); + trace_ppc_irq_set_state("SMI IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level); break; case PPC970_INPUT_MCP: @@ -219,8 +192,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level) * 603/604/740/750: check HID0[EMCP] */ if (cur_level == 1 && level == 0) { - LOG_IRQ("%s: raise machine check state\n", - __func__); + trace_ppc_irq_set_state("machine check", 1); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); } break; @@ -228,10 +200,10 @@ static void ppc970_set_irq(void *opaque, int pin, int level) /* Level sensitive - active low */ /* XXX: TODO: relay the signal to CKSTP_OUT pin */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } else { - LOG_IRQ("%s: restart the CPU\n", __func__); + trace_ppc_irq_cpu("restart"); cs->halted = 0; qemu_cpu_kick(cs); } @@ -243,19 +215,15 @@ static void ppc970_set_irq(void *opaque, int pin, int level) } break; case PPC970_INPUT_SRESET: - LOG_IRQ("%s: set the RESET IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("RESET IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); break; case PPC970_INPUT_TBEN: - LOG_IRQ("%s: set the TBEN state to %d\n", __func__, - level); + trace_ppc_irq_set_state("TBEN IRQ", level); /* XXX: TODO */ break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -277,20 +245,16 @@ static void power7_set_irq(void *opaque, int pin, int level) { PowerPCCPU *cpu = opaque; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - &cpu->env, pin, level); + trace_ppc_irq_set(&cpu->env, pin, level); switch (pin) { case POWER7_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } } @@ -307,25 +271,21 @@ static void power9_set_irq(void *opaque, int pin, int level) { PowerPCCPU *cpu = opaque; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - &cpu->env, pin, level); + trace_ppc_irq_set(&cpu->env, pin, level); switch (pin) { case POWER9_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case POWER9_INPUT_HINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("HV external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + g_assert_not_reached(); return; } } @@ -376,6 +336,8 @@ void store_40x_dbcr0(CPUPPCState *env, uint32_t val) { PowerPCCPU *cpu = env_archcpu(env); + qemu_mutex_lock_iothread(); + switch ((val >> 28) & 0x3) { case 0x0: /* No action */ @@ -393,6 +355,8 @@ void 
store_40x_dbcr0(CPUPPCState *env, uint32_t val) ppc40x_system_reset(cpu); break; } + + qemu_mutex_unlock_iothread(); } /* PowerPC 40x internal IRQ controller */ @@ -402,8 +366,8 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { @@ -412,57 +376,51 @@ static void ppc40x_set_irq(void *opaque, int pin, int level) switch (pin) { case PPC40x_INPUT_RESET_SYS: if (level) { - LOG_IRQ("%s: reset the PowerPC system\n", - __func__); + trace_ppc_irq_reset("system"); ppc40x_system_reset(cpu); } break; case PPC40x_INPUT_RESET_CHIP: if (level) { - LOG_IRQ("%s: reset the PowerPC chip\n", __func__); + trace_ppc_irq_reset("chip"); ppc40x_chip_reset(cpu); } break; case PPC40x_INPUT_RESET_CORE: /* XXX: TODO: update DBSR[MRR] */ if (level) { - LOG_IRQ("%s: reset the PowerPC core\n", __func__); + trace_ppc_irq_reset("core"); ppc40x_core_reset(cpu); } break; case PPC40x_INPUT_CINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the critical IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("critical IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); break; case PPC40x_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the external IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("external IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); break; case PPC40x_INPUT_HALT: /* Level sensitive - active low */ if (level) { - LOG_IRQ("%s: stop the CPU\n", __func__); + trace_ppc_irq_cpu("stop"); cs->halted = 1; } else { - LOG_IRQ("%s: restart the CPU\n", __func__); + trace_ppc_irq_cpu("restart"); cs->halted = 0; qemu_cpu_kick(cs); } break; case PPC40x_INPUT_DEBUG: /* Level sensitive - active high */ - LOG_IRQ("%s: set the debug pin state to %d\n", - __func__, level); + trace_ppc_irq_set_state("debug pin", level); ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -486,47 +444,41 @@ static void ppce500_set_irq(void *opaque, int pin, int level) CPUPPCState *env = &cpu->env; int cur_level; - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, - env, pin, level); + trace_ppc_irq_set(env, pin, level); + cur_level = (env->irq_input_state >> pin) & 1; /* Don't generate spurious events */ if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { switch (pin) { case PPCE500_INPUT_MCK: if (level) { - LOG_IRQ("%s: reset the PowerPC system\n", - __func__); + trace_ppc_irq_reset("system"); qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); } break; case PPCE500_INPUT_RESET_CORE: if (level) { - LOG_IRQ("%s: reset the PowerPC core\n", __func__); + trace_ppc_irq_reset("core"); ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level); } break; case PPCE500_INPUT_CINT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the critical IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("critical IRQ", level); ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); break; case PPCE500_INPUT_INT: /* Level sensitive - active high */ - LOG_IRQ("%s: set the core IRQ state to %d\n", - __func__, level); + trace_ppc_irq_set_state("core IRQ", level); ppc_set_irq(cpu, 
PPC_INTERRUPT_EXT, level); break; case PPCE500_INPUT_DEBUG: /* Level sensitive - active high */ - LOG_IRQ("%s: set the debug pin state to %d\n", - __func__, level); + trace_ppc_irq_set_state("debug pin", level); ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); break; default: - /* Unknown pin - do nothing */ - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; + g_assert_not_reached(); } if (level) env->irq_input_state |= 1 << pin; @@ -577,7 +529,7 @@ uint64_t cpu_ppc_load_tbl (CPUPPCState *env) } tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb; } @@ -588,7 +540,7 @@ static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb >> 32; } @@ -608,8 +560,7 @@ static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, *tb_offsetp = value - muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND); - LOG_TB("%s: tb %016" PRIx64 " offset %08" PRIx64 "\n", - __func__, value, *tb_offsetp); + trace_ppc_tb_store(value, *tb_offsetp); } void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) @@ -645,7 +596,7 @@ uint64_t cpu_ppc_load_atbl (CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb; } @@ -656,7 +607,7 @@ uint32_t cpu_ppc_load_atbu (CPUPPCState *env) uint64_t tb; tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); - LOG_TB("%s: tb %016" PRIx64 "\n", __func__, tb); + trace_ppc_tb_load(tb); return tb >> 32; } @@ -775,7 +726,7 @@ static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) } else { decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); } - LOG_TB("%s: %016" PRIx64 "\n", __func__, decr); + trace_ppc_decr_load(decr); return decr; } @@ -834,7 +785,7 @@ uint64_t cpu_ppc_load_purr (CPUPPCState *env) static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu) { /* Raise it */ - LOG_TB("raise decrementer exception\n"); + trace_ppc_decr_excp("raise"); ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1); } @@ -848,7 +799,7 @@ static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; /* Raise it */ - LOG_TB("raise hv decrementer exception\n"); + trace_ppc_decr_excp("raise HV"); /* The architecture specifies that we don't deliver HDEC * interrupts in a PM state. 
Not only they don't cause a @@ -874,17 +825,14 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, CPUPPCState *env = &cpu->env; ppc_tb_t *tb_env = env->tb_env; uint64_t now, next; - bool negative; + int64_t signed_value; + int64_t signed_decr; /* Truncate value to decr_width and sign extend for simplicity */ - value &= ((1ULL << nr_bits) - 1); - negative = !!(value & (1ULL << (nr_bits - 1))); - if (negative) { - value |= (0xFFFFFFFFULL << nr_bits); - } + signed_value = sextract64(value, 0, nr_bits); + signed_decr = sextract64(decr, 0, nr_bits); - LOG_TB("%s: " TARGET_FMT_lx " => " TARGET_FMT_lx "\n", __func__, - decr, value); + trace_ppc_decr_store(nr_bits, decr, value); if (kvm_enabled()) { /* KVM handles decrementer exceptions, we don't need our own timer */ @@ -905,15 +853,15 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, * an edge interrupt, so raise it here too. */ if ((value < 3) || - ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && negative) || - ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && negative - && !(decr & (1ULL << (nr_bits - 1))))) { + ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0 + && signed_decr >= 0)) { (*raise_excp)(cpu); return; } /* On MSB level based systems a 0 for the MSB stops interrupt delivery */ - if (!negative && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { + if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { (*lower_excp)(cpu); } @@ -1212,9 +1160,8 @@ static void cpu_4xx_fit_cb (void *opaque) if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1); } - LOG_TB("%s: ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__, - (int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), - env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + trace_ppc4xx_fit((int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); } /* Programmable interval timer */ @@ -1228,11 +1175,10 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) !((env->spr[SPR_40x_TCR] >> 26) & 0x1) || (is_excp && !((env->spr[SPR_40x_TCR] >> 22) & 0x1))) { /* Stop PIT */ - LOG_TB("%s: stop PIT\n", __func__); + trace_ppc4xx_pit_stop(); timer_del(tb_env->decr_timer); } else { - LOG_TB("%s: start PIT %016" PRIx64 "\n", - __func__, ppc40x_timer->pit_reload); + trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); next = now + muldiv64(ppc40x_timer->pit_reload, NANOSECONDS_PER_SECOND, tb_env->decr_freq); @@ -1261,9 +1207,7 @@ static void cpu_4xx_pit_cb (void *opaque) ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1); } start_stop_pit(env, tb_env, 1); - LOG_TB("%s: ar %d ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " " - "%016" PRIx64 "\n", __func__, - (int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), + trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), (int)((env->spr[SPR_40x_TCR] >> 26) & 0x1), env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR], ppc40x_timer->pit_reload); @@ -1303,8 +1247,7 @@ static void cpu_4xx_wdt_cb (void *opaque) next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); if (next == now) next++; - LOG_TB("%s: TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__, - env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { case 0x0: case 0x1: @@ -1347,7 +1290,7 @@ void store_40x_pit (CPUPPCState *env, 
target_ulong val) tb_env = env->tb_env; ppc40x_timer = tb_env->opaque; - LOG_TB("%s val" TARGET_FMT_lx "\n", __func__, val); + trace_ppc40x_store_pit(val); ppc40x_timer->pit_reload = val; start_stop_pit(env, tb_env, 0); } @@ -1362,8 +1305,7 @@ static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq) CPUPPCState *env = opaque; ppc_tb_t *tb_env = env->tb_env; - LOG_TB("%s set new frequency to %" PRIu32 "\n", __func__, - freq); + trace_ppc40x_set_tb_clk(freq); tb_env->tb_freq = freq; tb_env->decr_freq = freq; /* XXX: we should also update all timers */ @@ -1382,7 +1324,7 @@ clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, tb_env->tb_freq = freq; tb_env->decr_freq = freq; tb_env->opaque = ppc40x_timer; - LOG_TB("%s freq %" PRIu32 "\n", __func__, freq); + trace_ppc40x_timers_init(freq); if (ppc40x_timer != NULL) { /* We use decr timer for PIT */ tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env); diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c index 8f77887fb18..972a7a4a3e5 100644 --- a/hw/ppc/ppc405_boards.c +++ b/hw/ppc/ppc405_boards.c @@ -34,15 +34,12 @@ #include "ppc405.h" #include "hw/rtc/m48t59.h" #include "hw/block/flash.h" -#include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "sysemu/reset.h" #include "sysemu/block-backend.h" #include "hw/boards.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "qemu/cutils.h" #define BIOS_FILENAME "ppc405_rom.bin" diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index fe047074a17..e632c408bdf 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -34,7 +34,6 @@ #include "qemu/timer.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" -#include "qemu/log.h" #include "exec/address-spaces.h" #include "hw/intc/ppc-uic.h" #include "hw/qdev-properties.h" diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index b7539aa7210..7fb620b9a05 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -25,7 +25,6 @@ #include "sysemu/device_tree.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "hw/char/serial.h" #include "hw/ppc/ppc.h" #include "ppc405.h" diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c index 91cbcd0504a..788d25514a4 100644 --- a/hw/ppc/ppc440_pcix.c +++ b/hw/ppc/ppc440_pcix.c @@ -28,7 +28,6 @@ #include "hw/ppc/ppc4xx.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" -#include "exec/address-spaces.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index f6f89058ab7..993e3ba955d 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -14,9 +14,7 @@ #include "qapi/error.h" #include "qemu/log.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/irq.h" -#include "exec/address-spaces.h" #include "exec/memory.h" #include "hw/ppc/ppc.h" #include "hw/qdev-properties.h" diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c index fe9d4f7155e..980c48944fc 100644 --- a/hw/ppc/ppc4xx_devs.c +++ b/hw/ppc/ppc4xx_devs.c @@ -29,7 +29,6 @@ #include "hw/irq.h" #include "hw/ppc/ppc.h" #include "hw/ppc/ppc4xx.h" -#include "hw/boards.h" #include "hw/intc/ppc-uic.h" #include "hw/qdev-properties.h" #include "qemu/log.h" diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c index e8789f64e80..304a29349c2 100644 --- a/hw/ppc/ppc4xx_pci.c +++ b/hw/ppc/ppc4xx_pci.c @@ -28,7 +28,6 @@ #include "sysemu/reset.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" -#include "exec/address-spaces.h" 
#include "trace.h" #include "qom/object.h" @@ -49,12 +48,14 @@ OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST_BRIDGE) #define PPC4xx_PCI_NR_PMMS 3 #define PPC4xx_PCI_NR_PTMS 2 +#define PPC4xx_PCI_NUM_DEVS 5 + struct PPC4xxPCIState { PCIHostState parent_obj; struct PCIMasterMap pmm[PPC4xx_PCI_NR_PMMS]; struct PCITargetMap ptm[PPC4xx_PCI_NR_PTMS]; - qemu_irq irq[PCI_NUM_PINS]; + qemu_irq irq[PPC4xx_PCI_NUM_DEVS]; MemoryRegion container; MemoryRegion iomem; @@ -247,7 +248,7 @@ static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num) trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot); - return slot - 1; + return slot > 0 ? slot - 1 : PPC4xx_PCI_NUM_DEVS - 1; } static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) @@ -255,7 +256,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) qemu_irq *pci_irqs = opaque; trace_ppc4xx_pci_set_irq(irq_num); - assert(irq_num >= 0); + assert(irq_num >= 0 && irq_num < PPC4xx_PCI_NUM_DEVS); qemu_set_irq(pci_irqs[irq_num], level); } diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c index 974c0c8a752..10b643861f2 100644 --- a/hw/ppc/ppc_booke.c +++ b/hw/ppc/ppc_booke.c @@ -28,7 +28,6 @@ #include "qemu/timer.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "qemu/log.h" #include "hw/loader.h" #include "kvm_ppc.h" diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index f1b1efdcef9..25a2e86b421 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -24,12 +24,10 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "hw/rtc/m48t59.h" #include "hw/char/serial.h" #include "hw/block/fdc.h" #include "net/net.h" -#include "sysemu/sysemu.h" #include "hw/isa/isa.h" #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" @@ -38,15 +36,12 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/log.h" -#include "hw/irq.h" #include "hw/loader.h" #include "hw/rtc/mc146818rtc.h" #include "hw/isa/pc87312.h" #include "hw/qdev-properties.h" -#include "sysemu/arch_init.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" -#include "exec/address-spaces.h" #include "trace.h" #include "elf.h" #include "qemu/units.h" diff --git a/hw/ppc/rs6000_mc.c b/hw/ppc/rs6000_mc.c index 4db5b51a2dd..c0bc212e924 100644 --- a/hw/ppc/rs6000_mc.c +++ b/hw/ppc/rs6000_mc.c @@ -23,7 +23,6 @@ #include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "exec/address-spaces.h" -#include "hw/boards.h" #include "qapi/error.h" #include "trace.h" #include "qom/object.h" diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c index 0c6baf77e8c..0737234d66e 100644 --- a/hw/ppc/sam460ex.c +++ b/hw/ppc/sam460ex.c @@ -24,7 +24,6 @@ #include "sysemu/block-backend.h" #include "hw/loader.h" #include "elf.h" -#include "exec/address-spaces.h" #include "exec/memory.h" #include "ppc440.h" #include "ppc405.h" diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e4be00b732a..3b5fd749be8 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -29,6 +29,7 @@ #include "qemu/datadir.h" #include "qapi/error.h" #include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/visitor.h" #include "sysemu/sysemu.h" #include "sysemu/hostmem.h" @@ -54,7 +55,6 @@ #include "cpu-models.h" #include "hw/core/cpu.h" -#include "hw/boards.h" #include "hw/ppc/ppc.h" #include "hw/loader.h" @@ -70,7 +70,6 @@ #include "hw/virtio/virtio-scsi.h" #include "hw/virtio/vhost-scsi-common.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include "hw/usb.h" #include "qemu/config-file.h" @@ -100,9 +99,10 @@ * * We load our kernel at 4M, 
leaving space for SLOF initial image */ -#define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */ +#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */ #define FW_MAX_SIZE 0x400000 #define FW_FILE_NAME "slof.bin" +#define FW_FILE_NAME_VOF "vof.bin" #define FW_OVERHEAD 0x2800000 #define KERNEL_LOAD_ADDR FW_MAX_SIZE @@ -705,10 +705,10 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); - if (env->spr_cb[SPR_PURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1))); } - if (env->spr_cb[SPR_SPURR].oea_read) { + if (ppc_has_spr(cpu, SPR_SPURR)) { _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1))); } @@ -882,6 +882,10 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) add_str(hypertas, "hcall-copy"); add_str(hypertas, "hcall-debug"); add_str(hypertas, "hcall-vphn"); + if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) { + add_str(hypertas, "hcall-rpt-invalidate"); + } + add_str(qemu_hypertas, "hcall-memop1"); if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { @@ -921,9 +925,13 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) * * The extra 8 bytes is required because Linux's FWNMI error log check * is off-by-one. + * + * RTAS_MIN_SIZE is required for the RTAS blob itself. */ - _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX + - ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t))); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_MIN_SIZE + + RTAS_ERROR_LOG_MAX + + ms->smp.max_cpus * sizeof(uint64_t) * 2 + + sizeof(uint64_t))); _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)); _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", @@ -981,6 +989,7 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, */ val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */ val[3] = 0x00; /* Hash */ + spapr_check_mmu_mode(false); } else if (kvm_enabled()) { if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { val[3] = 0x80; /* OV5_MMU_BOTH */ @@ -1006,7 +1015,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); if (reset) { - const char *boot_device = machine->boot_order; + const char *boot_device = spapr->boot_device; char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); size_t cb = 0; char *bootlist = get_boot_devices_list(&cb); @@ -1405,7 +1414,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, kvmppc_write_hpte(ptex, pte0, pte1); } else { if (pte0 & HPTE64_V_VALID) { - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); /* * When setting valid, we write PTE1 first.
This ensures * proper synchronization with the reading code in @@ -1421,7 +1430,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, * ppc_hash64_pteg_search() */ smp_wmb(); - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); } } } @@ -1429,7 +1438,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1) { - hwaddr offset = ptex * HASH_PTE_SIZE_64 + 15; + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); if (!spapr->htab) { @@ -1445,7 +1454,7 @@ static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1) { - hwaddr offset = ptex * HASH_PTE_SIZE_64 + 14; + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); if (!spapr->htab) { @@ -1558,6 +1567,22 @@ void spapr_setup_hpt(SpaprMachineState *spapr) } } +void spapr_check_mmu_mode(bool guest_radix) +{ + if (guest_radix) { + if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { + error_report("Guest requested unavailable MMU mode (radix)."); + exit(EXIT_FAILURE); + } + } else { + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() + && !kvmppc_has_cap_mmu_hash_v3()) { + error_report("Guest requested unavailable MMU mode (hash)."); + exit(EXIT_FAILURE); + } + } +} + static void spapr_machine_reset(MachineState *machine) { SpaprMachineState *spapr = SPAPR_MACHINE(machine); @@ -1617,29 +1642,36 @@ static void spapr_machine_reset(MachineState *machine) spapr_clear_pending_events(spapr); /* - * We place the device tree and RTAS just below either the top of the RMA, + * We place the device tree just below either the top of the RMA, * or just below 2GB, whichever is lower, so that it can be * processed with 32-bit real mode code if necessary */ - fdt_addr = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FDT_MAX_SIZE; + fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE; fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); + if (spapr->vof) { + spapr_vof_reset(spapr, fdt, &error_fatal); + /* + * Do not pack the FDT as the client may change properties. + * VOF client does not expect the FDT so we do not load it to the VM. 
+ */ + } else { + rc = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); - rc = fdt_pack(fdt); - - /* Should only fail if we've built a corrupted tree */ - assert(rc == 0); - - /* Load the fdt */ + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + 0, fdt_addr, 0); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + } qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; spapr->fdt_blob = fdt; /* Set up the entry state */ - spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, 0, fdt_addr, 0); first_ppc_cpu->env.gpr[5] = 0; spapr->fwnmi_system_reset_addr = -1; @@ -2003,6 +2035,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_ccf_assist, &vmstate_spapr_cap_fwnmi, &vmstate_spapr_fwnmi, + &vmstate_spapr_cap_rpt_invalidate, NULL } }; @@ -2361,8 +2394,10 @@ static SaveVMHandlers savevm_htab_handlers = { static void spapr_boot_set(void *opaque, const char *boot_device, Error **errp) { - MachineState *machine = MACHINE(opaque); - machine->boot_order = g_strdup(boot_device); + SpaprMachineState *spapr = SPAPR_MACHINE(opaque); + + g_free(spapr->boot_device); + spapr->boot_device = g_strdup(boot_device); } static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr) @@ -2640,7 +2675,8 @@ static void spapr_machine_init(MachineState *machine) SpaprMachineState *spapr = SPAPR_MACHINE(machine); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); MachineClass *mc = MACHINE_GET_CLASS(machine); - const char *bios_name = machine->firmware ?: FW_FILE_NAME; + const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME; + const char *bios_name = machine->firmware ?: bios_default; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; @@ -2694,7 +2730,7 @@ static void spapr_machine_init(MachineState *machine) spapr->rma_size = spapr_rma_size(spapr, &error_fatal); /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ - load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; + load_limit = MIN(spapr->rma_size, FDT_MAX_ADDR) - FW_OVERHEAD; /* * VSMT must be set in order to be able to compute VCPU ids, ie to @@ -2717,6 +2753,11 @@ static void spapr_machine_init(MachineState *machine) spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */ + if (!smc->pre_6_2_numa_affinity) { + spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY); + } + /* advertise support for dedicated HP event source to guests */ if (spapr->use_hotplug_event_source) { spapr_ovec_set(spapr->ov5, OV5_HP_EVT); @@ -2738,39 +2779,6 @@ static void spapr_machine_init(MachineState *machine) /* init CPUs */ spapr_init_cpus(spapr); - /* - * check we don't have a memory-less/cpu-less NUMA node - * Firmware relies on the existing memory/cpu topology to provide the - * NUMA topology to the kernel. - * And the linux kernel needs to know the NUMA topology at start - * to be able to hotplug CPUs later. 
- */ - if (machine->numa_state->num_nodes) { - for (i = 0; i < machine->numa_state->num_nodes; ++i) { - /* check for memory-less node */ - if (machine->numa_state->nodes[i].node_mem == 0) { - CPUState *cs; - int found = 0; - /* check for cpu-less node */ - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - if (cpu->node_id == i) { - found = 1; - break; - } - } - /* memory-less and cpu-less node */ - if (!found) { - error_report( - "Memory-less/cpu-less nodes are not supported (node %d)", - i); - exit(1); - } - } - } - - } - spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine); /* Init numa_assoc_array */ @@ -2997,6 +3005,10 @@ static void spapr_machine_init(MachineState *machine) } qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->vof) { + spapr->vof->fw_size = fw_size; /* for claim() on itself */ + spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client); + } } #define DEFAULT_KVM_TYPE "auto" @@ -3089,7 +3101,7 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, */ if (strcmp("usb-host", qdev_fw_name(dev)) == 0) { USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE); - if (usb_host_dev_is_scsi_storage(usbdev)) { + if (usb_device_is_scsi_storage(usbdev)) { return g_strdup_printf("storage@%s/disk", usbdev->port->path); } } @@ -3187,6 +3199,28 @@ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) } } +static bool spapr_get_vof(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->vof != NULL; +} + +static void spapr_set_vof(Object *obj, bool value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->vof) { + vof_cleanup(spapr->vof); + g_free(spapr->vof); + spapr->vof = NULL; + } + if (!value) { + return; + } + spapr->vof = g_malloc0(sizeof(*spapr->vof)); +} + static char *spapr_get_ic_mode(Object *obj, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); @@ -3312,6 +3346,11 @@ static void spapr_instance_init(Object *obj) stringify(KERNEL_LOAD_ADDR) " for -kernel is the default"); spapr->kernel_addr = KERNEL_LOAD_ADDR; + + object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof); + object_property_set_description(obj, "x-vof", + "Enable Virtual Open Firmware (experimental)"); + /* The machine class defines the default interrupt controller mode */ spapr->irq = smc->irq; object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, @@ -3620,11 +3659,18 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev) /* * Tell QAPI that something happened and the memory - * hotunplug wasn't successful. + * hotunplug wasn't successful. Keep sending + * MEM_UNPLUG_ERROR even while sending + * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of + * MEM_UNPLUG_ERROR is due. */ qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest " "for device %s", dev->id); - qapi_event_send_mem_unplug_error(dev->id, qapi_error); + + qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error); + + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); } /* Callback to be called during DRC release. 
*/ @@ -4475,6 +4521,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc); + VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc); mc->desc = "pSeries Logical Partition (PAPR compliant)"; mc->ignore_boot_device_suffixes = true; @@ -4487,7 +4534,16 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->init = spapr_machine_init; mc->reset = spapr_machine_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = 1024; + + /* + * Setting max_cpus to INT32_MAX. Both KVM and TCG max_cpus values + * should be limited by the host capability instead of hardcoded. + * max_cpus for KVM guests will be checked in kvm_init(), and TCG + * guests are welcome to have as many CPUs as the host are capable + * of emulate. + */ + mc->max_cpus = INT32_MAX; + mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * MiB; @@ -4547,6 +4603,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF; spapr_caps_add_properties(smc); smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; @@ -4554,6 +4611,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; } static const TypeInfo spapr_machine_info = { @@ -4573,6 +4633,7 @@ static const TypeInfo spapr_machine_info = { { TYPE_XICS_FABRIC }, { TYPE_INTERRUPT_STATS_PROVIDER }, { TYPE_XIVE_FABRIC }, + { TYPE_VOF_MACHINE_IF }, { } }, }; @@ -4604,15 +4665,41 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +/* + * pseries-6.2 + */ +static void spapr_machine_6_2_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(6_2, "6.2", true); + +/* + * pseries-6.1 + */ +static void spapr_machine_6_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_6_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; +} + +DEFINE_SPAPR_MACHINE(6_1, "6.1", false); + /* * pseries-6.0 */ static void spapr_machine_6_0_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_SPAPR_MACHINE(6_0, "6.0", true); +DEFINE_SPAPR_MACHINE(6_0, "6.0", false); /* * pseries-5.2 diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 9ea7ddd1e9a..ed7c077a0d9 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -371,6 +371,65 @@ static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, return true; } +static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, + bool (*cb)(void *, uint32_t, uint32_t), + void *opaque) +{ + PPCHash64Options *opts = cpu->hash64_opts; + int i; + int n = 0; + bool 
ci_largepage = false; + + assert(opts); + + n = 0; + for (i = 0; i < ARRAY_SIZE(opts->sps); i++) { + PPCHash64SegmentPageSizes *sps = &opts->sps[i]; + int j; + int m = 0; + + assert(n <= i); + + if (!sps->page_shift) { + break; + } + + for (j = 0; j < ARRAY_SIZE(sps->enc); j++) { + PPCHash64PageSize *ps = &sps->enc[j]; + + assert(m <= j); + if (!ps->page_shift) { + break; + } + + if (cb(opaque, sps->page_shift, ps->page_shift)) { + if (ps->page_shift >= 16) { + ci_largepage = true; + } + sps->enc[m++] = *ps; + } + } + + /* Clear rest of the row */ + for (j = m; j < ARRAY_SIZE(sps->enc); j++) { + memset(&sps->enc[j], 0, sizeof(sps->enc[j])); + } + + if (m) { + n++; + } + } + + /* Clear the rest of the table */ + for (i = n; i < ARRAY_SIZE(opts->sps); i++) { + memset(&opts->sps[i], 0, sizeof(opts->sps[i])); + } + + if (!ci_largepage) { + opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; + } +} + static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu, uint8_t val, Error **errp) @@ -523,6 +582,37 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val, } } +static void cap_rpt_invalidate_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + + if (!val) { + /* capability disabled by default */ + return; + } + + if (tcg_enabled()) { + error_setg(errp, "No H_RPT_INVALIDATE support in TCG"); + error_append_hint(errp, + "Try appending -machine cap-rpt-invalidate=off\n"); + } else if (kvm_enabled()) { + if (!kvmppc_has_cap_mmu_radix()) { + error_setg(errp, "H_RPT_INVALIDATE only supported on Radix"); + return; + } + + if (!kvmppc_has_cap_rpt_invalidate()) { + error_setg(errp, + "KVM implementation does not support H_RPT_INVALIDATE"); + error_append_hint(errp, + "Try appending -machine cap-rpt-invalidate=off\n"); + } else { + kvmppc_enable_h_rpt_invalidate(); + } + } +} + SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { [SPAPR_CAP_HTM] = { .name = "htm", @@ -631,6 +721,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "bool", .apply = cap_fwnmi_apply, }, + [SPAPR_CAP_RPT_INVALIDATE] = { + .name = "rpt-invalidate", + .description = "Allow H_RPT_INVALIDATE", + .index = SPAPR_CAP_RPT_INVALIDATE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_rpt_invalidate_apply, + }, }; static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, @@ -771,6 +870,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); +SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE); void spapr_caps_init(SpaprMachineState *spapr) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 4f316a6f9d3..58e7341cb78 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -382,6 +382,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"), #ifdef CONFIG_KVM DEFINE_SPAPR_CPU_CORE_TYPE("host"), #endif diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 9e16505fa1b..f8ac0a10df1 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -13,11 +13,12 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qmp/qnull.h" -#include "cpu.h" #include "qemu/cutils.h" #include "hw/ppc/spapr_drc.h" #include 
"qom/object.h" #include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ @@ -151,9 +152,34 @@ static uint32_t drc_isolate_logical(SpaprDrc *drc) static uint32_t drc_unisolate_logical(SpaprDrc *drc) { + SpaprMachineState *spapr = NULL; + switch (drc->state) { case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + /* + * Unisolating a logical DRC that was marked for unplug + * means that the kernel is refusing the removal. + */ + if (drc->unplug_requested && drc->dev) { + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) { + spapr = SPAPR_MACHINE(qdev_get_machine()); + + spapr_memory_unplug_rollback(spapr, drc->dev); + } + + drc->unplug_requested = false; + + if (drc->dev->id) { + error_report("Device hotunplug rejected by the guest " + "for device %s", drc->dev->id); + } + + qapi_event_send_device_unplug_guest_error(!!drc->dev->id, + drc->dev->id, + drc->dev->canonical_path); + } + return RTAS_OUT_SUCCESS; /* Nothing to do */ case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: break; /* see below */ diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index d51daedfa6e..630e86282c9 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "sysemu/device_tree.h" #include "sysemu/runstate.h" @@ -873,7 +872,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUState *cs = CPU(cpu); int ret; - Error *local_err = NULL; if (spapr->fwnmi_machine_check_addr == -1) { /* Non-FWNMI case, deliver it like an architected CPU interrupt. */ @@ -913,16 +911,17 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) } } - ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err); + /* + * Try to block migration while FWNMI is being handled, so the + * machine check handler runs where the information passed to it + * actually makes sense. This shouldn't actually block migration, + * only delay it slightly, assuming migration is retried. If the + * attempt to block fails, carry on. Unfortunately, it always + * fails when running with -only-migrate. A proper interface to + * delay migration completion for a bit could avoid that. + */ + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL); if (ret == -EBUSY) { - /* - * We don't want to abort so we let the migration to continue. - * In a rare case, the machine check handler will run on the target. - * Though this is not preferable, it is better than aborting - * the migration or killing the VM. It is okay to call - * migrate_del_blocker on a blocker that was not added (which the - * nmi-interlock handler would do when it's called after this). 
- */ warn_report("Received a fwnmi while migration was in progress"); } @@ -935,7 +934,6 @@ static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t nret, target_ulong rets) { uint32_t mask, buf, len, event_len; - uint64_t xinfo; SpaprEventLogEntry *event; struct rtas_error_log header; int i; @@ -945,13 +943,9 @@ static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, return; } - xinfo = rtas_ld(args, 1); mask = rtas_ld(args, 2); buf = rtas_ld(args, 4); len = rtas_ld(args, 5); - if (nargs == 7) { - xinfo |= (uint64_t)rtas_ld(args, 6) << 32; - } event = rtas_event_log_dequeue(spapr, mask); if (!event) { diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 7b5cd3553c2..222c1b6bbdb 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -7,7 +7,6 @@ #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/error-report.h" -#include "cpu.h" #include "exec/exec-all.h" #include "helper_regs.h" #include "hw/ppc/spapr.h" @@ -18,27 +17,11 @@ #include "kvm_ppc.h" #include "hw/ppc/fdt.h" #include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_numa.h" #include "mmu-book3s-v3.h" #include "hw/mem/memory-device.h" -static bool has_spr(PowerPCCPU *cpu, int spr) -{ - /* We can test whether the SPR is defined by checking for a valid name */ - return cpu->env.spr_cb[spr].name != NULL; -} - -static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) -{ - /* - * hash value/pteg group index is normalized by HPT mask - */ - if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { - return false; - } - return true; -} - -static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) { MachineState *machine = MACHINE(spapr); DeviceMemoryState *dms = machine->device_memory; @@ -54,355 +37,6 @@ static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) return false; } -static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong pteh = args[2]; - target_ulong ptel = args[3]; - unsigned apshift; - target_ulong raddr; - target_ulong slot; - const ppc_hash_pte64_t *hptes; - - apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); - if (!apshift) { - /* Bad page size encoding */ - return H_PARAMETER; - } - - raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); - - if (is_ram_address(spapr, raddr)) { - /* Regular RAM - should have WIMG=0010 */ - if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { - return H_PARAMETER; - } - } else { - target_ulong wimg_flags; - /* Looks like an IO address */ - /* FIXME: What WIMG combinations could be sensible for IO? - * For now we allow WIMG=010x, but are there others? */ - /* FIXME: Should we check against registered IO addresses? 
*/ - wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); - - if (wimg_flags != HPTE64_R_I && - wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { - return H_PARAMETER; - } - } - - pteh &= ~0x60ULL; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - slot = ptex & 7ULL; - ptex = ptex & ~7ULL; - - if (likely((flags & H_EXACT) == 0)) { - hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); - for (slot = 0; slot < 8; slot++) { - if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { - break; - } - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - if (slot == 8) { - return H_PTEG_FULL; - } - } else { - hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); - if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { - ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); - return H_PTEG_FULL; - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - } - - spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); - - args[0] = ptex + slot; - return H_SUCCESS; -} - -typedef enum { - REMOVE_SUCCESS = 0, - REMOVE_NOT_FOUND = 1, - REMOVE_PARM = 2, - REMOVE_HW = 3, -} RemoveResult; - -static RemoveResult remove_hpte(PowerPCCPU *cpu - , target_ulong ptex, - target_ulong avpn, - target_ulong flags, - target_ulong *vp, target_ulong *rp) -{ - const ppc_hash_pte64_t *hptes; - target_ulong v, r; - - if (!valid_ptex(cpu, ptex)) { - return REMOVE_PARM; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, 1); - v = ppc_hash64_hpte0(cpu, hptes, 0); - r = ppc_hash64_hpte1(cpu, hptes, 0); - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - - if ((v & HPTE64_V_VALID) == 0 || - ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || - ((flags & H_ANDCOND) && (v & avpn) != 0)) { - return REMOVE_NOT_FOUND; - } - *vp = v; - *rp = r; - spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); - return REMOVE_SUCCESS; -} - -static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong avpn = args[2]; - RemoveResult ret; - - ret = remove_hpte(cpu, ptex, avpn, flags, - &args[0], &args[1]); - - switch (ret) { - case REMOVE_SUCCESS: - check_tlb_flush(env, true); - return H_SUCCESS; - - case REMOVE_NOT_FOUND: - return H_NOT_FOUND; - - case REMOVE_PARM: - return H_PARAMETER; - - case REMOVE_HW: - return H_HARDWARE; - } - - g_assert_not_reached(); -} - -#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL -#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL -#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL -#define H_BULK_REMOVE_END 0xc000000000000000ULL -#define H_BULK_REMOVE_CODE 0x3000000000000000ULL -#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL -#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL -#define H_BULK_REMOVE_PARM 0x2000000000000000ULL -#define H_BULK_REMOVE_HW 0x3000000000000000ULL -#define H_BULK_REMOVE_RC 0x0c00000000000000ULL -#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL -#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL -#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL -#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL -#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL - -#define H_BULK_REMOVE_MAX_BATCH 4 - -static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - int i; - target_ulong rc = H_SUCCESS; - - for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { - 
target_ulong *tsh = &args[i*2]; - target_ulong tsl = args[i*2 + 1]; - target_ulong v, r, ret; - - if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { - break; - } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { - return H_PARAMETER; - } - - *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; - *tsh |= H_BULK_REMOVE_RESPONSE; - - if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { - *tsh |= H_BULK_REMOVE_PARM; - return H_PARAMETER; - } - - ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, - (*tsh & H_BULK_REMOVE_FLAGS) >> 26, - &v, &r); - - *tsh |= ret << 60; - - switch (ret) { - case REMOVE_SUCCESS: - *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; - break; - - case REMOVE_PARM: - rc = H_PARAMETER; - goto exit; - - case REMOVE_HW: - rc = H_HARDWARE; - goto exit; - } - } - exit: - check_tlb_flush(env, true); - - return rc; -} - -static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong avpn = args[2]; - const ppc_hash_pte64_t *hptes; - target_ulong v, r; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, 1); - v = ppc_hash64_hpte0(cpu, hptes, 0); - r = ppc_hash64_hpte1(cpu, hptes, 0); - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - - if ((v & HPTE64_V_VALID) == 0 || - ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { - return H_NOT_FOUND; - } - - r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | - HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); - r |= (flags << 55) & HPTE64_R_PP0; - r |= (flags << 48) & HPTE64_R_KEY_HI; - r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); - spapr_store_hpte(cpu, ptex, - (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); - /* Flush the tlb */ - check_tlb_flush(env, true); - /* Don't need a memory barrier, due to qemu's global lock */ - spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); - return H_SUCCESS; -} - -static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - int i, ridx, n_entries = 1; - const ppc_hash_pte64_t *hptes; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - if (flags & H_READ_4) { - /* Clear the two low order bits */ - ptex &= ~(3ULL); - n_entries = 4; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); - for (i = 0, ridx = 0; i < n_entries; i++) { - args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); - args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); - - return H_SUCCESS; -} - -struct SpaprPendingHpt { - /* These fields are read-only after initialization */ - int shift; - QemuThread thread; - - /* These fields are protected by the BQL */ - bool complete; - - /* These fields are private to the preparation thread if - * !complete, otherwise protected by the BQL */ - int ret; - void *hpt; -}; - -static void free_pending_hpt(SpaprPendingHpt *pending) -{ - if (pending->hpt) { - qemu_vfree(pending->hpt); - } - - g_free(pending); -} - -static void *hpt_prepare_thread(void *opaque) -{ - SpaprPendingHpt *pending = opaque; - size_t size = 1ULL << pending->shift; - - pending->hpt = qemu_try_memalign(size, size); - if (pending->hpt) { - memset(pending->hpt, 0, size); - pending->ret = H_SUCCESS; - } else { - pending->ret = H_NO_MEM; - } - - 
qemu_mutex_lock_iothread(); - - if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { - /* Ready to go */ - pending->complete = true; - } else { - /* We've been cancelled, clean ourselves up */ - free_pending_hpt(pending); - } - - qemu_mutex_unlock_iothread(); - return NULL; -} - -/* Must be called with BQL held */ -static void cancel_hpt_prepare(SpaprMachineState *spapr) -{ - SpaprPendingHpt *pending = spapr->pending_hpt; - - /* Let the thread know it's cancelled */ - spapr->pending_hpt = NULL; - - if (!pending) { - /* Nothing to do */ - return; - } - - if (!pending->complete) { - /* thread will clean itself up */ - return; - } - - free_pending_hpt(pending); -} - /* Convert a return code from the KVM ioctl()s implementing resize HPT * into a PAPR hypercall return code */ static target_ulong resize_hpt_convert_rc(int ret) @@ -448,7 +82,6 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, { target_ulong flags = args[0]; int shift = args[1]; - SpaprPendingHpt *pending = spapr->pending_hpt; uint64_t current_ram_size; int rc; @@ -485,182 +118,11 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, return resize_hpt_convert_rc(rc); } - if (pending) { - /* something already in progress */ - if (pending->shift == shift) { - /* and it's suitable */ - if (pending->complete) { - return pending->ret; - } else { - return H_LONG_BUSY_ORDER_100_MSEC; - } - } - - /* not suitable, cancel and replace */ - cancel_hpt_prepare(spapr); - } - - if (!shift) { - /* nothing to do */ - return H_SUCCESS; - } - - /* start new prepare */ - - pending = g_new0(SpaprPendingHpt, 1); - pending->shift = shift; - pending->ret = H_HARDWARE; - - qemu_thread_create(&pending->thread, "sPAPR HPT prepare", - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); - - spapr->pending_hpt = pending; - - /* In theory we could estimate the time more accurately based on - * the new size, but there's not much point */ - return H_LONG_BUSY_ORDER_100_MSEC; -} - -static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) -{ - uint8_t *addr = htab; - - addr += pteg * HASH_PTEG_SIZE_64; - addr += slot * HASH_PTE_SIZE_64; - return ldq_p(addr); -} - -static void new_hpte_store(void *htab, uint64_t pteg, int slot, - uint64_t pte0, uint64_t pte1) -{ - uint8_t *addr = htab; - - addr += pteg * HASH_PTEG_SIZE_64; - addr += slot * HASH_PTE_SIZE_64; - - stq_p(addr, pte0); - stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); -} - -static int rehash_hpte(PowerPCCPU *cpu, - const ppc_hash_pte64_t *hptes, - void *old_hpt, uint64_t oldsize, - void *new_hpt, uint64_t newsize, - uint64_t pteg, int slot) -{ - uint64_t old_hash_mask = (oldsize >> 7) - 1; - uint64_t new_hash_mask = (newsize >> 7) - 1; - target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); - target_ulong pte1; - uint64_t avpn; - unsigned base_pg_shift; - uint64_t hash, new_pteg, replace_pte0; - - if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { - return H_SUCCESS; - } - - pte1 = ppc_hash64_hpte1(cpu, hptes, slot); - - base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); - assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ - avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); - - if (pte0 & HPTE64_V_SECONDARY) { - pteg = ~pteg; - } - - if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { - uint64_t offset, vsid; - - /* We only have 28 - 23 bits of offset in avpn */ - offset = (avpn & 0x1f) << 23; - vsid = avpn >> 5; - /* We can find more bits from the pteg value */ - if (base_pg_shift < 23) { - offset |= 
((vsid ^ pteg) & old_hash_mask) << base_pg_shift; - } - - hash = vsid ^ (offset >> base_pg_shift); - } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { - uint64_t offset, vsid; - - /* We only have 40 - 23 bits of seg_off in avpn */ - offset = (avpn & 0x1ffff) << 23; - vsid = avpn >> 17; - if (base_pg_shift < 23) { - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) - << base_pg_shift; - } - - hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); - } else { - error_report("rehash_pte: Bad segment size in HPTE"); + if (kvm_enabled()) { return H_HARDWARE; } - new_pteg = hash & new_hash_mask; - if (pte0 & HPTE64_V_SECONDARY) { - assert(~pteg == (hash & old_hash_mask)); - new_pteg = ~new_pteg; - } else { - assert(pteg == (hash & old_hash_mask)); - } - assert((oldsize != newsize) || (pteg == new_pteg)); - replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); - /* - * Strictly speaking, we don't need all these tests, since we only - * ever rehash bolted HPTEs. We might in future handle non-bolted - * HPTEs, though so make the logic correct for those cases as - * well. - */ - if (replace_pte0 & HPTE64_V_VALID) { - assert(newsize < oldsize); - if (replace_pte0 & HPTE64_V_BOLTED) { - if (pte0 & HPTE64_V_BOLTED) { - /* Bolted collision, nothing we can do */ - return H_PTEG_FULL; - } else { - /* Discard this hpte */ - return H_SUCCESS; - } - } - } - - new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); - return H_SUCCESS; -} - -static int rehash_hpt(PowerPCCPU *cpu, - void *old_hpt, uint64_t oldsize, - void *new_hpt, uint64_t newsize) -{ - uint64_t n_ptegs = oldsize >> 7; - uint64_t pteg; - int slot; - int rc; - - for (pteg = 0; pteg < n_ptegs; pteg++) { - hwaddr ptex = pteg * HPTES_PER_GROUP; - const ppc_hash_pte64_t *hptes - = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); - - if (!hptes) { - return H_HARDWARE; - } - - for (slot = 0; slot < HPTES_PER_GROUP; slot++) { - rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, - pteg, slot); - if (rc != H_SUCCESS) { - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - return rc; - } - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - } - - return H_SUCCESS; + return softmmu_resize_hpt_prepare(cpu, spapr, shift); } static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) @@ -676,7 +138,7 @@ static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) } } -static void push_sregs_to_kvm_pr(SpaprMachineState *spapr) +void push_sregs_to_kvm_pr(SpaprMachineState *spapr) { CPUState *cs; @@ -701,9 +163,7 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, { target_ulong flags = args[0]; target_ulong shift = args[1]; - SpaprPendingHpt *pending = spapr->pending_hpt; int rc; - size_t newsize; if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { return H_AUTHORITY; @@ -726,42 +186,14 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, return rc; } - if (flags != 0) { - return H_PARAMETER; - } - - if (!pending || (pending->shift != shift)) { - /* no matching prepare */ - return H_CLOSED; - } - - if (!pending->complete) { - /* prepare has not completed */ - return H_BUSY; + if (kvm_enabled()) { + return H_HARDWARE; } - /* Shouldn't have got past PREPARE without an HPT */ - g_assert(spapr->htab_shift); - - newsize = 1ULL << pending->shift; - rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), - pending->hpt, newsize); - if (rc == H_SUCCESS) { - qemu_vfree(spapr->htab); - spapr->htab = pending->hpt; - spapr->htab_shift = pending->shift; - - push_sregs_to_kvm_pr(spapr); 
- - pending->hpt = NULL; /* so it's not free()d */ - } + return softmmu_resize_hpt_commit(cpu, spapr, flags, shift); +} - /* Clean up */ - spapr->pending_hpt = NULL; - free_pending_hpt(pending); - return rc; -} static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) @@ -775,12 +207,12 @@ static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) { - if (!has_spr(cpu, SPR_DABR)) { + if (!ppc_has_spr(cpu, SPR_DABR)) { return H_HARDWARE; /* DABR register not available */ } cpu_synchronize_state(CPU(cpu)); - if (has_spr(cpu, SPR_DABRX)) { + if (ppc_has_spr(cpu, SPR_DABRX)) { cpu->env.spr[SPR_DABRX] = 0x3; /* Use Problem and Privileged state */ } else if (!(args[0] & 0x4)) { /* Breakpoint Translation set? */ return H_RESERVED_DABR; @@ -795,7 +227,7 @@ static target_ulong h_set_xdabr(PowerPCCPU *cpu, SpaprMachineState *spapr, { target_ulong dabrx = args[1]; - if (!has_spr(cpu, SPR_DABR) || !has_spr(cpu, SPR_DABRX)) { + if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) { return H_HARDWARE; } @@ -1395,7 +827,13 @@ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, return H_P4; } - if (mflags == AIL_RESERVED) { + if (mflags == 1) { + /* AIL=1 is reserved in POWER8/POWER9/POWER10 */ + return H_UNSUPPORTED_FLAG; + } + + if (mflags == 2 && (pcc->insns_flags2 & PPC2_ISA310)) { + /* AIL=2 is reserved in POWER10 (ISA v3.1) */ return H_UNSUPPORTED_FLAG; } @@ -1755,21 +1193,17 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); spapr_ovec_cleanup(ov5_guest); - if (guest_radix) { - if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { - error_report("Guest requested unavailable MMU mode (radix)."); - exit(EXIT_FAILURE); - } - } else { - if (kvm_enabled() && kvmppc_has_cap_mmu_radix() - && !kvmppc_has_cap_mmu_hash_v3()) { - error_report("Guest requested unavailable MMU mode (hash)."); - exit(EXIT_FAILURE); - } - } + spapr_check_mmu_mode(guest_radix); + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); spapr_ovec_cleanup(ov1_guest); + /* + * Check for NUMA affinity conditions now that we know which NUMA + * affinity the guest will use. + */ + spapr_numa_associativity_check(spapr); + /* * Ensure the guest asks for an interrupt mode we support; * otherwise terminate the boot. @@ -1806,8 +1240,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_setup_hpt(spapr); } - fdt = spapr_build_fdt(spapr, false, fdt_bufsize); - + fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize); g_free(spapr->fdt_blob); spapr->fdt_size = fdt_totalsize(fdt); spapr->fdt_initial_size = spapr->fdt_size; @@ -1850,6 +1283,25 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, return ret; } +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + target_ulong ret = do_client_architecture_support(POWERPC_CPU(cs), spapr, + ovec_addr, FDT_MAX_SIZE); + + /* + * This adds stdout and generates phandles for boottime and CAS FDTs. + * It is alright to update the FDT here as do_client_architecture_support() + * does not pack it. 
+ */ + spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob); + + return ret; +} + static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, @@ -1872,6 +1324,8 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; break; case SPAPR_CAP_FIXED: + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY; + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS; break; default: /* broken */ assert(safe_cache == SPAPR_CAP_BROKEN); @@ -2018,16 +1472,34 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, return H_FUNCTION; } -static void hypercall_register_types(void) +#ifndef CONFIG_TCG +static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + g_assert_not_reached(); +} + +static void hypercall_register_softmmu(void) { /* hcall-pft */ - spapr_register_hypercall(H_ENTER, h_enter); - spapr_register_hypercall(H_REMOVE, h_remove); - spapr_register_hypercall(H_PROTECT, h_protect); - spapr_register_hypercall(H_READ, h_read); + spapr_register_hypercall(H_ENTER, h_softmmu); + spapr_register_hypercall(H_REMOVE, h_softmmu); + spapr_register_hypercall(H_PROTECT, h_softmmu); + spapr_register_hypercall(H_READ, h_softmmu); /* hcall-bulk */ - spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + spapr_register_hypercall(H_BULK_REMOVE, h_softmmu); +} +#else +static void hypercall_register_softmmu(void) +{ + /* DO NOTHING */ +} +#endif + +static void hypercall_register_types(void) +{ + hypercall_register_softmmu(); /* hcall-hpt-resize */ spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare); diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 24537ffcbd3..db010718589 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -25,7 +25,6 @@ #include "kvm_ppc.h" #include "migration/vmstate.h" #include "sysemu/dma.h" -#include "exec/address-spaces.h" #include "trace.h" #include "hw/ppc/spapr.h" diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c index 779f18b9943..e9ef7e76469 100644 --- a/hw/ppc/spapr_numa.c +++ b/hw/ppc/spapr_numa.c @@ -19,25 +19,88 @@ /* Moved from hw/ppc/spapr_pci_nvlink2.c */ #define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) -static bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr) +/* + * Retrieves max_dist_ref_points of the current NUMA affinity. + */ +static int get_max_dist_ref_points(SpaprMachineState *spapr) { - MachineState *machine = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_DIST_REF_POINTS; + } + + return FORM1_DIST_REF_POINTS; +} + +/* + * Retrieves numa_assoc_size of the current NUMA affinity. + */ +static int get_numa_assoc_size(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_NUMA_ASSOC_SIZE; + } + + return FORM1_NUMA_ASSOC_SIZE; +} - return smc->pre_5_2_numa_associativity || - machine->numa_state->num_nodes <= 1; +/* + * Retrieves vcpu_assoc_size of the current NUMA affinity. + * + * vcpu_assoc_size is the size of ibm,associativity array + * for CPUs, which has an extra element (vcpu_id) in the end. + */ +static int get_vcpu_assoc_size(SpaprMachineState *spapr) +{ + return get_numa_assoc_size(spapr) + 1; +} + +/* + * Retrieves the ibm,associativity array of NUMA node 'node_id' + * for the current NUMA affinity. 
+ */ +static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return spapr->FORM2_assoc_array[node_id]; + } + return spapr->FORM1_assoc_array[node_id]; +} + +/* + * Wrapper that returns node distance from ms->numa_state->nodes + * after handling edge cases where the distance might be absent. + */ +static int get_numa_distance(MachineState *ms, int src, int dst) +{ + NodeInfo *numa_info = ms->numa_state->nodes; + int ret = numa_info[src].distance[dst]; + + if (ret != 0) { + return ret; + } + + /* + * In case QEMU adds a default NUMA single node when the user + * did not add any, or where the user did not supply distances, + * the distance will be absent (zero). Return local/remote + * distance in this case. + */ + if (src == dst) { + return NUMA_DISTANCE_MIN; + } + + return NUMA_DISTANCE_DEFAULT; } static bool spapr_numa_is_symmetrical(MachineState *ms) { - int src, dst; int nb_numa_nodes = ms->numa_state->num_nodes; - NodeInfo *numa_info = ms->numa_state->nodes; + int src, dst; for (src = 0; src < nb_numa_nodes; src++) { for (dst = src; dst < nb_numa_nodes; dst++) { - if (numa_info[src].distance[dst] != - numa_info[dst].distance[src]) { + if (get_numa_distance(ms, src, dst) != + get_numa_distance(ms, dst, src)) { return false; } } @@ -92,12 +155,22 @@ static uint8_t spapr_numa_get_numa_level(uint8_t distance) return 0; } -static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) +static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr) { MachineState *ms = MACHINE(spapr); - NodeInfo *numa_info = ms->numa_state->nodes; int nb_numa_nodes = ms->numa_state->num_nodes; - int src, dst, i; + int src, dst, i, j; + + /* + * Fill all associativity domains of non-zero NUMA nodes with + * node_id. This is required because the default value (0) is + * considered a match with associativity domains of node 0. + */ + for (i = 1; i < nb_numa_nodes; i++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { + spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i); + } + } for (src = 0; src < nb_numa_nodes; src++) { for (dst = src; dst < nb_numa_nodes; dst++) { @@ -121,7 +194,7 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * The PPC kernel expects the associativity domains of node 0 to * be always 0, and this algorithm will grant that by default. */ - uint8_t distance = numa_info[src].distance[dst]; + uint8_t distance = get_numa_distance(ms, src, dst); uint8_t n_level = spapr_numa_get_numa_level(distance); uint32_t assoc_src; @@ -132,7 +205,7 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * * The Linux kernel will assume that the distance between src and * dst, in this case of no match, is 10 (local distance) doubled - * for each NUMA it didn't match. We have MAX_DISTANCE_REF_POINTS + * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS * levels (4), so this gives us 10*2*2*2*2 = 160. * * This logic can be seen in the Linux kernel source code, as of @@ -147,25 +220,69 @@ static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr) * and going up to 0x1. 
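* * A worked example (illustrative values): if the user configures a * distance of 40 between src and dst, spapr_numa_get_numa_level() picks an * intermediate n_level; the loop below then copies src's domains into dst at * positions n_level down to 0x1, and each level left unshared makes the * guest kernel double the local distance (10), as described above.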
*/ for (i = n_level; i > 0; i--) { - assoc_src = spapr->numa_assoc_array[src][i]; - spapr->numa_assoc_array[dst][i] = assoc_src; + assoc_src = spapr->FORM1_assoc_array[src][i]; + spapr->FORM1_assoc_array[dst][i] = assoc_src; } } } } -void spapr_numa_associativity_init(SpaprMachineState *spapr, - MachineState *machine) +static void spapr_numa_FORM1_affinity_check(MachineState *machine) +{ + int i; + + /* + * Check we don't have a memory-less/cpu-less NUMA node + * Firmware relies on the existing memory/cpu topology to provide the + * NUMA topology to the kernel. + * And the linux kernel needs to know the NUMA topology at start + * to be able to hotplug CPUs later. + */ + if (machine->numa_state->num_nodes) { + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + /* check for memory-less node */ + if (machine->numa_state->nodes[i].node_mem == 0) { + CPUState *cs; + int found = 0; + /* check for cpu-less node */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + if (cpu->node_id == i) { + found = 1; + break; + } + } + /* memory-less and cpu-less node */ + if (!found) { + error_report( +"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i); + exit(EXIT_FAILURE); + } + } + } + } + + if (!spapr_numa_is_symmetrical(machine)) { + error_report( +"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA"); + exit(EXIT_FAILURE); + } +} + +/* + * Set NUMA machine state data based on FORM1 affinity semantics. + */ +static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr, + MachineState *machine) { SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; int i, j, max_nodes_with_gpus; - bool using_legacy_numa = spapr_machine_using_legacy_numa(spapr); /* * For all associativity arrays: first position is the size, - * position MAX_DISTANCE_REF_POINTS is always the numa_id, + * position FORM1_DIST_REF_POINTS is always the numa_id, * represented by the index 'i'. * * This will break on sparse NUMA setups, when/if QEMU starts @@ -173,19 +290,8 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, * 'i' will be a valid node_id set by the user. */ for (i = 0; i < nb_numa_nodes; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); - - /* - * Fill all associativity domains of non-zero NUMA nodes with - * node_id. This is required because the default value (0) is - * considered a match with associativity domains of node 0. - */ - if (!using_legacy_numa && i != 0) { - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { - spapr->numa_assoc_array[i][j] = cpu_to_be32(i); - } - } + spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS); + spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i); } /* @@ -199,47 +305,95 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr, max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS); - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? 
SPAPR_GPU_NUMA_ID : cpu_to_be32(i); - spapr->numa_assoc_array[i][j] = gpu_assoc; + spapr->FORM1_assoc_array[i][j] = gpu_assoc; } - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); + spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i); } /* - * Legacy NUMA guests (pseries-5.1 and older, or guests with only - * 1 NUMA node) will not benefit from anything we're going to do - * after this point. + * Guests pseries-5.1 and older uses zeroed associativity domains, + * i.e. no domain definition based on NUMA distance input. + * + * Same thing with guests that have only one NUMA node. */ - if (using_legacy_numa) { + if (smc->pre_5_2_numa_associativity || + machine->numa_state->num_nodes <= 1) { return; } - if (!spapr_numa_is_symmetrical(machine)) { - error_report("Asymmetrical NUMA topologies aren't supported " - "in the pSeries machine"); - exit(EXIT_FAILURE); + spapr_numa_define_FORM1_domains(spapr); +} + +/* + * Init NUMA FORM2 machine state data + */ +static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr) +{ + int i; + + /* + * For all resources but CPUs, FORM2 associativity arrays will + * be a size 2 array with the following format: + * + * ibm,associativity = {1, numa_id} + * + * CPUs will write an additional 'vcpu_id' on top of the arrays + * being initialized here. 'numa_id' is represented by the + * index 'i' of the loop. + * + * Given that this initialization is also valid for GPU associativity + * arrays, handle everything in one single step by populating the + * arrays up to NUMA_NODES_MAX_NUM. + */ + for (i = 0; i < NUMA_NODES_MAX_NUM; i++) { + spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1); + spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i); + } +} + +void spapr_numa_associativity_init(SpaprMachineState *spapr, + MachineState *machine) +{ + spapr_numa_FORM1_affinity_init(spapr, machine); + spapr_numa_FORM2_affinity_init(spapr); +} + +void spapr_numa_associativity_check(SpaprMachineState *spapr) +{ + /* + * FORM2 does not have any restrictions we need to handle + * at CAS time, for now. + */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return; } - spapr_numa_define_associativity_domains(spapr); + spapr_numa_FORM1_affinity_check(MACHINE(spapr)); } void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, int offset, int nodeid) { + const uint32_t *associativity = get_associativity(spapr, nodeid); + _FDT((fdt_setprop(fdt, offset, "ibm,associativity", - spapr->numa_assoc_array[nodeid], - sizeof(spapr->numa_assoc_array[nodeid])))); + associativity, + get_numa_assoc_size(spapr) * sizeof(uint32_t)))); } static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, PowerPCCPU *cpu) { - uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE); + const uint32_t *associativity = get_associativity(spapr, cpu->node_id); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size); int index = spapr_get_vcpu_id(cpu); /* @@ -248,10 +402,10 @@ static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, * 0, put cpu_id last, then copy the remaining associativity * domains. 
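* * E.g. (illustrative values, d1..d3 being placeholder domain ids): with * FORM1's four reference points, a vcpu with id 8 on a node whose array is * { 4, d1, d2, d3, 1 } ends up with { 5, d1, d2, d3, 1, 8 }: the size grows * by one, the vcpu id goes last, and the remaining domains are copied as-is.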
*/ - vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); - vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index); - memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1, - (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t)); + vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1); + vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + memcpy(vcpu_assoc + 1, associativity + 1, + (vcpu_assoc_size - 2) * sizeof(uint32_t)); return vcpu_assoc; } @@ -260,12 +414,13 @@ int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, int offset, PowerPCCPU *cpu) { g_autofree uint32_t *vcpu_assoc = NULL; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); /* Advertise NUMA via ibm,associativity */ return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, - VCPU_ASSOC_SIZE * sizeof(uint32_t)); + vcpu_assoc_size * sizeof(uint32_t)); } @@ -273,27 +428,28 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, int offset) { MachineState *machine = MACHINE(spapr); + int max_distance_ref_points = get_max_dist_ref_points(spapr); int nb_numa_nodes = machine->numa_state->num_nodes; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; uint32_t *int_buf, *cur_index, buf_len; int ret, i; /* ibm,associativity-lookup-arrays */ - buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t); + buf_len = (nr_nodes * max_distance_ref_points + 2) * sizeof(uint32_t); cur_index = int_buf = g_malloc0(buf_len); int_buf[0] = cpu_to_be32(nr_nodes); /* Number of entries per associativity list */ - int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + int_buf[1] = cpu_to_be32(max_distance_ref_points); cur_index += 2; for (i = 0; i < nr_nodes; i++) { /* - * For the lookup-array we use the ibm,associativity array, - * from numa_assoc_array. without the first element (size). + * For the lookup-array we use the ibm,associativity array of the + * current NUMA affinity, without the first element (size). */ - uint32_t *associativity = spapr->numa_assoc_array[i]; + const uint32_t *associativity = get_associativity(spapr, i); memcpy(cur_index, ++associativity, - sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS); - cur_index += MAX_DISTANCE_REF_POINTS; + sizeof(uint32_t) * max_distance_ref_points); + cur_index += max_distance_ref_points; } ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, (cur_index - int_buf) * sizeof(uint32_t)); @@ -302,12 +458,8 @@ int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, return ret; } -/* - * Helper that writes ibm,associativity-reference-points and - * max-associativity-domains in the RTAS pointed by @rtas - * in the DT @fdt. 
- */ -void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) { MachineState *ms = MACHINE(spapr); SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); @@ -329,7 +481,8 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) cpu_to_be32(maxdomain) }; - if (spapr_machine_using_legacy_numa(spapr)) { + if (smc->pre_5_2_numa_associativity || + ms->numa_state->num_nodes <= 1) { uint32_t legacy_refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4), @@ -365,6 +518,113 @@ void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) maxdomains, sizeof(maxdomains))); } +static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + int nb_numa_nodes = ms->numa_state->num_nodes; + int distance_table_entries = nb_numa_nodes * nb_numa_nodes; + g_autofree uint32_t *lookup_index_table = NULL; + g_autofree uint8_t *distance_table = NULL; + int src, dst, i, distance_table_size; + + /* + * ibm,numa-lookup-index-table: array with length and a + * list of NUMA ids present in the guest. + */ + lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1); + lookup_index_table[0] = cpu_to_be32(nb_numa_nodes); + + for (i = 0; i < nb_numa_nodes; i++) { + lookup_index_table[i + 1] = cpu_to_be32(i); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table", + lookup_index_table, + (nb_numa_nodes + 1) * sizeof(uint32_t))); + + /* + * ibm,numa-distance-table: contains all node distances. First + * element is the size of the table as uint32, followed up + * by all the uint8 distances from the first NUMA node, then all + * distances from the second NUMA node and so on. + * + * ibm,numa-lookup-index-table is used by guest to navigate this + * array because NUMA ids can be sparse (node 0 is the first, + * node 8 is the second ...). + */ + distance_table_size = distance_table_entries * sizeof(uint8_t) + + sizeof(uint32_t); + distance_table = g_new0(uint8_t, distance_table_size); + stl_be_p(distance_table, distance_table_entries); + + /* Skip the uint32_t array length at the start */ + i = sizeof(uint32_t); + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + distance_table[i++] = get_numa_distance(ms, src, dst); + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table", + distance_table, distance_table_size)); +} + +/* + * This helper could be compressed in a single function with + * FORM1 logic since we're setting the same DT values, with the + * difference being a call to spapr_numa_FORM2_write_rtas_tables() + * in the end. The separation was made to avoid clogging FORM1 code + * which already has to deal with compat modes from previous + * QEMU machine types. + */ +static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + + /* + * In FORM2, ibm,associativity-reference-points will point to + * the element in the ibm,associativity array that contains the + * primary domain index (for FORM2, the first element). + * + * This value (in our case, the numa-id) is then used as an index + * to retrieve all other attributes of the node (distance, + * bandwidth, latency) via ibm,numa-lookup-index-table and other + * ibm,numa-*-table properties. 
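+ * + * E.g. (illustrative, two NUMA nodes with local/remote distances 10/20): + * spapr_numa_FORM2_write_rtas_tables() below would then emit + * ibm,numa-lookup-index-table = { 2, 0, 1 } and + * ibm,numa-distance-table = { 4, 10, 20, 20, 10 }, while each node's + * ibm,associativity = { 1, numa_id } provides the index used for the lookup.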
+ */ + uint32_t refpoints[] = { cpu_to_be32(1) }; + + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) }; + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); + + spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas); +} + +/* + * Helper that writes ibm,associativity-reference-points and + * max-associativity-domains in the RTAS pointed by @rtas + * in the DT @fdt. + */ +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas); + return; + } + + spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas); +} + static target_ulong h_home_node_associativity(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, @@ -375,6 +635,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, target_ulong procno = args[1]; PowerPCCPU *tcpu; int idx, assoc_idx; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); /* only support procno from H_REGISTER_VPA */ if (flags != 0x1) { @@ -393,7 +654,7 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, * 12 associativity domains for vcpus. Assert and bail if that's * not the case. */ - G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12); + g_assert((vcpu_assoc_size - 1) <= 12); vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu); /* assoc_idx starts at 1 to skip associativity size */ @@ -414,9 +675,9 @@ static target_ulong h_home_node_associativity(PowerPCCPU *cpu, * macro. The ternary will fill the remaining registers with -1 * after we went through vcpu_assoc[]. */ - a = assoc_idx < VCPU_ASSOC_SIZE ? + a = assoc_idx < vcpu_assoc_size ? be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; - b = assoc_idx < VCPU_ASSOC_SIZE ? + b = assoc_idx < vcpu_assoc_size ? be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; args[idx] = ASSOCIATIVITY(a, b); diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c index b46c36917c9..91de1052f23 100644 --- a/hw/ppc/spapr_nvdimm.c +++ b/hw/ppc/spapr_nvdimm.c @@ -31,6 +31,22 @@ #include "qemu/range.h" #include "hw/ppc/spapr_numa.h" +/* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED PPC_BIT(0) + +/* + * The nvdimm size should be aligned to the SCM block size. + * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE + * so that SCM regions do not overlap with dimm memory regions. + * The SCM devices can have variable block sizes. For now, fixing the + * block size to the minimum value.
+ */ +#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE + +/* Have an explicit check for alignment */ +QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); + bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, uint64_t size, Error **errp) { @@ -159,11 +175,11 @@ int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) { - int offset = fdt_subnode_offset(fdt, 0, "persistent-memory"); + int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory"); GSList *iter, *nvdimms = nvdimm_get_device_list(); if (offset < 0) { - offset = fdt_add_subnode(fdt, 0, "persistent-memory"); + offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory"); _FDT(offset); _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); @@ -467,6 +483,37 @@ static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr, return H_SUCCESS; } +static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + + NVDIMMDevice *nvdimm; + uint64_t hbitmap = 0; + uint32_t drc_index = args[0]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED; + + + /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */ + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + nvdimm = NVDIMM(drc->dev); + + /* Update if the nvdimm is unarmed and send its status via health bitmaps */ + if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) { + hbitmap |= PAPR_PMEM_UNARMED; + } + + /* Update the out args with health bitmap/mask */ + args[0] = hbitmap; + args[1] = hbitmap_mask; + + return H_SUCCESS; +} + static void spapr_scm_register_types(void) { /* qemu/scm specific hcalls */ @@ -475,6 +522,7 @@ static void spapr_scm_register_types(void) spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); + spapr_register_hypercall(H_SCM_HEALTH, h_scm_health); } type_init(spapr_scm_register_types) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index feba18cb126..5bfd4aa9e5a 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "hw/irq.h" #include "hw/sysbus.h" #include "migration/vmstate.h" @@ -35,7 +34,6 @@ #include "hw/pci/pci_host.h" #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include #include "trace.h" @@ -784,33 +782,29 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) { - char *path = NULL, *buf = NULL, *host = NULL; + g_autofree char *path = NULL; + g_autofree char *host = NULL; + g_autofree char *devspec = NULL; + char *buf = NULL; /* Get the PCI VFIO host id */ host = object_property_get_str(OBJECT(pdev), "host", NULL); if (!host) { - goto err_out; + return NULL; } /* Construct the path of the file that will give us the DT location */ path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); - g_free(host); - if (!g_file_get_contents(path, &buf, NULL, NULL)) { - goto err_out; + if (!g_file_get_contents(path, &devspec, NULL, NULL)) 
{ + return NULL; } - g_free(path); /* Construct and read from host device tree the loc-code */ - path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf); - g_free(buf); + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", devspec); if (!g_file_get_contents(path, &buf, NULL, NULL)) { - goto err_out; + return NULL; } return buf; - -err_out: - g_free(path); - return NULL; } static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) @@ -1323,8 +1317,7 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, RESOURCE_CELLS_SIZE)); assert(bus); - pci_for_each_device_reverse(bus, pci_bus_num(bus), - spapr_dt_pci_device_cb, &cbinfo); + pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo); if (cbinfo.err) { return cbinfo.err; } @@ -2312,8 +2305,8 @@ static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, return; } - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_enumerate_bridge, bus_no); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge, + bus_no); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); } @@ -2322,9 +2315,8 @@ static void spapr_phb_pci_enumerate(SpaprPhbState *phb) PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; unsigned int bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_enumerate_bridge, - &bus_no); + pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge, + &bus_no); } diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c index 8ef9b40a18d..7fb0cf4d044 100644 --- a/hw/ppc/spapr_pci_nvlink2.c +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -164,8 +164,7 @@ static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev, return; } - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_collect_nvgpu, opaque); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque); } void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) @@ -183,8 +182,8 @@ void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr; bus = PCI_HOST_BRIDGE(sphb)->bus; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_collect_nvgpu, sphb->nvgpus); + pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu, + sphb->nvgpus); if (sphb->nvgpus->err) { error_propagate(errp, sphb->nvgpus->err); diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index e0547b17408..2a76b4e0b51 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include -#include "cpu.h" #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" #include "hw/pci/msix.h" @@ -47,6 +46,16 @@ void spapr_phb_vfio_reset(DeviceState *qdev) spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); } +static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + bool *found = opaque; + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + *found = true; + } +} + int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, unsigned int addr, int option) { @@ -59,17 +68,33 @@ int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, break; case RTAS_EEH_ENABLE: { PCIHostState *phb; - PCIDevice *pdev; + bool found = false; /* - * The EEH functionality is enabled on basis of PCI device, - * instead of PE. We need check the validity of the PCI - * device address. + * The EEH functionality is enabled per sphb level instead of + * per PCI device. 
We have already identified this specific sphb + * based on buid passed as argument to ibm,set-eeh-option rtas + * call. Now we just need to check the validity of the PCI + * pass-through devices (vfio-pci) under this sphb bus. + * We have already validated that all the devices under this sphb + * are from same iommu group (within same PE) before comming here. + * + * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: + * Rework device EEH PE determination") kernel would call + * eeh-set-option for each device in the PE using the device's + * config_address as the argument rather than the PE address. + * Hence if we check validity of supplied config_addr whether + * it matches to this PHB will cause issues with older kernel + * versions v5.9 and older. If we return an error from + * eeh-set-option when the argument isn't a valid PE address + * then older kernels (v5.9 and older) will interpret that as + * EEH not being supported. */ phb = PCI_HOST_BRIDGE(sphb); - pdev = pci_find_device(phb->bus, - (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); - if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, + spapr_eeh_pci_find_device, &found); + + if (!found) { return RTAS_OUT_PARAM_ERROR; } @@ -139,8 +164,8 @@ static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) { - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_vfio_eeh_clear_dev_msix, NULL); + pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix, + NULL); } static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb) diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c index d14800e9def..df5c4b96873 100644 --- a/hw/ppc/spapr_rng.c +++ b/hw/ppc/spapr_rng.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/module.h" diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 8a79f9c6289..b476382ae6f 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -26,7 +26,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/log.h" #include "qemu/error-report.h" #include "sysemu/sysemu.h" @@ -41,7 +40,6 @@ #include "hw/ppc/spapr_rtas.h" #include "hw/ppc/spapr_cpu_core.h" #include "hw/ppc/ppc.h" -#include "hw/boards.h" #include #include "hw/ppc/spapr_drc.h" @@ -51,6 +49,7 @@ #include "target/ppc/mmu-hash64.h" #include "target/ppc/mmu-book3s-v3.h" #include "migration/blocker.h" +#include "helper_regs.h" static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, @@ -133,8 +132,8 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, target_ulong id, start, r3; PowerPCCPU *newcpu; CPUPPCState *env; - PowerPCCPUClass *pcc; target_ulong lpcr; + target_ulong caller_lpcr; if (nargs != 3 || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -153,7 +152,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, } env = &newcpu->env; - pcc = POWERPC_CPU_GET_CLASS(newcpu); if (!CPU(newcpu)->halted) { rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -163,12 +161,17 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, cpu_synchronize_state(CPU(newcpu)); env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + hreg_compute_hflags(env); - /* Enable Power-saving mode Exit Cause exceptions for the new CPU */ + caller_lpcr = callcpu->env.spr[SPR_LPCR]; lpcr = env->spr[SPR_LPCR]; - 
if (!pcc->interrupts_big_endian(callcpu)) { - lpcr |= LPCR_ILE; - } + + /* Set ILE the same way */ + lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); + + /* Set AIL the same way */ + lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); + if (env->mmu_model == POWERPC_MMU_3_00) { /* * New cpus are expected to start in the same radix/hash mode diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c index 3501b058199..3e826e1308c 100644 --- a/hw/ppc/spapr_rtas_ddw.c +++ b/hw/ppc/spapr_rtas_ddw.c @@ -18,7 +18,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "hw/ppc/spapr.h" diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index 68cfc578a3a..fba4dfca358 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "cpu.h" #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/ppc/spapr.h" diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c new file mode 100644 index 00000000000..4ee03c83e48 --- /dev/null +++ b/hw/ppc/spapr_softmmu.c @@ -0,0 +1,612 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "cpu.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "mmu-hash64.h" +#include "mmu-book3s-v3.h" + +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) +{ + /* + * hash value/pteg group index is normalized by HPT mask + */ + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { + return false; + } + return true; +} + +static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong pteh = args[2]; + target_ulong ptel = args[3]; + unsigned apshift; + target_ulong raddr; + target_ulong slot; + const ppc_hash_pte64_t *hptes; + + apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); + if (!apshift) { + /* Bad page size encoding */ + return H_PARAMETER; + } + + raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); + + if (is_ram_address(spapr, raddr)) { + /* Regular RAM - should have WIMG=0010 */ + if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { + return H_PARAMETER; + } + } else { + target_ulong wimg_flags; + /* Looks like an IO address */ + /* FIXME: What WIMG combinations could be sensible for IO? + * For now we allow WIMG=010x, but are there others? */ + /* FIXME: Should we check against registered IO addresses? 
*/ + wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); + + if (wimg_flags != HPTE64_R_I && + wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { + return H_PARAMETER; + } + } + + pteh &= ~0x60ULL; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + + if (likely((flags & H_EXACT) == 0)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { + break; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { + return H_PTEG_FULL; + } + } else { + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); + return H_PTEG_FULL; + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + } + + spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); + + args[0] = ptex + slot; + return H_SUCCESS; +} + +typedef enum { + REMOVE_SUCCESS = 0, + REMOVE_NOT_FOUND = 1, + REMOVE_PARM = 2, + REMOVE_HW = 3, +} RemoveResult; + +static RemoveResult remove_hpte(PowerPCCPU *cpu + , target_ulong ptex, + target_ulong avpn, + target_ulong flags, + target_ulong *vp, target_ulong *rp) +{ + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return REMOVE_PARM; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || + ((flags & H_ANDCOND) && (v & avpn) != 0)) { + return REMOVE_NOT_FOUND; + } + *vp = v; + *rp = r; + spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + return REMOVE_SUCCESS; +} + +static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + RemoveResult ret; + + ret = remove_hpte(cpu, ptex, avpn, flags, + &args[0], &args[1]); + + switch (ret) { + case REMOVE_SUCCESS: + check_tlb_flush(env, true); + return H_SUCCESS; + + case REMOVE_NOT_FOUND: + return H_NOT_FOUND; + + case REMOVE_PARM: + return H_PARAMETER; + + case REMOVE_HW: + return H_HARDWARE; + } + + g_assert_not_reached(); +} + +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL + +#define H_BULK_REMOVE_MAX_BATCH 4 + +static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + int i; + target_ulong rc = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + 
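/*
 * Illustrative example, not part of the patch: a guest batches up to
 * H_BULK_REMOVE_MAX_BATCH (request, value) doubleword pairs in args[].
 * For instance, a request to remove the HPTE at ptex 0x123 only if its
 * AVPN matches could look like (names hypothetical):
 *
 *   args[0] = H_BULK_REMOVE_REQUEST | H_BULK_REMOVE_AVPN | 0x123;
 *   args[1] = avpn_to_match;
 *
 * The list is terminated by a doubleword whose type field is
 * H_BULK_REMOVE_END; on return each processed request word is rewritten
 * as an H_BULK_REMOVE_RESPONSE carrying a completion code such as
 * H_BULK_REMOVE_SUCCESS or H_BULK_REMOVE_NOT_FOUND.
 */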
target_ulong *tsh = &args[i*2]; + target_ulong tsl = args[i*2 + 1]; + target_ulong v, r, ret; + + if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; + } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { + return H_PARAMETER; + } + + *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + *tsh |= H_BULK_REMOVE_RESPONSE; + + if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { + *tsh |= H_BULK_REMOVE_PARM; + return H_PARAMETER; + } + + ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, + (*tsh & H_BULK_REMOVE_FLAGS) >> 26, + &v, &r); + + *tsh |= ret << 60; + + switch (ret) { + case REMOVE_SUCCESS: + *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; + break; + + case REMOVE_PARM: + rc = H_PARAMETER; + goto exit; + + case REMOVE_HW: + rc = H_HARDWARE; + goto exit; + } + } + exit: + check_tlb_flush(env, true); + + return rc; +} + +static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { + return H_NOT_FOUND; + } + + r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | + HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); + r |= (flags << 55) & HPTE64_R_PP0; + r |= (flags << 48) & HPTE64_R_KEY_HI; + r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); + spapr_store_hpte(cpu, ptex, + (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + /* Flush the tlb */ + check_tlb_flush(env, true); + /* Don't need a memory barrier, due to qemu's global lock */ + spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); + return H_SUCCESS; +} + +static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + int i, ridx, n_entries = 1; + const ppc_hash_pte64_t *hptes; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + if (flags & H_READ_4) { + /* Clear the two low order bits */ + ptex &= ~(3ULL); + n_entries = 4; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); + for (i = 0, ridx = 0; i < n_entries; i++) { + args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); + args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); + + return H_SUCCESS; +} + +struct SpaprPendingHpt { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(SpaprPendingHpt *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + SpaprPendingHpt *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_try_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + 
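/*
 * A note on the allocation above: qemu_try_memalign(size, size) requests
 * natural alignment, on the assumption that a hashed page table of
 * 2^shift bytes must start on a 2^shift boundary; aligning to the table
 * size itself satisfies that for any shift.
 */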
qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(SpaprMachineState *spapr) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(SpaprPendingHpt, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HPTE64_DW1, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= 
((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong flags, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + push_sregs_to_kvm_pr(spapr); + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + +static void hypercall_register_types(void) +{ + /* hcall-pft */ + spapr_register_hypercall(H_ENTER, h_enter); + spapr_register_hypercall(H_REMOVE, h_remove); + spapr_register_hypercall(H_PROTECT, h_protect); + spapr_register_hypercall(H_READ, h_read); + + /* hcall-bulk */ + spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + +} + +type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c index a01f81f9e04..2454086744b 100644 --- a/hw/ppc/spapr_tpm_proxy.c +++ b/hw/ppc/spapr_tpm_proxy.c @@ -15,7 +15,6 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/reset.h" -#include "cpu.h" #include "hw/ppc/spapr.h" #include "hw/qdev-properties.h" #include "trace.h" diff --git 
a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index ef06e0362c8..b975ed29cad 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -310,7 +310,7 @@ int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq) static void spapr_vio_quiesce_one(SpaprVioDevice *dev) { if (dev->tcet) { - device_legacy_reset(DEVICE(dev->tcet)); + device_cold_reset(DEVICE(dev->tcet)); } free_crq(dev); } @@ -577,7 +577,7 @@ SpaprVioBus *spapr_vio_bus_init(void) sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* Create bus on bridge device */ - qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); + qbus = qbus_new(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); bus = SPAPR_VIO_BUS(qbus); bus->next_reg = SPAPR_VIO_REG_BASE; diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c new file mode 100644 index 00000000000..40ce8fe0037 --- /dev/null +++ b/hw/ppc/spapr_vof.c @@ -0,0 +1,167 @@ +/* + * SPAPR machine hooks to Virtual Open Firmware, + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/vof.h" +#include "sysemu/sysemu.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *_args) +{ + int ret = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob, + ppc64_phys_to_real(_args[0])); + + if (ret) { + return H_PARAMETER; + } + return H_SUCCESS; +} + +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) +{ + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + + vof_build_dt(fdt, spapr->vof); + + if (spapr->vof->bootargs) { + int chosen; + + _FDT(chosen = fdt_path_offset(fdt, "/chosen")); + /* + * If the client did not change "bootargs", spapr_dt_chosen() must have + * stored machine->kernel_cmdline in it before getting here. + */ + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs)); + } + + /* + * SLOF-less setup requires an open instance of stdout for early + * kernel printk. By now all phandles are settled so we can open + * the default serial console. 
+ */ + if (stdout_path) { + _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout", + stdout_path)); + } +} + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp) +{ + target_ulong stack_ptr; + Vof *vof = spapr->vof; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + vof_init(vof, spapr->rma_size, errp); + + stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); + if (stack_ptr == -1) { + error_setg(errp, "Memory allocation for stack failed"); + return; + } + /* Stack grows downwards plus reserve space for the minimum stack frame */ + stack_ptr += VOF_STACK_SIZE - 0x20; + + if (spapr->kernel_size && + vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) { + error_setg(errp, "Memory for kernel is in use"); + return; + } + + if (spapr->initrd_size && + vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) { + error_setg(errp, "Memory for initramdisk is in use"); + return; + } + + spapr_vof_client_dt_finalize(spapr, fdt); + + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + stack_ptr, spapr->initrd_base, + spapr->initrd_size); + /* VOF is 32bit BE so enforce MSR here */ + first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + + /* + * At this point the expected allocation map is: + * + * 0..c38 - the initial firmware + * 8000..10000 - stack + * 400000.. - kernel + * 3ea0000.. - initramdisk + * + * We skip writing FDT as nothing expects it; OF client interface is + * going to be used for reading the device tree. + */ +} + +void spapr_vof_quiesce(MachineState *ms) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; +} + +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + /* + * We only allow changing properties which we know how to update in QEMU + * OR + * the ones which we know that they need to survive during "quiesce". + */ + + if (strcmp(path, "/rtas") == 0) { + if (strcmp(propname, "linux,rtas-base") == 0 || + strcmp(propname, "linux,rtas-entry") == 0) { + /* These need to survive quiesce so let them store in the FDT */ + return true; + } + } + + if (strcmp(path, "/chosen") == 0) { + if (strcmp(propname, "bootargs") == 0) { + Vof *vof = spapr->vof; + + g_free(vof->bootargs); + vof->bootargs = g_strndup(val, vallen); + return true; + } + if (strcmp(propname, "linux,initrd-start") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_base = ldl_be_p(val); + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_base = ldq_be_p(val); + return true; + } + return false; + } + if (strcmp(propname, "linux,initrd-end") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base; + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base; + return true; + } + return false; + } + } + + return true; +} diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index b4bbfbb0134..3bf43fa340f 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
# spapr_pci.c spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=0x%x)" @@ -71,9 +71,53 @@ spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx3 spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64 spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed" +# vof.c +vof_error_str_truncated(const char *s, int len) "%s truncated to %d" +vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d" +vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d" +vof_error_unknown_method(const char *method) "\"%s\"" +vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x" +vof_error_unknown_path(const char *path) "\"%s\"" +vof_error_write(uint32_t ih) "ih=0x%x" +vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x" +vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x" +vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x" +vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x" +vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const char *val) "ph=0x%x \"%s\" => len=%d [%s]" +vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d" +vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d" +vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x" +vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x" +vof_package_to_path(uint32_t ph, const char *tmp, int ret) "ph=0x%x => %s len=%d" +vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, int ret) "ih=0x%x ph=0x%x => %s len=%d" +vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x" +vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\"" +vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 +vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 + # ppc.c ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" +ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64 +ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64 + +ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64 +ppc_decr_excp(const char *action) "%s decrementer" +ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64 + +ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc4xx_pit_stop(void) "" +ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64 +ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64 +ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64 +ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32 +ppc40x_timers_init(uint32_t value) "frequency %" PRIu32 +ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d" +ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t 
level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 +ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d" +ppc_irq_reset(const char *name) "%s" +ppc_irq_cpu(const char *action) "%s" # prep_systemio.c prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index cb421570dab..9c575403b85 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -38,9 +38,7 @@ #include "elf.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qemu/option.h" -#include "exec/address-spaces.h" #include "hw/intc/ppc-uic.h" #include "hw/ppc/ppc.h" diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c new file mode 100644 index 00000000000..73adc44ec21 --- /dev/null +++ b/hw/ppc/vof.c @@ -0,0 +1,1062 @@ +/* + * QEMU PowerPC Virtual Open Firmware. + * + * This implements client interface from OpenFirmware IEEE1275 on the QEMU + * side to leave only a very basic firmware in the VM. + * + * Copyright (c) 2021 IBM Corporation. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/range.h" +#include "qemu/units.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "exec/ram_addr.h" +#include "exec/address-spaces.h" +#include "hw/ppc/vof.h" +#include "hw/ppc/fdt.h" +#include "sysemu/runstate.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +#include + +/* + * OF 1275 "nextprop" description suggests is it 32 bytes max but + * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long. + */ +#define OF_PROPNAME_LEN_MAX 64 + +#define VOF_MAX_PATH 256 +#define VOF_MAX_SETPROPLEN 2048 +#define VOF_MAX_METHODLEN 256 +#define VOF_MAX_FORTHCODE 256 +#define VOF_VTY_BUF_SIZE 256 + +typedef struct { + uint64_t start; + uint64_t size; +} OfClaimed; + +typedef struct { + char *path; /* the path used to open the instance */ + uint32_t phandle; +} OfInstance; + +static int readstr(hwaddr pa, char *buf, int size) +{ + if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) { + return -1; + } + if (strnlen(buf, size) == size) { + buf[size - 1] = '\0'; + trace_vof_error_str_truncated(buf, size); + return -1; + } + return 0; +} + +static bool cmpservice(const char *s, unsigned nargs, unsigned nret, + const char *s1, unsigned nargscheck, unsigned nretcheck) +{ + if (strcmp(s, s1)) { + return false; + } + if ((nargscheck && (nargs != nargscheck)) || + (nretcheck && (nret != nretcheck))) { + trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret); + return false; + } + + return true; +} + +static void prop_format(char *tval, int tlen, const void *prop, int len) +{ + int i; + const unsigned char *c; + char *t; + const char bin[] = "..."; + + for (i = 0, c = prop; i < len; ++i, ++c) { + if (*c == '\0' && i == len - 1) { + strncpy(tval, prop, tlen - 1); + return; + } + if (*c < 0x20 || *c >= 0x80) { + break; + } + } + + for (i = 0, c = prop, t = tval; i < len; ++i, ++c) { + if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) { + strcpy(t, bin); + return; + } + if (i && i % 4 == 0 && i != len - 1) { + strcat(t, " "); + ++t; + } + t += sprintf(t, "%02X", *c & 0xFF); + } +} + +static int get_path(const void *fdt, int offset, char *buf, int len) +{ + int ret; + + ret = fdt_get_path(fdt, offset, buf, len - 1); + if (ret < 0) { + return ret; + } + + buf[len - 1] = '\0'; + + return strlen(buf) + 1; +} + +static int phandle_to_path(const void *fdt, 
uint32_t ph, char *buf, int len) +{ + int ret; + + ret = fdt_node_offset_by_phandle(fdt, ph); + if (ret < 0) { + return ret; + } + + return get_path(fdt, ret, buf, len); +} + +static int path_offset(const void *fdt, const char *path) +{ + g_autofree char *p = NULL; + char *at; + + /* + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16 + * + * "Conversion from numeric representation to text representation shall use + * the lower case forms of the hexadecimal digits in the range a..f, + * suppressing leading zeros". + */ + p = g_strdup(path); + for (at = strchr(p, '@'); at && *at; ) { + if (*at == '/') { + at = strchr(at, '@'); + } else { + *at = tolower(*at); + ++at; + } + } + + return fdt_path_offset(fdt, p); +} + +static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr) +{ + char fullnode[VOF_MAX_PATH]; + uint32_t ret = PROM_ERROR; + int offset; + + if (readstr(nodeaddr, fullnode, sizeof(fullnode))) { + return (uint32_t) ret; + } + + offset = path_offset(fdt, fullnode); + if (offset >= 0) { + ret = fdt_get_phandle(fdt, offset); + } + trace_vof_finddevice(fullnode, ret); + return ret; +} + +static const void *getprop(const void *fdt, int nodeoff, const char *propname, + int *proplen, bool *write0) +{ + const char *unit, *prop; + const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen); + + if (ret) { + if (write0) { + *write0 = false; + } + return ret; + } + + if (strcmp(propname, "name")) { + return NULL; + } + /* + * We return a value for "name" from path if queried but property does not + * exist. @proplen does not include the unit part in this case. + */ + prop = fdt_get_name(fdt, nodeoff, proplen); + if (!prop) { + *proplen = 0; + return NULL; + } + + unit = memchr(prop, '@', *proplen); + if (unit) { + *proplen = unit - prop; + } + *proplen += 1; + + /* + * Since it might be cut at "@" and there will be no trailing zero + * in the prop buffer, tell the caller to write zero at the end. + */ + if (write0) { + *write0 = true; + } + return prop; +} + +static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + char trval[64] = ""; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + bool write0; + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, &write0); + if (prop) { + const char zero = 0; + int cb = MIN(proplen, vallen); + + if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK || + /* if that was "name" with a unit address, overwrite '@' with '0' */ + (write0 && + cb == proplen && + VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) { + ret = PROM_ERROR; + } else { + /* + * OF1275 says: + * "Size is either the actual size of the property, or -1 if name + * does not exist", hence returning proplen instead of cb. 
+ */ + ret = proplen; + /* Do not format a value if tracepoint is silent, for performance */ + if (trace_event_get_state(TRACE_VOF_GETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), prop, ret); + } + } + } else { + ret = PROM_ERROR; + } + trace_vof_getprop(nodeph, propname, ret, trval); + + return ret; +} + +static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, NULL); + if (prop) { + ret = proplen; + } else { + ret = PROM_ERROR; + } + trace_vof_getproplen(nodeph, propname, ret); + + return ret; +} + +static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, + uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = PROM_ERROR; + int offset, rc; + char trval[64] = ""; + char nodepath[VOF_MAX_PATH] = ""; + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + VofMachineIfClass *vmc; + g_autofree char *val = NULL; + + if (vallen > VOF_MAX_SETPROPLEN) { + goto trace_exit; + } + if (readstr(pname, propname, sizeof(propname))) { + goto trace_exit; + } + offset = fdt_node_offset_by_phandle(fdt, nodeph); + if (offset < 0) { + goto trace_exit; + } + rc = get_path(fdt, offset, nodepath, sizeof(nodepath)); + if (rc <= 0) { + goto trace_exit; + } + + val = g_malloc0(vallen); + if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) { + goto trace_exit; + } + + if (!vmo) { + goto trace_exit; + } + + vmc = VOF_MACHINE_GET_CLASS(vmo); + if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) { + goto trace_exit; + } + + rc = fdt_setprop(fdt, offset, propname, val, vallen); + if (rc) { + goto trace_exit; + } + + if (trace_event_get_state(TRACE_VOF_SETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), val, vallen); + } + ret = vallen; + +trace_exit: + trace_vof_setprop(nodeph, propname, trval, vallen, ret); + + return ret; +} + +static uint32_t vof_nextprop(const void *fdt, uint32_t phandle, + uint32_t prevaddr, uint32_t nameaddr) +{ + int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle); + char prev[OF_PROPNAME_LEN_MAX + 1]; + const char *tmp; + + if (readstr(prevaddr, prev, sizeof(prev))) { + return PROM_ERROR; + } + + fdt_for_each_property_offset(offset, fdt, nodeoff) { + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + if (prev[0] == '\0' || strcmp(prev, tmp) == 0) { + if (prev[0] != '\0') { + offset = fdt_next_property_offset(fdt, offset); + if (offset < 0) { + return 0; + } + } + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + + if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { + return PROM_ERROR; + } + return 1; + } + } + + return 0; +} + +static uint32_t vof_peer(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc; + + if (phandle == 0) { + rc = fdt_path_offset(fdt, "/"); + } else { + rc = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + } + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_child(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_first_subnode(fdt, fdt_node_offset_by_phandle(fdt, 
phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_parent(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_parent_offset(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path) +{ + uint32_t ret = PROM_ERROR; + OfInstance *inst = NULL; + + if (vof->of_instance_last == 0xFFFFFFFF) { + /* We do not recycle ihandles yet */ + goto trace_exit; + } + + inst = g_new0(OfInstance, 1); + inst->phandle = fdt_get_phandle(fdt, offset); + g_assert(inst->phandle); + ++vof->of_instance_last; + + inst->path = g_strdup(path); + g_hash_table_insert(vof->of_instances, + GINT_TO_POINTER(vof->of_instance_last), + inst); + ret = vof->of_instance_last; + +trace_exit: + trace_vof_open(path, inst ? inst->phandle : 0, ret); + + return ret; +} + +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path) +{ + int offset, node = fdt_path_offset(fdt, nodename); + uint32_t inst; + + offset = fdt_path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + inst = vof_do_open(fdt, vof, offset, path); + + return fdt_setprop_cell(fdt, node, prop, inst) >= 0 ? 0 : PROM_ERROR; +} + +static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr) +{ + char path[VOF_MAX_PATH]; + int offset; + + if (readstr(pathaddr, path, sizeof(path))) { + return PROM_ERROR; + } + + offset = path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + return vof_do_open(fdt, vof, offset, path); +} + +static void vof_close(Vof *vof, uint32_t ihandle) +{ + if (!g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) { + trace_vof_error_unknown_ihandle_close(ihandle); + } +} + +static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle) +{ + gpointer instp = g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + uint32_t ret = PROM_ERROR; + + if (instp) { + ret = ((OfInstance *)instp)->phandle; + } + trace_vof_instance_to_package(ihandle, ret); + + return ret; +} + +static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle, + uint32_t buf, uint32_t len) +{ + int rc; + char tmp[VOF_MAX_PATH] = ""; + + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + + trace_vof_package_to_path(phandle, tmp, rc); + + return rc > 0 ? (uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle, + uint32_t buf, uint32_t len) +{ + int rc = -1; + uint32_t phandle = vof_instance_to_package(vof, ihandle); + char tmp[VOF_MAX_PATH] = ""; + + if (phandle != -1) { + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + } + trace_vof_instance_to_path(ihandle, phandle, tmp, rc); + + return rc > 0 ? 
(uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf, + uint32_t len) +{ + char tmp[VOF_VTY_BUF_SIZE]; + unsigned cb; + OfInstance *inst = (OfInstance *) + g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle)); + + if (!inst) { + trace_vof_error_write(ihandle); + return PROM_ERROR; + } + + for ( ; len > 0; len -= cb) { + cb = MIN(len, sizeof(tmp) - 1); + if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) { + return PROM_ERROR; + } + + /* FIXME: there is no backend(s) yet so just call a trace */ + if (trace_event_get_state(TRACE_VOF_WRITE) && + qemu_loglevel_mask(LOG_TRACE)) { + tmp[cb] = '\0'; + trace_vof_write(ihandle, cb, tmp); + } + } + + return len; +} + +static void vof_claimed_dump(GArray *claimed) +{ + int i; + OfClaimed c; + + if (trace_event_get_state(TRACE_VOF_CLAIMED) && + qemu_loglevel_mask(LOG_TRACE)) { + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + trace_vof_claimed(c.start, c.start + c.size, c.size); + } + } +} + +static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size) +{ + int i; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (ranges_overlap(c.start, c.size, virt, size)) { + return false; + } + } + + return true; +} + +static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size) +{ + OfClaimed newclaim; + + newclaim.start = virt; + newclaim.size = size; + g_array_append_val(claimed, newclaim); +} + +static gint of_claimed_compare_func(gconstpointer a, gconstpointer b) +{ + return ((OfClaimed *)a)->start - ((OfClaimed *)b)->start; +} + +static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) +{ + int i, n, offset, proplen = 0, sc, ac; + target_ulong mem0_end; + const uint8_t *mem0_reg; + g_autofree uint8_t *avail = NULL; + uint8_t *availcur; + + if (!fdt || !claimed) { + return; + } + + offset = fdt_path_offset(fdt, "/"); + _FDT(offset); + ac = fdt_address_cells(fdt, offset); + g_assert(ac == 1 || ac == 2); + sc = fdt_size_cells(fdt, offset); + g_assert(sc == 1 || sc == 2); + + offset = fdt_path_offset(fdt, "/memory@0"); + _FDT(offset); + + mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); + g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); + if (sc == 2) { + mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } else { + mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } + + g_array_sort(claimed, of_claimed_compare_func); + vof_claimed_dump(claimed); + + /* + * VOF resides in the first page so we do not need to check if there is + * available memory before the first claimed block + */ + g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0)); + + avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len); + for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) { + OfClaimed c = g_array_index(claimed, OfClaimed, i); + uint64_t start, size; + + start = c.start + c.size; + if (i < claimed->len - 1) { + OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1); + + size = cn.start - start; + } else { + size = mem0_end - start; + } + + if (ac == 2) { + *(uint64_t *) availcur = cpu_to_be64(start); + } else { + *(uint32_t *) availcur = cpu_to_be32(start); + } + availcur += sizeof(uint32_t) * ac; + if (sc == 2) { + *(uint64_t *) availcur = cpu_to_be64(size); + } else { + *(uint32_t *) availcur = cpu_to_be32(size); + } + availcur += sizeof(uint32_t) * sc; + + if (size) { + 
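/*
 * Worked example (illustrative only): with two claimed ranges,
 * 0x0..0x20000 and 0x400000..0x800000, and mem0_end = 0x10000000, the
 * gaps computed by this loop become the "available" property
 *   <0x20000 0x3e0000  0x800000 0xf800000>
 * i.e. (start, size) pairs describing the unclaimed parts of the first
 * memory node, encoded with the root node's #address-cells/#size-cells.
 */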
trace_vof_avail(c.start + c.size, c.start + c.size + size, size); + ++n; + } + } + _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail))); +} + +/* + * OF1275: + * "Allocates size bytes of memory. If align is zero, the allocated range + * begins at the virtual address virt. Otherwise, an aligned address is + * automatically chosen and the input argument virt is ignored". + * + * In other words, exactly one of @virt and @align is non-zero. + */ +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, + uint64_t align) +{ + uint64_t ret; + + if (size == 0) { + ret = -1; + } else if (align == 0) { + if (!vof_claim_avail(vof->claimed, virt, size)) { + ret = -1; + } else { + ret = virt; + } + } else { + vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align); + while (1) { + if (vof->claimed_base >= vof->top_addr) { + error_report("Out of RMA memory for the OF client"); + return -1; + } + if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) { + break; + } + vof->claimed_base += size; + } + ret = vof->claimed_base; + } + + if (ret != -1) { + vof->claimed_base = MAX(vof->claimed_base, ret + size); + vof_claim_add(vof->claimed, ret, size); + } + trace_vof_claim(virt, size, align, ret); + + return ret; +} + +static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size) +{ + uint32_t ret = PROM_ERROR; + int i; + GArray *claimed = vof->claimed; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (c.start == virt && c.size == size) { + g_array_remove_index(claimed, i); + ret = 0; + break; + } + } + + trace_vof_release(virt, size, ret); + + return ret; +} + +static void vof_instantiate_rtas(Error **errp) +{ + error_setg(errp, "The firmware should have instantiated RTAS"); +} + +static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr, + uint32_t ihandle, uint32_t param1, + uint32_t param2, uint32_t param3, + uint32_t param4, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char method[VOF_MAX_METHODLEN] = ""; + OfInstance *inst; + + if (!ihandle) { + goto trace_exit; + } + + inst = (OfInstance *)g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + if (!inst) { + goto trace_exit; + } + + if (readstr(methodaddr, method, sizeof(method))) { + goto trace_exit; + } + + if (strcmp(inst->path, "/") == 0) { + if (strcmp(method, "ibm,client-architecture-support") == 0) { + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + g_assert(vmc->client_architecture_support); + ret = (uint32_t)vmc->client_architecture_support(ms, first_cpu, + param1); + } + + *ret2 = 0; + } + } else if (strcmp(inst->path, "/rtas") == 0) { + if (strcmp(method, "instantiate-rtas") == 0) { + vof_instantiate_rtas(&error_fatal); + ret = 0; + *ret2 = param1; /* rtas-base */ + } + } else { + trace_vof_error_unknown_method(method); + } + +trace_exit: + trace_vof_method(ihandle, method, param1, ret, *ret2); + + return ret; +} + +static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1, + uint32_t param2, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char cmd[VOF_MAX_FORTHCODE] = ""; + + /* No interpret implemented so just call a trace */ + readstr(cmdaddr, cmd, sizeof(cmd)); + trace_vof_interpret(cmd, param1, param2, ret, *ret2); + + return ret; +} + +static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof) +{ + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + /* After "quiesce", no 
change is expected to the FDT, pack FDT to ensure */ + int rc = fdt_pack(fdt); + + assert(rc == 0); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + if (vmc->quiesce) { + vmc->quiesce(ms); + } + } + + vof_claimed_dump(vof->claimed); +} + +static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof, + const char *service, + uint32_t *args, unsigned nargs, + uint32_t *rets, unsigned nrets) +{ + uint32_t ret = 0; + + /* @nrets includes the value which this function returns */ +#define cmpserv(s, a, r) \ + cmpservice(service, nargs, nrets, (s), (a), (r)) + + if (cmpserv("finddevice", 1, 1)) { + ret = vof_finddevice(fdt, args[0]); + } else if (cmpserv("getprop", 4, 1)) { + ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]); + } else if (cmpserv("getproplen", 2, 1)) { + ret = vof_getproplen(fdt, args[0], args[1]); + } else if (cmpserv("setprop", 4, 1)) { + ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]); + } else if (cmpserv("nextprop", 3, 1)) { + ret = vof_nextprop(fdt, args[0], args[1], args[2]); + } else if (cmpserv("peer", 1, 1)) { + ret = vof_peer(fdt, args[0]); + } else if (cmpserv("child", 1, 1)) { + ret = vof_child(fdt, args[0]); + } else if (cmpserv("parent", 1, 1)) { + ret = vof_parent(fdt, args[0]); + } else if (cmpserv("open", 1, 1)) { + ret = vof_open(fdt, vof, args[0]); + } else if (cmpserv("close", 1, 0)) { + vof_close(vof, args[0]); + } else if (cmpserv("instance-to-package", 1, 1)) { + ret = vof_instance_to_package(vof, args[0]); + } else if (cmpserv("package-to-path", 3, 1)) { + ret = vof_package_to_path(fdt, args[0], args[1], args[2]); + } else if (cmpserv("instance-to-path", 3, 1)) { + ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]); + } else if (cmpserv("write", 3, 1)) { + ret = vof_write(vof, args[0], args[1], args[2]); + } else if (cmpserv("claim", 3, 1)) { + uint64_t ret64 = vof_claim(vof, args[0], args[1], args[2]); + + if (ret64 < 0x100000000UL) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + ret = (uint32_t)ret64; + } else { + if (ret64 != -1) { + vof_release(vof, ret, args[1]); + } + ret = PROM_ERROR; + } + } else if (cmpserv("release", 2, 0)) { + ret = vof_release(vof, args[0], args[1]); + if (ret != PROM_ERROR) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + } + } else if (cmpserv("call-method", 0, 0)) { + ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3], + args[4], args[5], rets); + } else if (cmpserv("interpret", 0, 0)) { + ret = vof_call_interpret(args[0], args[1], args[2], rets); + } else if (cmpserv("milliseconds", 0, 1)) { + ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + } else if (cmpserv("quiesce", 0, 0)) { + vof_quiesce(ms, fdt, vof); + } else if (cmpserv("exit", 0, 0)) { + error_report("Stopped as the VM requested \"exit\""); + vm_stop(RUN_STATE_PAUSED); + } else { + trace_vof_error_unknown_service(service, nargs, nrets); + ret = -1; + } + +#undef cmpserv + + return ret; +} + +/* Defined as Big Endian */ +struct prom_args { + uint32_t service; + uint32_t nargs; + uint32_t nret; + uint32_t args[10]; +} QEMU_PACKED; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real) +{ + struct prom_args args_be; + uint32_t args[ARRAY_SIZE(args_be.args)]; + uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret; + char service[64]; + unsigned nargs, nret, i; + + if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) { + return -EINVAL; + } + nargs = be32_to_cpu(args_be.nargs); + if (nargs 
>= ARRAY_SIZE(args_be.args)) { + return -EINVAL; + } + + if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) != + MEMTX_OK) { + return -EINVAL; + } + if (strnlen(service, sizeof(service)) == sizeof(service)) { + /* Too long service name */ + return -EINVAL; + } + + for (i = 0; i < nargs; ++i) { + args[i] = be32_to_cpu(args_be.args[i]); + } + + nret = be32_to_cpu(args_be.nret); + if (nret > ARRAY_SIZE(args_be.args) - nargs) { + return -EINVAL; + } + ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret); + if (!nret) { + return 0; + } + + /* @nrets includes the value which this function returns */ + args_be.args[nargs] = cpu_to_be32(ret); + for (i = 1; i < nret; ++i) { + args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]); + } + + if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]), + args_be.args + nargs, sizeof(args_be.args[0]) * nret) != + MEMTX_OK) { + return -EINVAL; + } + + return 0; +} + +static void vof_instance_free(gpointer data) +{ + OfInstance *inst = (OfInstance *)data; + + g_free(inst->path); + g_free(inst); +} + +void vof_init(Vof *vof, uint64_t top_addr, Error **errp) +{ + vof_cleanup(vof); + + vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, vof_instance_free); + vof->claimed = g_array_new(false, false, sizeof(OfClaimed)); + + /* Keep allocations in 32bit as CLI ABI can only return cells==32bit */ + vof->top_addr = MIN(top_addr, 4 * GiB); + if (vof_claim(vof, 0, vof->fw_size, 0) == -1) { + error_setg(errp, "Memory for firmware is in use"); + } +} + +void vof_cleanup(Vof *vof) +{ + if (vof->claimed) { + g_array_unref(vof->claimed); + } + if (vof->of_instances) { + g_hash_table_unref(vof->of_instances); + } + vof->claimed = NULL; + vof->of_instances = NULL; +} + +void vof_build_dt(void *fdt, Vof *vof) +{ + uint32_t phandle = fdt_get_max_phandle(fdt); + int offset, proplen = 0; + const void *prop; + + /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + prop = fdt_getprop(fdt, offset, "phandle", &proplen); + if (prop) { + continue; + } + ++phandle; + _FDT(fdt_setprop_cell(fdt, offset, "phandle", phandle)); + } + + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); +} + +static const TypeInfo vof_machine_if_info = { + .name = TYPE_VOF_MACHINE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(VofMachineIfClass), +}; + +static void vof_machine_if_register_types(void) +{ + type_register_static(&vof_machine_if_info); +} +type_init(vof_machine_if_register_types) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 49141d4074b..cfd85de3e66 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -27,58 +27,58 @@ #define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } #define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } -void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res) -{ - monitor_printf(mon, "\ttx : %" PRId64 "\n", - dev_res->stats.tx); - monitor_printf(mon, "\ttx_len : %" PRId64 "\n", - dev_res->stats.tx_len); - monitor_printf(mon, "\ttx_err : %" PRId64 "\n", - dev_res->stats.tx_err); - monitor_printf(mon, "\trx_bufs : %" PRId64 "\n", - dev_res->stats.rx_bufs); - monitor_printf(mon, "\trx_srq : %" PRId64 "\n", - dev_res->stats.rx_srq); - monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n", - dev_res->stats.rx_bufs_len); - monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n", - dev_res->stats.rx_bufs_err); - 
monitor_printf(mon, "\tcomps : %" PRId64 "\n", - dev_res->stats.completions); - monitor_printf(mon, "\tmissing_comps : %" PRId32 "\n", - dev_res->stats.missing_cqe); - monitor_printf(mon, "\tpoll_cq (bk) : %" PRId64 "\n", - dev_res->stats.poll_cq_from_bk); - monitor_printf(mon, "\tpoll_cq_ppoll_to : %" PRId64 "\n", - dev_res->stats.poll_cq_ppoll_to); - monitor_printf(mon, "\tpoll_cq (fe) : %" PRId64 "\n", - dev_res->stats.poll_cq_from_guest); - monitor_printf(mon, "\tpoll_cq_empty : %" PRId64 "\n", - dev_res->stats.poll_cq_from_guest_empty); - monitor_printf(mon, "\tmad_tx : %" PRId64 "\n", - dev_res->stats.mad_tx); - monitor_printf(mon, "\tmad_tx_err : %" PRId64 "\n", - dev_res->stats.mad_tx_err); - monitor_printf(mon, "\tmad_rx : %" PRId64 "\n", - dev_res->stats.mad_rx); - monitor_printf(mon, "\tmad_rx_err : %" PRId64 "\n", - dev_res->stats.mad_rx_err); - monitor_printf(mon, "\tmad_rx_bufs : %" PRId64 "\n", - dev_res->stats.mad_rx_bufs); - monitor_printf(mon, "\tmad_rx_bufs_err : %" PRId64 "\n", - dev_res->stats.mad_rx_bufs_err); - monitor_printf(mon, "\tPDs : %" PRId32 "\n", - dev_res->pd_tbl.used); - monitor_printf(mon, "\tMRs : %" PRId32 "\n", - dev_res->mr_tbl.used); - monitor_printf(mon, "\tUCs : %" PRId32 "\n", - dev_res->uc_tbl.used); - monitor_printf(mon, "\tQPs : %" PRId32 "\n", - dev_res->qp_tbl.used); - monitor_printf(mon, "\tCQs : %" PRId32 "\n", - dev_res->cq_tbl.used); - monitor_printf(mon, "\tCEQ_CTXs : %" PRId32 "\n", - dev_res->cqe_ctx_tbl.used); +void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf) +{ + g_string_append_printf(buf, "\ttx : %" PRId64 "\n", + dev_res->stats.tx); + g_string_append_printf(buf, "\ttx_len : %" PRId64 "\n", + dev_res->stats.tx_len); + g_string_append_printf(buf, "\ttx_err : %" PRId64 "\n", + dev_res->stats.tx_err); + g_string_append_printf(buf, "\trx_bufs : %" PRId64 "\n", + dev_res->stats.rx_bufs); + g_string_append_printf(buf, "\trx_srq : %" PRId64 "\n", + dev_res->stats.rx_srq); + g_string_append_printf(buf, "\trx_bufs_len : %" PRId64 "\n", + dev_res->stats.rx_bufs_len); + g_string_append_printf(buf, "\trx_bufs_err : %" PRId64 "\n", + dev_res->stats.rx_bufs_err); + g_string_append_printf(buf, "\tcomps : %" PRId64 "\n", + dev_res->stats.completions); + g_string_append_printf(buf, "\tmissing_comps : %" PRId32 "\n", + dev_res->stats.missing_cqe); + g_string_append_printf(buf, "\tpoll_cq (bk) : %" PRId64 "\n", + dev_res->stats.poll_cq_from_bk); + g_string_append_printf(buf, "\tpoll_cq_ppoll_to : %" PRId64 "\n", + dev_res->stats.poll_cq_ppoll_to); + g_string_append_printf(buf, "\tpoll_cq (fe) : %" PRId64 "\n", + dev_res->stats.poll_cq_from_guest); + g_string_append_printf(buf, "\tpoll_cq_empty : %" PRId64 "\n", + dev_res->stats.poll_cq_from_guest_empty); + g_string_append_printf(buf, "\tmad_tx : %" PRId64 "\n", + dev_res->stats.mad_tx); + g_string_append_printf(buf, "\tmad_tx_err : %" PRId64 "\n", + dev_res->stats.mad_tx_err); + g_string_append_printf(buf, "\tmad_rx : %" PRId64 "\n", + dev_res->stats.mad_rx); + g_string_append_printf(buf, "\tmad_rx_err : %" PRId64 "\n", + dev_res->stats.mad_rx_err); + g_string_append_printf(buf, "\tmad_rx_bufs : %" PRId64 "\n", + dev_res->stats.mad_rx_bufs); + g_string_append_printf(buf, "\tmad_rx_bufs_err : %" PRId64 "\n", + dev_res->stats.mad_rx_bufs_err); + g_string_append_printf(buf, "\tPDs : %" PRId32 "\n", + dev_res->pd_tbl.used); + g_string_append_printf(buf, "\tMRs : %" PRId32 "\n", + dev_res->mr_tbl.used); + g_string_append_printf(buf, "\tUCs : %" PRId32 "\n", + 
dev_res->uc_tbl.used); + g_string_append_printf(buf, "\tQPs : %" PRId32 "\n", + dev_res->qp_tbl.used); + g_string_append_printf(buf, "\tCQs : %" PRId32 "\n", + dev_res->cq_tbl.used); + g_string_append_printf(buf, "\tCEQ_CTXs : %" PRId32 "\n", + dev_res->cqe_ctx_tbl.used); } static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index e8639909cd3..d69a917795d 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -92,6 +92,6 @@ static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res, { return &dev_res->port.gid_tbl[sgid_idx].gid; } -void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res); +void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf); #endif diff --git a/hw/rdma/trace-events b/hw/rdma/trace-events index 2022a820cbe..9accb149734 100644 --- a/hw/rdma/trace-events +++ b/hw/rdma/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # rdma_backend.c rdma_check_dev_attr(const char *name, int max_bk, int max_fe) "%s: be=%d, fe=%d" diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index f59879e2574..da7ddfa548f 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -38,6 +38,13 @@ static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, return NULL; } + length = ROUND_UP(length, TARGET_PAGE_SIZE); + if (nchunks * TARGET_PAGE_SIZE != length) { + rdma_error_report("Invalid nchunks/length (%u, %lu)", nchunks, + (unsigned long)length); + return NULL; + } + dir = rdma_pci_dma_map(pdev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { rdma_error_report("Failed to map to page directory"); diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c index 074ac59b84d..42130667a7d 100644 --- a/hw/rdma/vmw/pvrdma_dev_ring.c +++ b/hw/rdma/vmw/pvrdma_dev_ring.c @@ -41,7 +41,7 @@ int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev, qatomic_set(&ring->ring_state->cons_head, 0); */ ring->npages = npages; - ring->pages = g_malloc(npages * sizeof(void *)); + ring->pages = g_malloc0(npages * sizeof(void *)); for (i = 0; i < npages; i++) { if (!tbl[i]) { diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 84ae8024fcf..91206dbb8eb 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -58,24 +58,25 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj) +static void pvrdma_format_statistics(RdmaProvider *obj, GString *buf) { PVRDMADev *dev = PVRDMA_DEV(obj); PCIDevice *pdev = PCI_DEVICE(dev); - monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - monitor_printf(mon, "\tcommands : %" PRId64 "\n", - dev->stats.commands); - monitor_printf(mon, "\tregs_reads : %" PRId64 "\n", - dev->stats.regs_reads); - monitor_printf(mon, "\tregs_writes : %" PRId64 "\n", - dev->stats.regs_writes); - monitor_printf(mon, "\tuar_writes : %" PRId64 "\n", - dev->stats.uar_writes); - monitor_printf(mon, "\tinterrupts : %" PRId64 "\n", - dev->stats.interrupts); - rdma_dump_device_counters(mon, &dev->rdma_dev_res); + g_string_append_printf(buf, "%s, %x.%x\n", + pdev->name, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + g_string_append_printf(buf, "\tcommands : %" PRId64 "\n", + dev->stats.commands); + g_string_append_printf(buf, "\tregs_reads : %" PRId64 "\n", + 
dev->stats.regs_reads); + g_string_append_printf(buf, "\tregs_writes : %" PRId64 "\n", + dev->stats.regs_writes); + g_string_append_printf(buf, "\tuar_writes : %" PRId64 "\n", + dev->stats.uar_writes); + g_string_append_printf(buf, "\tinterrupts : %" PRId64 "\n", + dev->stats.interrupts); + rdma_format_device_counters(&dev->rdma_dev_res, buf); } static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, @@ -92,6 +93,11 @@ static int init_dev_ring(PvrdmaRing *ring, PvrdmaRingState **ring_state, uint64_t *dir, *tbl; int rc = 0; + if (!num_pages) { + rdma_error_report("Ring pages count must be strictly positive"); + return -EINVAL; + } + dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); if (!dir) { rdma_error_report("Failed to map to page directory (ring %s)", name); @@ -694,7 +700,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, pvrdma_dev_properties); set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); - ir->print_statistics = pvrdma_print_statistics; + ir->format_statistics = pvrdma_format_statistics; } static const TypeInfo pvrdma_info = { diff --git a/hw/rdma/vmw/trace-events b/hw/rdma/vmw/trace-events index 323fca8456d..a6c77e1e10a 100644 --- a/hw/rdma/vmw/trace-events +++ b/hw/rdma/vmw/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pvrdma_main.c pvrdma_regs_read(uint64_t addr, uint64_t val) "pvrdma.regs[0x%"PRIx64"]=0x%"PRIx64 diff --git a/hw/remote/iohub.c b/hw/remote/iohub.c index e4ff131a6b8..547d597f0fe 100644 --- a/hw/remote/iohub.c +++ b/hw/remote/iohub.c @@ -15,7 +15,6 @@ #include "hw/pci/pci_ids.h" #include "hw/pci/pci_bus.h" #include "qemu/thread.h" -#include "hw/boards.h" #include "hw/remote/machine.h" #include "hw/remote/iohub.h" #include "qemu/main-loop.h" diff --git a/hw/remote/machine.c b/hw/remote/machine.c index c0ab4f528aa..952105eab5a 100644 --- a/hw/remote/machine.c +++ b/hw/remote/machine.c @@ -17,7 +17,6 @@ #include "qemu-common.h" #include "hw/remote/machine.h" -#include "exec/address-spaces.h" #include "exec/memory.h" #include "qapi/error.h" #include "hw/pci/pci_host.h" diff --git a/hw/remote/memory.c b/hw/remote/memory.c index 32085b1e05e..6e21ab1a45c 100644 --- a/hw/remote/memory.c +++ b/hw/remote/memory.c @@ -12,7 +12,6 @@ #include "qemu-common.h" #include "hw/remote/memory.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include "qapi/error.h" @@ -42,13 +41,12 @@ void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp) remote_sysmem_reset(); - for (region = 0; region < msg->num_fds; region++) { - g_autofree char *name; + for (region = 0; region < msg->num_fds; region++, suffix++) { + g_autofree char *name = g_strdup_printf("remote-mem-%u", suffix); subregion = g_new(MemoryRegion, 1); - name = g_strdup_printf("remote-mem-%u", suffix++); memory_region_init_ram_from_fd(subregion, NULL, name, sysmem_info->sizes[region], - true, msg->fds[region], + RAM_SHARED, msg->fds[region], sysmem_info->offsets[region], errp); diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index 9ce31526e8f..7e841820e52 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -34,7 +34,6 @@ */ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) { - ERRP_GUARD(); bool iolock = qemu_mutex_iothread_locked(); bool iothread = qemu_in_iothread(); struct iovec send[2] = {}; @@ -97,7 +96,6 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) static ssize_t 
mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, size_t *nfds, Error **errp) { - ERRP_GUARD(); struct iovec iov = { .iov_base = buf, .iov_len = len }; bool iolock = qemu_mutex_iothread_locked(); bool iothread = qemu_in_iothread(); @@ -192,7 +190,6 @@ bool mpqemu_msg_recv(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev, Error **errp) { - ERRP_GUARD(); MPQemuMsg msg_reply = {0}; uint64_t ret = UINT64_MAX; @@ -218,7 +215,7 @@ uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev, bool mpqemu_msg_valid(MPQemuMsg *msg) { - if (msg->cmd >= MPQEMU_CMD_MAX && msg->cmd < 0) { + if (msg->cmd >= MPQEMU_CMD_MAX || msg->cmd < 0) { return false; } diff --git a/hw/remote/proxy-memory-listener.c b/hw/remote/proxy-memory-listener.c index af1fa6f5aaa..882c9b4854d 100644 --- a/hw/remote/proxy-memory-listener.c +++ b/hw/remote/proxy-memory-listener.c @@ -14,9 +14,7 @@ #include "qemu/range.h" #include "exec/memory.h" #include "exec/cpu-common.h" -#include "cpu.h" #include "exec/ram_addr.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "hw/remote/mpqemu-link.h" #include "hw/remote/proxy-memory-listener.h" @@ -221,6 +219,7 @@ void proxy_memory_listener_configure(ProxyMemoryListener *proxy_listener, proxy_listener->listener.region_add = proxy_memory_listener_region_addnop; proxy_listener->listener.region_nop = proxy_memory_listener_region_addnop; proxy_listener->listener.priority = 10; + proxy_listener->listener.name = "proxy"; memory_listener_register(&proxy_listener->listener, &address_space_memory); diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c index 4fa4be079d7..bad164299dd 100644 --- a/hw/remote/proxy.c +++ b/hw/remote/proxy.c @@ -102,10 +102,18 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) } dev->ioc = qio_channel_new_fd(fd, errp); + if (!dev->ioc) { + close(fd); + return; + } error_setg(&dev->migration_blocker, "%s does not support migration", TYPE_PCI_PROXY_DEV); - migrate_add_blocker(dev->migration_blocker, errp); + if (migrate_add_blocker(dev->migration_blocker, errp) < 0) { + error_free(dev->migration_blocker); + object_unref(dev->ioc); + return; + } qemu_mutex_init(&dev->io_mutex); qio_channel_set_blocking(dev->ioc, true, NULL); @@ -316,6 +324,7 @@ static void probe_pci_info(PCIDevice *dev, Error **errp) set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); break; case PCI_BASE_CLASS_NETWORK: + case PCI_BASE_CLASS_WIRELESS: set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); break; case PCI_BASE_CLASS_INPUT: @@ -347,13 +356,12 @@ static void probe_pci_info(PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY; if (size) { - g_autofree char *name; + g_autofree char *name = g_strdup_printf("bar-region-%d", i); pdev->region[i].dev = pdev; pdev->region[i].present = true; if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) { pdev->region[i].memory = true; } - name = g_strdup_printf("bar-region-%d", i); memory_region_init_io(&pdev->region[i].mr, OBJECT(pdev), &proxy_mr_ops, &pdev->region[i], name, size); diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 1de18cdcf1f..d2d869aaade 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -1,3 +1,6 @@ +config RISCV_NUMA + bool + config IBEX bool @@ -9,7 +12,7 @@ config MICROCHIP_PFSOC select MCHP_PFSOC_MMUART select MCHP_PFSOC_SYSREG select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_PDMA select SIFIVE_PLIC select UNIMP @@ -19,17 +22,26 @@ config OPENTITAN 
select IBEX select UNIMP +config SHAKTI_C + bool + select UNIMP + select SHAKTI_UART + select RISCV_ACLINT + select SIFIVE_PLIC + config RISCV_VIRT bool imply PCI_DEVICES + imply VIRTIO_VGA imply TEST_DEVICES + select RISCV_NUMA select GOLDFISH_RTC select MSI_NONBROKEN select PCI select PCI_EXPRESS_GENERIC_BRIDGE select PFLASH_CFI01 select SERIAL - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_PLIC select SIFIVE_TEST select VIRTIO_MMIO @@ -38,7 +50,7 @@ config RISCV_VIRT config SIFIVE_E bool select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_GPIO select SIFIVE_PLIC select SIFIVE_UART @@ -49,7 +61,7 @@ config SIFIVE_U bool select CADENCE select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_GPIO select SIFIVE_PDMA select SIFIVE_PLIC @@ -57,13 +69,15 @@ config SIFIVE_U select SIFIVE_UART select SIFIVE_U_OTP select SIFIVE_U_PRCI + select SIFIVE_PWM select SSI_M25P80 select SSI_SD select UNIMP config SPIKE bool + select RISCV_NUMA select HTIF select MSI_NONBROKEN - select SIFIVE_CLINT + select RISCV_ACLINT select SIFIVE_PLIC diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index 0d38bb7426b..519fa455a15 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -35,7 +35,32 @@ bool riscv_is_32bit(RISCVHartArrayState *harts) { - return riscv_cpu_is_32bit(&harts->harts[0].env); + return harts->harts[0].env.misa_mxl_max == MXL_RV32; +} + +/* + * Return the per-socket PLIC hart topology configuration string + * (caller must free with g_free()) + */ +char *riscv_plic_hart_config_string(int hart_count) +{ + g_autofree const char **vals = g_new(const char *, hart_count + 1); + int i; + + for (i = 0; i < hart_count; i++) { + CPUState *cs = qemu_get_cpu(i); + CPURISCVState *env = &RISCV_CPU(cs)->env; + + if (riscv_has_ext(env, RVS)) { + vals[i] = "MS"; + } else { + vals[i] = "M"; + } + } + vals[i] = NULL; + + /* g_strjoinv() obliges us to cast away const here */ + return g_strjoinv(",", (char **)vals); } target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts, @@ -182,7 +207,7 @@ uint32_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt) { uint32_t temp, fdt_addr; hwaddr dram_end = dram_base + mem_size; - int fdtsize = fdt_totalsize(fdt); + int ret, fdtsize = fdt_totalsize(fdt); if (fdtsize <= 0) { error_report("invalid device-tree"); @@ -198,7 +223,9 @@ uint32_t riscv_load_fdt(hwaddr dram_base, uint64_t mem_size, void *fdt) temp = MIN(dram_end, 3072 * MiB); fdt_addr = QEMU_ALIGN_DOWN(temp - fdtsize, 16 * MiB); - fdt_pack(fdt); + ret = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + g_assert(ret == 0); /* copy in the device tree */ qemu_fdt_dumpdtb(fdt, fdtsize); diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index 275c0f7eb7c..ab6cae57eae 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -1,9 +1,10 @@ riscv_ss = ss.source_set() riscv_ss.add(files('boot.c'), fdt) -riscv_ss.add(files('numa.c')) +riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: files('numa.c')) riscv_ss.add(files('riscv_hart.c')) riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c')) riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c')) +riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c index 
4704337fb00..57d779fb555 100644 --- a/hw/riscv/microchip_pfsoc.c +++ b/hw/riscv/microchip_pfsoc.c @@ -36,12 +36,10 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qemu/units.h" #include "qemu/cutils.h" #include "qapi/error.h" #include "hw/boards.h" -#include "hw/irq.h" #include "hw/loader.h" #include "hw/sysbus.h" #include "chardev/char.h" @@ -51,8 +49,9 @@ #include "hw/riscv/boot.h" #include "hw/riscv/riscv_hart.h" #include "hw/riscv/microchip_pfsoc.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" +#include "sysemu/device_tree.h" #include "sysemu/sysemu.h" /* @@ -188,7 +187,6 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) MemoryRegion *envm_data = g_new(MemoryRegion, 1); MemoryRegion *qspi_xip_mem = g_new(MemoryRegion, 1); char *plic_hart_config; - size_t plic_hart_config_len; NICInfo *nd; int i; @@ -235,9 +233,12 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) memmap[MICROCHIP_PFSOC_BUSERR_UNIT4].size); /* CLINT */ - sifive_clint_create(memmap[MICROCHIP_PFSOC_CLINT].base, - memmap[MICROCHIP_PFSOC_CLINT].size, 0, ms->smp.cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, + riscv_aclint_swi_create(memmap[MICROCHIP_PFSOC_CLINT].base, + 0, ms->smp.cpus, false); + riscv_aclint_mtimer_create( + memmap[MICROCHIP_PFSOC_CLINT].base + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, CLINT_TIMEBASE_FREQ, false); /* L2 cache controller */ @@ -260,22 +261,11 @@ static void microchip_pfsoc_soc_realize(DeviceState *dev, Error **errp) l2lim_mem); /* create PLIC hart topology configuration string */ - plic_hart_config_len = (strlen(MICROCHIP_PFSOC_PLIC_HART_CONFIG) + 1) * - ms->smp.cpus; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (i = 0; i < ms->smp.cpus; i++) { - if (i != 0) { - strncat(plic_hart_config, "," MICROCHIP_PFSOC_PLIC_HART_CONFIG, - plic_hart_config_len); - } else { - strncat(plic_hart_config, "M", plic_hart_config_len); - } - plic_hart_config_len -= (strlen(MICROCHIP_PFSOC_PLIC_HART_CONFIG) + 1); - } + plic_hart_config = riscv_plic_hart_config_string(ms->smp.cpus); /* PLIC */ s->plic = sifive_plic_create(memmap[MICROCHIP_PFSOC_PLIC].base, - plic_hart_config, 0, + plic_hart_config, ms->smp.cpus, 0, MICROCHIP_PFSOC_PLIC_NUM_SOURCES, MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES, MICROCHIP_PFSOC_PLIC_PRIORITY_BASE, @@ -461,7 +451,13 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) MemoryRegion *mem_low_alias = g_new(MemoryRegion, 1); MemoryRegion *mem_high = g_new(MemoryRegion, 1); MemoryRegion *mem_high_alias = g_new(MemoryRegion, 1); - uint64_t mem_high_size; + uint64_t mem_low_size, mem_high_size; + hwaddr firmware_load_addr; + const char *firmware_name; + bool kernel_as_payload = false; + target_ulong firmware_end_addr, kernel_start_addr; + uint64_t kernel_entry; + uint32_t fdt_load_addr; DriveInfo *dinfo = drive_get_next(IF_SD); /* Sanity check on RAM size */ @@ -477,38 +473,38 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) TYPE_MICROCHIP_PFSOC); qdev_realize(DEVICE(&s->soc), NULL, &error_abort); + /* Split RAM into low and high regions using aliases to machine->ram */ + mem_low_size = memmap[MICROCHIP_PFSOC_DRAM_LO].size; + mem_high_size = machine->ram_size - mem_low_size; + memory_region_init_alias(mem_low, NULL, + "microchip.icicle.kit.ram_low", machine->ram, + 0, mem_low_size); + 
memory_region_init_alias(mem_high, NULL, + "microchip.icicle.kit.ram_high", machine->ram, + mem_low_size, mem_high_size); + /* Register RAM */ - memory_region_init_ram(mem_low, NULL, "microchip.icicle.kit.ram_low", - memmap[MICROCHIP_PFSOC_DRAM_LO].size, - &error_fatal); - memory_region_init_alias(mem_low_alias, NULL, - "microchip.icicle.kit.ram_low.alias", - mem_low, 0, - memmap[MICROCHIP_PFSOC_DRAM_LO_ALIAS].size); memory_region_add_subregion(system_memory, memmap[MICROCHIP_PFSOC_DRAM_LO].base, mem_low); + memory_region_add_subregion(system_memory, + memmap[MICROCHIP_PFSOC_DRAM_HI].base, + mem_high); + + /* Create aliases for the low and high RAM regions */ + memory_region_init_alias(mem_low_alias, NULL, + "microchip.icicle.kit.ram_low.alias", + mem_low, 0, mem_low_size); memory_region_add_subregion(system_memory, memmap[MICROCHIP_PFSOC_DRAM_LO_ALIAS].base, mem_low_alias); - - mem_high_size = machine->ram_size - 1 * GiB; - - memory_region_init_ram(mem_high, NULL, "microchip.icicle.kit.ram_high", - mem_high_size, &error_fatal); memory_region_init_alias(mem_high_alias, NULL, "microchip.icicle.kit.ram_high.alias", mem_high, 0, mem_high_size); - memory_region_add_subregion(system_memory, - memmap[MICROCHIP_PFSOC_DRAM_HI].base, - mem_high); memory_region_add_subregion(system_memory, memmap[MICROCHIP_PFSOC_DRAM_HI_ALIAS].base, mem_high_alias); - /* Load the firmware */ - (void)riscv_find_and_load_firmware(machine, BIOS_FILENAME, RESET_VECTOR, NULL); - /* Attach an SD card */ if (dinfo) { CadenceSDHCIState *sdhci = &(s->soc.sdhci); @@ -518,6 +514,77 @@ static void microchip_icicle_kit_machine_init(MachineState *machine) &error_fatal); qdev_realize_and_unref(card, sdhci->bus, &error_fatal); } + + /* + * We use the following table to select which payload we execute. + * + * -bios | -kernel | payload + * -------+------------+-------- + * N | N | HSS + * Y | don't care | HSS + * N | Y | kernel + * + * This ensures backwards compatibility with how we used to expose -bios + * to users but allows them to run through direct kernel booting as well. + * + * When -kernel is used for direct boot, -dtb must be present to provide + * a valid device tree for the board, as we don't generate a device tree. 
+ */ + + if (machine->kernel_filename && machine->dtb) { + int fdt_size; + machine->fdt = load_device_tree(machine->dtb, &fdt_size); + if (!machine->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + + firmware_name = RISCV64_BIOS_BIN; + firmware_load_addr = memmap[MICROCHIP_PFSOC_DRAM_LO].base; + kernel_as_payload = true; + } + + if (!kernel_as_payload) { + firmware_name = BIOS_FILENAME; + firmware_load_addr = RESET_VECTOR; + } + + /* Load the firmware */ + firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name, + firmware_load_addr, NULL); + + if (kernel_as_payload) { + kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus, + firmware_end_addr); + + kernel_entry = riscv_load_kernel(machine->kernel_filename, + kernel_start_addr, NULL); + + if (machine->initrd_filename) { + hwaddr start; + hwaddr end = riscv_load_initrd(machine->initrd_filename, + machine->ram_size, kernel_entry, + &start); + qemu_fdt_setprop_cell(machine->fdt, "/chosen", + "linux,initrd-start", start); + qemu_fdt_setprop_cell(machine->fdt, "/chosen", + "linux,initrd-end", end); + } + + if (machine->kernel_cmdline) { + qemu_fdt_setprop_string(machine->fdt, "/chosen", + "bootargs", machine->kernel_cmdline); + } + + /* Compute the fdt load address in dram */ + fdt_load_addr = riscv_load_fdt(memmap[MICROCHIP_PFSOC_DRAM_LO].base, + machine->ram_size, machine->fdt); + /* Load the reset vector */ + riscv_setup_rom_reset_vec(machine, &s->soc.u_cpus, firmware_load_addr, + memmap[MICROCHIP_PFSOC_ENVM_DATA].base, + memmap[MICROCHIP_PFSOC_ENVM_DATA].size, + kernel_entry, fdt_load_addr, machine->fdt); + } } static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) @@ -530,6 +597,7 @@ static void microchip_icicle_kit_machine_class_init(ObjectClass *oc, void *data) MICROCHIP_PFSOC_COMPUTE_CPU_COUNT; mc->min_cpus = MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT + 1; mc->default_cpus = mc->min_cpus; + mc->default_ram_id = "microchip.icicle.kit.ram"; /* * Map 513 MiB high memory, the minimum required high memory size, because diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c index 4f92307102f..7fe92d402f6 100644 --- a/hw/riscv/numa.c +++ b/hw/riscv/numa.c @@ -18,7 +18,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c index e168bffe692..c531450b9fd 100644 --- a/hw/riscv/opentitan.c +++ b/hw/riscv/opentitan.c @@ -19,12 +19,12 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "hw/riscv/opentitan.h" #include "qapi/error.h" #include "hw/boards.h" #include "hw/misc/unimp.h" #include "hw/riscv/boot.h" -#include "exec/address-spaces.h" #include "qemu/units.h" #include "sysemu/sysemu.h" @@ -37,46 +37,53 @@ static const MemMapEntry ibex_memmap[] = { [IBEX_DEV_SPI] = { 0x40050000, 0x1000 }, [IBEX_DEV_I2C] = { 0x40080000, 0x1000 }, [IBEX_DEV_PATTGEN] = { 0x400e0000, 0x1000 }, - [IBEX_DEV_RV_TIMER] = { 0x40100000, 0x1000 }, + [IBEX_DEV_TIMER] = { 0x40100000, 0x1000 }, [IBEX_DEV_SENSOR_CTRL] = { 0x40110000, 0x1000 }, [IBEX_DEV_OTP_CTRL] = { 0x40130000, 0x4000 }, + [IBEX_DEV_USBDEV] = { 0x40150000, 0x1000 }, [IBEX_DEV_PWRMGR] = { 0x40400000, 0x1000 }, [IBEX_DEV_RSTMGR] = { 0x40410000, 0x1000 }, [IBEX_DEV_CLKMGR] = { 0x40420000, 0x1000 }, [IBEX_DEV_PINMUX] = { 0x40460000, 0x1000 }, [IBEX_DEV_PADCTRL] = { 0x40470000, 0x1000 }, - [IBEX_DEV_USBDEV] = { 0x40500000, 0x1000 }, [IBEX_DEV_FLASH_CTRL] = { 0x41000000, 0x1000 }, - 
[IBEX_DEV_PLIC] = { 0x41010000, 0x1000 }, [IBEX_DEV_AES] = { 0x41100000, 0x1000 }, [IBEX_DEV_HMAC] = { 0x41110000, 0x1000 }, [IBEX_DEV_KMAC] = { 0x41120000, 0x1000 }, - [IBEX_DEV_KEYMGR] = { 0x41130000, 0x1000 }, + [IBEX_DEV_OTBN] = { 0x41130000, 0x10000 }, + [IBEX_DEV_KEYMGR] = { 0x41140000, 0x1000 }, [IBEX_DEV_CSRNG] = { 0x41150000, 0x1000 }, [IBEX_DEV_ENTROPY] = { 0x41160000, 0x1000 }, [IBEX_DEV_EDNO] = { 0x41170000, 0x1000 }, [IBEX_DEV_EDN1] = { 0x41180000, 0x1000 }, [IBEX_DEV_ALERT_HANDLER] = { 0x411b0000, 0x1000 }, [IBEX_DEV_NMI_GEN] = { 0x411c0000, 0x1000 }, - [IBEX_DEV_OTBN] = { 0x411d0000, 0x10000 }, + [IBEX_DEV_PERI] = { 0x411f0000, 0x10000 }, + [IBEX_DEV_PLIC] = { 0x48000000, 0x4005000 }, + [IBEX_DEV_FLASH_VIRTUAL] = { 0x80000000, 0x80000 }, }; static void opentitan_board_init(MachineState *machine) { + MachineClass *mc = MACHINE_GET_CLASS(machine); const MemMapEntry *memmap = ibex_memmap; OpenTitanState *s = g_new0(OpenTitanState, 1); MemoryRegion *sys_mem = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); + + if (machine->ram_size != mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } /* Initialize SoC */ object_initialize_child(OBJECT(machine), "soc", &s->soc, TYPE_RISCV_IBEX_SOC); qdev_realize(DEVICE(&s->soc), NULL, &error_abort); - memory_region_init_ram(main_mem, NULL, "riscv.lowrisc.ibex.ram", - memmap[IBEX_DEV_RAM].size, &error_fatal); memory_region_add_subregion(sys_mem, - memmap[IBEX_DEV_RAM].base, main_mem); + memmap[IBEX_DEV_RAM].base, machine->ram); if (machine->firmware) { riscv_load_firmware(machine->firmware, memmap[IBEX_DEV_RAM].base, NULL); @@ -94,6 +101,8 @@ static void opentitan_machine_init(MachineClass *mc) mc->init = opentitan_board_init; mc->max_cpus = 1; mc->default_cpu_type = TYPE_RISCV_CPU_IBEX; + mc->default_ram_id = "riscv.lowrisc.ibex.ram"; + mc->default_ram_size = ibex_memmap[IBEX_DEV_RAM].size; } DEFINE_MACHINE("opentitan", opentitan_machine_init) @@ -104,9 +113,11 @@ static void lowrisc_ibex_soc_init(Object *obj) object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY); - object_initialize_child(obj, "plic", &s->plic, TYPE_IBEX_PLIC); + object_initialize_child(obj, "plic", &s->plic, TYPE_SIFIVE_PLIC); object_initialize_child(obj, "uart", &s->uart, TYPE_IBEX_UART); + + object_initialize_child(obj, "timer", &s->timer, TYPE_IBEX_TIMER); } static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) @@ -115,12 +126,13 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) MachineState *ms = MACHINE(qdev_get_machine()); LowRISCIbexSoCState *s = RISCV_IBEX_SOC(dev_soc); MemoryRegion *sys_mem = get_system_memory(); + int i; object_property_set_str(OBJECT(&s->cpus), "cpu-type", ms->cpu_type, &error_abort); object_property_set_int(OBJECT(&s->cpus), "num-harts", ms->smp.cpus, &error_abort); - object_property_set_int(OBJECT(&s->cpus), "resetvec", 0x8090, &error_abort); + object_property_set_int(OBJECT(&s->cpus), "resetvec", 0x8080, &error_abort); sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_abort); /* Boot ROM */ @@ -132,15 +144,39 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) /* Flash memory */ memory_region_init_rom(&s->flash_mem, OBJECT(dev_soc), "riscv.lowrisc.ibex.flash", memmap[IBEX_DEV_FLASH].size, &error_fatal); + memory_region_init_alias(&s->flash_alias, OBJECT(dev_soc), + "riscv.lowrisc.ibex.flash_virtual", &s->flash_mem, 0, + 
memmap[IBEX_DEV_FLASH_VIRTUAL].size); memory_region_add_subregion(sys_mem, memmap[IBEX_DEV_FLASH].base, &s->flash_mem); + memory_region_add_subregion(sys_mem, memmap[IBEX_DEV_FLASH_VIRTUAL].base, + &s->flash_alias); /* PLIC */ + qdev_prop_set_string(DEVICE(&s->plic), "hart-config", "M"); + qdev_prop_set_uint32(DEVICE(&s->plic), "hartid-base", 0); + qdev_prop_set_uint32(DEVICE(&s->plic), "num-sources", 180); + qdev_prop_set_uint32(DEVICE(&s->plic), "num-priorities", 3); + qdev_prop_set_uint32(DEVICE(&s->plic), "priority-base", 0x00); + qdev_prop_set_uint32(DEVICE(&s->plic), "pending-base", 0x1000); + qdev_prop_set_uint32(DEVICE(&s->plic), "enable-base", 0x2000); + qdev_prop_set_uint32(DEVICE(&s->plic), "enable-stride", 0x18); + qdev_prop_set_uint32(DEVICE(&s->plic), "context-base", 0x200000); + qdev_prop_set_uint32(DEVICE(&s->plic), "context-stride", 8); + qdev_prop_set_uint32(DEVICE(&s->plic), "aperture-size", memmap[IBEX_DEV_PLIC].size); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->plic), errp)) { return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->plic), 0, memmap[IBEX_DEV_PLIC].base); + for (i = 0; i < ms->smp.cpus; i++) { + CPUState *cpu = qemu_get_cpu(i); + + qdev_connect_gpio_out(DEVICE(&s->plic), ms->smp.cpus + i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT)); + } + /* UART */ qdev_prop_set_chr(DEVICE(&(s->uart)), "chardev", serial_hd(0)); if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart), errp)) { @@ -149,16 +185,27 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) sysbus_mmio_map(SYS_BUS_DEVICE(&s->uart), 0, memmap[IBEX_DEV_UART].base); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 0, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_TX_WATERMARK_IRQ)); + IBEX_UART0_TX_WATERMARK_IRQ)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 1, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_RX_WATERMARK_IRQ)); + IBEX_UART0_RX_WATERMARK_IRQ)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 2, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_TX_EMPTY_IRQ)); + IBEX_UART0_TX_EMPTY_IRQ)); sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart), 3, qdev_get_gpio_in(DEVICE(&s->plic), - IBEX_UART_RX_OVERFLOW_IRQ)); + IBEX_UART0_RX_OVERFLOW_IRQ)); + + if (!sysbus_realize(SYS_BUS_DEVICE(&s->timer), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->timer), 0, memmap[IBEX_DEV_TIMER].base); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->timer), + 0, qdev_get_gpio_in(DEVICE(&s->plic), + IBEX_TIMER_TIMEREXPIRED0_0)); + qdev_connect_gpio_out(DEVICE(&s->timer), 0, + qdev_get_gpio_in(DEVICE(qemu_get_cpu(0)), + IRQ_M_TIMER)); create_unimplemented_device("riscv.lowrisc.ibex.gpio", memmap[IBEX_DEV_GPIO].base, memmap[IBEX_DEV_GPIO].size); @@ -168,8 +215,6 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) memmap[IBEX_DEV_I2C].base, memmap[IBEX_DEV_I2C].size); create_unimplemented_device("riscv.lowrisc.ibex.pattgen", memmap[IBEX_DEV_PATTGEN].base, memmap[IBEX_DEV_PATTGEN].size); - create_unimplemented_device("riscv.lowrisc.ibex.rv_timer", - memmap[IBEX_DEV_RV_TIMER].base, memmap[IBEX_DEV_RV_TIMER].size); create_unimplemented_device("riscv.lowrisc.ibex.sensor_ctrl", memmap[IBEX_DEV_SENSOR_CTRL].base, memmap[IBEX_DEV_SENSOR_CTRL].size); create_unimplemented_device("riscv.lowrisc.ibex.otp_ctrl", @@ -210,6 +255,8 @@ static void lowrisc_ibex_soc_realize(DeviceState *dev_soc, Error **errp) memmap[IBEX_DEV_NMI_GEN].base, memmap[IBEX_DEV_NMI_GEN].size); create_unimplemented_device("riscv.lowrisc.ibex.otbn", memmap[IBEX_DEV_OTBN].base, memmap[IBEX_DEV_OTBN].size); + 
create_unimplemented_device("riscv.lowrisc.ibex.peri", + memmap[IBEX_DEV_PERI].base, memmap[IBEX_DEV_PERI].size); } static void lowrisc_ibex_soc_class_init(ObjectClass *oc, void *data) diff --git a/hw/riscv/shakti_c.c b/hw/riscv/shakti_c.c new file mode 100644 index 00000000000..90e2cf609f3 --- /dev/null +++ b/hw/riscv/shakti_c.c @@ -0,0 +1,190 @@ +/* + * Shakti C-class SoC emulation + * + * Copyright (c) 2021 Vijai Kumar K + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "hw/riscv/shakti_c.h" +#include "qapi/error.h" +#include "hw/intc/sifive_plic.h" +#include "hw/intc/riscv_aclint.h" +#include "sysemu/sysemu.h" +#include "hw/qdev-properties.h" +#include "exec/address-spaces.h" +#include "hw/riscv/boot.h" + + +static const struct MemmapEntry { + hwaddr base; + hwaddr size; +} shakti_c_memmap[] = { + [SHAKTI_C_ROM] = { 0x00001000, 0x2000 }, + [SHAKTI_C_RAM] = { 0x80000000, 0x0 }, + [SHAKTI_C_UART] = { 0x00011300, 0x00040 }, + [SHAKTI_C_GPIO] = { 0x020d0000, 0x00100 }, + [SHAKTI_C_PLIC] = { 0x0c000000, 0x20000 }, + [SHAKTI_C_CLINT] = { 0x02000000, 0xc0000 }, + [SHAKTI_C_I2C] = { 0x20c00000, 0x00100 }, +}; + +static void shakti_c_machine_state_init(MachineState *mstate) +{ + ShaktiCMachineState *sms = RISCV_SHAKTI_MACHINE(mstate); + MemoryRegion *system_memory = get_system_memory(); + + /* Allow only Shakti C CPU for this platform */ + if (strcmp(mstate->cpu_type, TYPE_RISCV_CPU_SHAKTI_C) != 0) { + error_report("This board can only be used with Shakti C CPU"); + exit(1); + } + + /* Initialize SoC */ + object_initialize_child(OBJECT(mstate), "soc", &sms->soc, + TYPE_RISCV_SHAKTI_SOC); + qdev_realize(DEVICE(&sms->soc), NULL, &error_abort); + + /* register RAM */ + memory_region_add_subregion(system_memory, + shakti_c_memmap[SHAKTI_C_RAM].base, + mstate->ram); + + /* ROM reset vector */ + riscv_setup_rom_reset_vec(mstate, &sms->soc.cpus, + shakti_c_memmap[SHAKTI_C_RAM].base, + shakti_c_memmap[SHAKTI_C_ROM].base, + shakti_c_memmap[SHAKTI_C_ROM].size, 0, 0, + NULL); + if (mstate->firmware) { + riscv_load_firmware(mstate->firmware, + shakti_c_memmap[SHAKTI_C_RAM].base, + NULL); + } +} + +static void shakti_c_machine_instance_init(Object *obj) +{ +} + +static void shakti_c_machine_class_init(ObjectClass *klass, void *data) +{ + MachineClass *mc = MACHINE_CLASS(klass); + mc->desc = "RISC-V Board compatible with Shakti SDK"; + mc->init = shakti_c_machine_state_init; + mc->default_cpu_type = TYPE_RISCV_CPU_SHAKTI_C; + mc->default_ram_id = "riscv.shakti.c.ram"; +} + +static const TypeInfo shakti_c_machine_type_info = { + .name = TYPE_RISCV_SHAKTI_MACHINE, + .parent = TYPE_MACHINE, + .class_init = shakti_c_machine_class_init, + .instance_init = shakti_c_machine_instance_init, + .instance_size = sizeof(ShaktiCMachineState), +}; + +static void shakti_c_machine_type_info_register(void) +{ + type_register_static(&shakti_c_machine_type_info); +} +type_init(shakti_c_machine_type_info_register) + +static void 
shakti_c_soc_state_realize(DeviceState *dev, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + ShaktiCSoCState *sss = RISCV_SHAKTI_SOC(dev); + MemoryRegion *system_memory = get_system_memory(); + + sysbus_realize(SYS_BUS_DEVICE(&sss->cpus), &error_abort); + + sss->plic = sifive_plic_create(shakti_c_memmap[SHAKTI_C_PLIC].base, + (char *)SHAKTI_C_PLIC_HART_CONFIG, ms->smp.cpus, 0, + SHAKTI_C_PLIC_NUM_SOURCES, + SHAKTI_C_PLIC_NUM_PRIORITIES, + SHAKTI_C_PLIC_PRIORITY_BASE, + SHAKTI_C_PLIC_PENDING_BASE, + SHAKTI_C_PLIC_ENABLE_BASE, + SHAKTI_C_PLIC_ENABLE_STRIDE, + SHAKTI_C_PLIC_CONTEXT_BASE, + SHAKTI_C_PLIC_CONTEXT_STRIDE, + shakti_c_memmap[SHAKTI_C_PLIC].size); + + riscv_aclint_swi_create(shakti_c_memmap[SHAKTI_C_CLINT].base, + 0, 1, false); + riscv_aclint_mtimer_create(shakti_c_memmap[SHAKTI_C_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, 1, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); + + qdev_prop_set_chr(DEVICE(&(sss->uart)), "chardev", serial_hd(0)); + if (!sysbus_realize(SYS_BUS_DEVICE(&sss->uart), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&sss->uart), 0, + shakti_c_memmap[SHAKTI_C_UART].base); + + /* ROM */ + memory_region_init_rom(&sss->rom, OBJECT(dev), "riscv.shakti.c.rom", + shakti_c_memmap[SHAKTI_C_ROM].size, &error_fatal); + memory_region_add_subregion(system_memory, + shakti_c_memmap[SHAKTI_C_ROM].base, &sss->rom); +} + +static void shakti_c_soc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = shakti_c_soc_state_realize; + /* + * Reasons: + * - Creates CPUS in riscv_hart_realize(), and can create unintended + * CPUs + * - Uses serial_hds in realize function, thus can't be used twice + */ + dc->user_creatable = false; +} + +static void shakti_c_soc_instance_init(Object *obj) +{ + ShaktiCSoCState *sss = RISCV_SHAKTI_SOC(obj); + + object_initialize_child(obj, "cpus", &sss->cpus, TYPE_RISCV_HART_ARRAY); + object_initialize_child(obj, "uart", &sss->uart, TYPE_SHAKTI_UART); + + /* + * CPU type is fixed and we are not supporting passing from commandline yet. + * So let it be in instance_init. 
When supported, this should use ms->cpu_type + * instead of TYPE_RISCV_CPU_SHAKTI_C. + */ + object_property_set_str(OBJECT(&sss->cpus), "cpu-type", + TYPE_RISCV_CPU_SHAKTI_C, &error_abort); + object_property_set_int(OBJECT(&sss->cpus), "num-harts", 1, + &error_abort); +} + +static const TypeInfo shakti_c_type_info = { + .name = TYPE_RISCV_SHAKTI_SOC, + .parent = TYPE_DEVICE, + .class_init = shakti_c_soc_class_init, + .instance_init = shakti_c_soc_instance_init, + .instance_size = sizeof(ShaktiCSoCState), +}; + +static void shakti_c_type_info_register(void) +{ + type_register_static(&shakti_c_type_info); +} +type_init(shakti_c_type_info_register) diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c index f939bcf9ea6..9b206407a65 100644 --- a/hw/riscv/sifive_e.c +++ b/hw/riscv/sifive_e.c @@ -29,7 +29,7 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" @@ -42,15 +42,13 @@ #include "hw/riscv/sifive_e.h" #include "hw/riscv/boot.h" #include "hw/char/sifive_uart.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" #include "hw/misc/sifive_e_prci.h" #include "chardev/char.h" -#include "sysemu/arch_init.h" #include "sysemu/sysemu.h" -#include "exec/address-spaces.h" -static MemMapEntry sifive_e_memmap[] = { +static const MemMapEntry sifive_e_memmap[] = { [SIFIVE_E_DEV_DEBUG] = { 0x0, 0x1000 }, [SIFIVE_E_DEV_MROM] = { 0x1000, 0x2000 }, [SIFIVE_E_DEV_OTP] = { 0x20000, 0x2000 }, @@ -74,22 +72,27 @@ static MemMapEntry sifive_e_memmap[] = { static void sifive_e_machine_init(MachineState *machine) { + MachineClass *mc = MACHINE_GET_CLASS(machine); const MemMapEntry *memmap = sifive_e_memmap; SiFiveEState *s = RISCV_E_MACHINE(machine); MemoryRegion *sys_mem = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); int i; + if (machine->ram_size != mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } + /* Initialize SoC */ object_initialize_child(OBJECT(machine), "soc", &s->soc, TYPE_RISCV_E_SOC); qdev_realize(DEVICE(&s->soc), NULL, &error_abort); /* Data Tightly Integrated Memory */ - memory_region_init_ram(main_mem, NULL, "riscv.sifive.e.ram", - memmap[SIFIVE_E_DEV_DTIM].size, &error_fatal); memory_region_add_subregion(sys_mem, - memmap[SIFIVE_E_DEV_DTIM].base, main_mem); + memmap[SIFIVE_E_DEV_DTIM].base, machine->ram); /* Mask ROM reset vector */ uint32_t reset_vec[4]; @@ -145,6 +148,8 @@ static void sifive_e_machine_class_init(ObjectClass *oc, void *data) mc->init = sifive_e_machine_init; mc->max_cpus = 1; mc->default_cpu_type = SIFIVE_E_CPU; + mc->default_ram_id = "riscv.sifive.e.ram"; + mc->default_ram_size = sifive_e_memmap[SIFIVE_E_DEV_DTIM].size; object_class_property_add_bool(oc, "revb", sifive_e_machine_get_revb, sifive_e_machine_set_revb); @@ -200,7 +205,7 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_E_DEV_PLIC].base, - (char *)SIFIVE_E_PLIC_HART_CONFIG, 0, + (char *)SIFIVE_E_PLIC_HART_CONFIG, ms->smp.cpus, 0, SIFIVE_E_PLIC_NUM_SOURCES, SIFIVE_E_PLIC_NUM_PRIORITIES, SIFIVE_E_PLIC_PRIORITY_BASE, @@ -210,10 +215,13 @@ static void sifive_e_soc_realize(DeviceState *dev, Error **errp) SIFIVE_E_PLIC_CONTEXT_BASE, SIFIVE_E_PLIC_CONTEXT_STRIDE, memmap[SIFIVE_E_DEV_PLIC].size); - sifive_clint_create(memmap[SIFIVE_E_DEV_CLINT].base, - 
memmap[SIFIVE_E_DEV_CLINT].size, 0, ms->smp.cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, false); + riscv_aclint_swi_create(memmap[SIFIVE_E_DEV_CLINT].base, + 0, ms->smp.cpus, false); + riscv_aclint_mtimer_create(memmap[SIFIVE_E_DEV_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); create_unimplemented_device("riscv.sifive.e.aon", memmap[SIFIVE_E_DEV_AON].base, memmap[SIFIVE_E_DEV_AON].size); sifive_e_prci_create(memmap[SIFIVE_E_DEV_PRCI].base); diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index 6fa730b6674..8514c99e51d 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -17,6 +17,7 @@ * 7) DMA (Direct Memory Access Controller) * 8) SPI0 connected to an SPI flash * 9) SPI2 connected to an SD card + * 10) PWM0 and PWM1 * * This board currently generates devicetree dynamically that indicates at least * two harts and up to five harts. @@ -35,7 +36,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/visitor.h" @@ -52,17 +52,19 @@ #include "hw/riscv/sifive_u.h" #include "hw/riscv/boot.h" #include "hw/char/sifive_uart.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" #include "chardev/char.h" #include "net/eth.h" -#include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include +/* CLINT timebase frequency */ +#define CLINT_TIMEBASE_FREQ 1000000 + static const MemMapEntry sifive_u_memmap[] = { [SIFIVE_U_DEV_DEBUG] = { 0x0, 0x100 }, [SIFIVE_U_DEV_MROM] = { 0x1000, 0xf000 }, @@ -74,6 +76,8 @@ static const MemMapEntry sifive_u_memmap[] = { [SIFIVE_U_DEV_PRCI] = { 0x10000000, 0x1000 }, [SIFIVE_U_DEV_UART0] = { 0x10010000, 0x1000 }, [SIFIVE_U_DEV_UART1] = { 0x10011000, 0x1000 }, + [SIFIVE_U_DEV_PWM0] = { 0x10020000, 0x1000 }, + [SIFIVE_U_DEV_PWM1] = { 0x10021000, 0x1000 }, [SIFIVE_U_DEV_QSPI0] = { 0x10040000, 0x1000 }, [SIFIVE_U_DEV_QSPI2] = { 0x10050000, 0x1000 }, [SIFIVE_U_DEV_GPIO] = { 0x10060000, 0x1000 }, @@ -96,9 +100,15 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, int cpu; uint32_t *cells; char *nodename; - char ethclk_names[] = "pclk\0hclk"; uint32_t plic_phandle, prci_phandle, gpio_phandle, phandle = 1; uint32_t hfclk_phandle, rtcclk_phandle, phy_phandle; + static const char * const ethclk_names[2] = { "pclk", "hclk" }; + static const char * const clint_compat[2] = { + "sifive,clint0", "riscv,clint0" + }; + static const char * const plic_compat[2] = { + "sifive,plic-1.0.0", "riscv,plic0" + }; if (ms->dtb) { fdt = s->fdt = load_device_tree(ms->dtb, &s->fdt_size); @@ -160,7 +170,7 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_add_subnode(fdt, "/cpus"); qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", - SIFIVE_CLINT_TIMEBASE_FREQ); + CLINT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); @@ -210,7 +220,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, nodename = g_strdup_printf("/soc/clint@%lx", (long)memmap[SIFIVE_U_DEV_CLINT].base); qemu_fdt_add_subnode(fdt, nodename); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,clint0"); + qemu_fdt_setprop_string_array(fdt, nodename, "compatible", + (char 
**)&clint_compat, ARRAY_SIZE(clint_compat)); qemu_fdt_setprop_cells(fdt, nodename, "reg", 0x0, memmap[SIFIVE_U_DEV_CLINT].base, 0x0, memmap[SIFIVE_U_DEV_CLINT].size); @@ -267,7 +278,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, (long)memmap[SIFIVE_U_DEV_PLIC].base); qemu_fdt_add_subnode(fdt, nodename); qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", 1); - qemu_fdt_setprop_string(fdt, nodename, "compatible", "riscv,plic0"); + qemu_fdt_setprop_string_array(fdt, nodename, "compatible", + (char **)&plic_compat, ARRAY_SIZE(plic_compat)); qemu_fdt_setprop(fdt, nodename, "interrupt-controller", NULL, 0); qemu_fdt_setprop(fdt, nodename, "interrupts-extended", cells, (ms->smp.cpus * 4 - 2) * sizeof(uint32_t)); @@ -413,8 +425,8 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(fdt, nodename, "interrupts", SIFIVE_U_GEM_IRQ); qemu_fdt_setprop_cells(fdt, nodename, "clocks", prci_phandle, PRCI_CLK_GEMGXLPLL, prci_phandle, PRCI_CLK_GEMGXLPLL); - qemu_fdt_setprop(fdt, nodename, "clock-names", ethclk_names, - sizeof(ethclk_names)); + qemu_fdt_setprop_string_array(fdt, nodename, "clock-names", + (char **)ðclk_names, ARRAY_SIZE(ethclk_names)); qemu_fdt_setprop(fdt, nodename, "local-mac-address", s->soc.gem.conf.macaddr.a, ETH_ALEN); qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", 1); @@ -432,6 +444,38 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, qemu_fdt_setprop_cell(fdt, nodename, "reg", 0x0); g_free(nodename); + nodename = g_strdup_printf("/soc/pwm@%lx", + (long)memmap[SIFIVE_U_DEV_PWM0].base); + qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0"); + qemu_fdt_setprop_cells(fdt, nodename, "reg", + 0x0, memmap[SIFIVE_U_DEV_PWM0].base, + 0x0, memmap[SIFIVE_U_DEV_PWM0].size); + qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); + qemu_fdt_setprop_cells(fdt, nodename, "interrupts", + SIFIVE_U_PWM0_IRQ0, SIFIVE_U_PWM0_IRQ1, + SIFIVE_U_PWM0_IRQ2, SIFIVE_U_PWM0_IRQ3); + qemu_fdt_setprop_cells(fdt, nodename, "clocks", + prci_phandle, PRCI_CLK_TLCLK); + qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0); + g_free(nodename); + + nodename = g_strdup_printf("/soc/pwm@%lx", + (long)memmap[SIFIVE_U_DEV_PWM1].base); + qemu_fdt_add_subnode(fdt, nodename); + qemu_fdt_setprop_string(fdt, nodename, "compatible", "sifive,pwm0"); + qemu_fdt_setprop_cells(fdt, nodename, "reg", + 0x0, memmap[SIFIVE_U_DEV_PWM1].base, + 0x0, memmap[SIFIVE_U_DEV_PWM1].size); + qemu_fdt_setprop_cell(fdt, nodename, "interrupt-parent", plic_phandle); + qemu_fdt_setprop_cells(fdt, nodename, "interrupts", + SIFIVE_U_PWM1_IRQ0, SIFIVE_U_PWM1_IRQ1, + SIFIVE_U_PWM1_IRQ2, SIFIVE_U_PWM1_IRQ3); + qemu_fdt_setprop_cells(fdt, nodename, "clocks", + prci_phandle, PRCI_CLK_TLCLK); + qemu_fdt_setprop_cell(fdt, nodename, "#pwm-cells", 0); + g_free(nodename); + nodename = g_strdup_printf("/soc/serial@%lx", (long)memmap[SIFIVE_U_DEV_UART1].base); qemu_fdt_add_subnode(fdt, nodename); @@ -484,7 +528,6 @@ static void sifive_u_machine_init(MachineState *machine) const MemMapEntry *memmap = sifive_u_memmap; SiFiveUState *s = RISCV_U_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *flash0 = g_new(MemoryRegion, 1); target_ulong start_addr = memmap[SIFIVE_U_DEV_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; @@ -505,10 +548,8 @@ static void sifive_u_machine_init(MachineState *machine) 
qdev_realize(DEVICE(&s->soc), NULL, &error_abort); /* register RAM */ - memory_region_init_ram(main_mem, NULL, "riscv.sifive.u.ram", - machine->ram_size, &error_fatal); memory_region_add_subregion(system_memory, memmap[SIFIVE_U_DEV_DRAM].base, - main_mem); + machine->ram); /* register QSPI0 Flash */ memory_region_init_ram(flash0, NULL, "riscv.sifive.u.flash0", @@ -558,7 +599,7 @@ static void sifive_u_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI */ "bbl-riscv32cheri-generic-fw_jump.bin", #else - "opensbi-riscv32-generic-fw_dynamic.bin", + RISCV32_BIOS_BIN, #endif start_addr, NULL); } else { @@ -568,7 +609,7 @@ static void sifive_u_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI */ "bbl-riscv64cheri-generic-fw_jump.bin", #else - "opensbi-riscv64-generic-fw_dynamic.bin", + RISCV64_BIOS_BIN, #endif start_addr, NULL); } @@ -606,10 +647,10 @@ static void sifive_u_machine_init(MachineState *machine) } /* reset vector */ - uint32_t reset_vec[11] = { + uint32_t reset_vec[12] = { s->msel, /* MSEL pin state */ 0x00000297, /* 1: auipc t0, %pcrel_hi(fw_dyn) */ - 0x02828613, /* addi a2, t0, %pcrel_lo(1b) */ + 0x02c28613, /* addi a2, t0, %pcrel_lo(1b) */ 0xf1402573, /* csrr a0, mhartid */ 0, 0, @@ -617,6 +658,7 @@ static void sifive_u_machine_init(MachineState *machine) start_addr, /* start: .dword */ start_addr_hi32, fdt_load_addr, /* fdt_laddr: .dword */ + 0x00000000, 0x00000000, /* fw_dyn: */ }; @@ -717,6 +759,7 @@ static void sifive_u_machine_class_init(ObjectClass *oc, void *data) mc->min_cpus = SIFIVE_U_MANAGEMENT_CPU_COUNT + 1; mc->default_cpu_type = SIFIVE_U_CPU; mc->default_cpus = mc->min_cpus; + mc->default_ram_id = "riscv.sifive.u.ram"; object_class_property_add_bool(oc, "start-in-flash", sifive_u_machine_get_start_in_flash, @@ -769,6 +812,8 @@ static void sifive_u_soc_instance_init(Object *obj) object_initialize_child(obj, "pdma", &s->dma, TYPE_SIFIVE_PDMA); object_initialize_child(obj, "spi0", &s->spi0, TYPE_SIFIVE_SPI); object_initialize_child(obj, "spi2", &s->spi2, TYPE_SIFIVE_SPI); + object_initialize_child(obj, "pwm0", &s->pwm[0], TYPE_SIFIVE_PWM); + object_initialize_child(obj, "pwm1", &s->pwm[1], TYPE_SIFIVE_PWM); } static void sifive_u_soc_realize(DeviceState *dev, Error **errp) @@ -780,8 +825,7 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) MemoryRegion *mask_rom = g_new(MemoryRegion, 1); MemoryRegion *l2lim_mem = g_new(MemoryRegion, 1); char *plic_hart_config; - size_t plic_hart_config_len; - int i; + int i, j; NICInfo *nd = &nd_table[0]; qdev_prop_set_uint32(DEVICE(&s->u_cpus), "num-harts", ms->smp.cpus - 1); @@ -821,22 +865,11 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) l2lim_mem); /* create PLIC hart topology configuration string */ - plic_hart_config_len = (strlen(SIFIVE_U_PLIC_HART_CONFIG) + 1) * - ms->smp.cpus; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (i = 0; i < ms->smp.cpus; i++) { - if (i != 0) { - strncat(plic_hart_config, "," SIFIVE_U_PLIC_HART_CONFIG, - plic_hart_config_len); - } else { - strncat(plic_hart_config, "M", plic_hart_config_len); - } - plic_hart_config_len -= (strlen(SIFIVE_U_PLIC_HART_CONFIG) + 1); - } + plic_hart_config = riscv_plic_hart_config_string(ms->smp.cpus); /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_U_DEV_PLIC].base, - plic_hart_config, 0, + plic_hart_config, ms->smp.cpus, 0, SIFIVE_U_PLIC_NUM_SOURCES, SIFIVE_U_PLIC_NUM_PRIORITIES, SIFIVE_U_PLIC_PRIORITY_BASE, @@ -851,10 +884,13 @@ static void 
sifive_u_soc_realize(DeviceState *dev, Error **errp) serial_hd(0), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_UART0_IRQ)); sifive_uart_create(system_memory, memmap[SIFIVE_U_DEV_UART1].base, serial_hd(1), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_UART1_IRQ)); - sifive_clint_create(memmap[SIFIVE_U_DEV_CLINT].base, - memmap[SIFIVE_U_DEV_CLINT].size, 0, ms->smp.cpus, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, false); + riscv_aclint_swi_create(memmap[SIFIVE_U_DEV_CLINT].base, 0, + ms->smp.cpus, false); + riscv_aclint_mtimer_create(memmap[SIFIVE_U_DEV_CLINT].base + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + CLINT_TIMEBASE_FREQ, false); if (!sysbus_realize(SYS_BUS_DEVICE(&s->prci), errp)) { return; @@ -908,6 +944,22 @@ static void sifive_u_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->gem), 0, qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_GEM_IRQ)); + /* PWM */ + for (i = 0; i < 2; i++) { + if (!sysbus_realize(SYS_BUS_DEVICE(&s->pwm[i]), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->pwm[i]), 0, + memmap[SIFIVE_U_DEV_PWM0].base + (0x1000 * i)); + + /* Connect PWM interrupts to the PLIC */ + for (j = 0; j < SIFIVE_PWM_IRQS; j++) { + sysbus_connect_irq(SYS_BUS_DEVICE(&s->pwm[i]), j, + qdev_get_gpio_in(DEVICE(s->plic), + SIFIVE_U_PWM0_IRQ0 + (i * 4) + j)); + } + } + create_unimplemented_device("riscv.sifive.u.gem-mgmt", memmap[SIFIVE_U_DEV_GEM_MGMT].base, memmap[SIFIVE_U_DEV_GEM_MGMT].size); diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index a789b216d89..0541d75fac6 100644 --- a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -24,7 +24,6 @@ */ #include "qemu/osdep.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" @@ -36,9 +35,8 @@ #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" #include "hw/char/riscv_htif.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "chardev/char.h" -#include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #ifdef TARGET_CHERI @@ -63,6 +61,9 @@ static void create_fdt(SpikeState *s, const MemMapEntry *memmap, uint32_t cpu_phandle, intc_phandle, phandle = 1; char *name, *mem_name, *clint_name, *clust_name; char *core_name, *cpu_name, *intc_name; + static const char * const clint_compat[2] = { + "sifive,clint0", "riscv,clint0" + }; fdt = s->fdt = create_device_tree(&s->fdt_size); if (!fdt) { @@ -86,7 +87,7 @@ static void create_fdt(SpikeState *s, const MemMapEntry *memmap, qemu_fdt_add_subnode(fdt, "/cpus"); qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", - SIFIVE_CLINT_TIMEBASE_FREQ); + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ); qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); @@ -156,7 +157,8 @@ static void create_fdt(SpikeState *s, const MemMapEntry *memmap, (memmap[SPIKE_CLINT].size * socket); clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); qemu_fdt_add_subnode(fdt, clint_name); - qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); + qemu_fdt_setprop_string_array(fdt, clint_name, "compatible", + (char **)&clint_compat, ARRAY_SIZE(clint_compat)); qemu_fdt_setprop_cells(fdt, clint_name, "reg", 0x0, clint_addr, 0x0, memmap[SPIKE_CLINT].size); qemu_fdt_setprop(fdt, clint_name, "interrupts-extended", @@ -181,7 +183,6 @@ static 
void spike_board_init(MachineState *machine) const MemMapEntry *memmap = spike_memmap; SpikeState *s = SPIKE_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); target_ulong firmware_end_addr, kernel_start_addr; uint32_t fdt_load_addr; @@ -228,21 +229,20 @@ static void spike_board_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); /* Core Local Interruptor (timer and IPI) for each socket */ - sifive_clint_create( + riscv_aclint_swi_create( memmap[SPIKE_CLINT].base + i * memmap[SPIKE_CLINT].size, - memmap[SPIKE_CLINT].size, base_hartid, hart_count, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, false); + base_hartid, hart_count, false); + riscv_aclint_mtimer_create( + memmap[SPIKE_CLINT].base + i * memmap[SPIKE_CLINT].size + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, + RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false); } /* register system main memory (actual RAM) */ - memory_region_init_ram(main_mem, NULL, "riscv.spike.ram", - machine->ram_size, &error_fatal); memory_region_add_subregion(system_memory, memmap[SPIKE_DRAM].base, - main_mem); -#ifdef TARGET_CHERI - cheri_tag_init(main_mem, machine->ram_size); -#endif + machine->ram); /* create device tree */ create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline, @@ -261,13 +261,11 @@ static void spike_board_init(MachineState *machine) */ if (riscv_is_32bit(&s->soc[0])) { firmware_end_addr = riscv_find_and_load_firmware(machine, - "opensbi-riscv32-generic-fw_dynamic.elf", - memmap[SPIKE_DRAM].base, + RISCV32_BIOS_ELF, memmap[SPIKE_DRAM].base, htif_symbol_callback); } else { firmware_end_addr = riscv_find_and_load_firmware(machine, - "opensbi-riscv64-generic-fw_dynamic.elf", - memmap[SPIKE_DRAM].base, + RISCV64_BIOS_ELF, memmap[SPIKE_DRAM].base, htif_symbol_callback); } @@ -328,6 +326,7 @@ static void spike_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id; mc->numa_mem_supported = true; + mc->default_ram_id = "riscv.spike.ram"; } static const TypeInfo spike_machine_typeinfo = { diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 29db70633f7..6ab2074e596 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/log.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "hw/boards.h" @@ -33,11 +32,10 @@ #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" -#include "hw/intc/sifive_clint.h" +#include "hw/intc/riscv_aclint.h" #include "hw/intc/sifive_plic.h" #include "hw/misc/sifive_test.h" #include "chardev/char.h" -#include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #include "hw/pci/pci.h" @@ -53,6 +51,7 @@ static const MemMapEntry virt_memmap[] = { [VIRT_TEST] = { 0x100000, 0x1000 }, [VIRT_RTC] = { 0x101000, 0x1000 }, [VIRT_CLINT] = { 0x2000000, 0x10000 }, + [VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 }, [VIRT_PCIE_PIO] = { 0x3000000, 0x10000 }, [VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) }, [VIRT_UART0] = { 0x10000000, 0x100 }, @@ -181,206 +180,342 @@ static void create_pcie_irq_map(void *fdt, char *nodename, 0x1800, 0, 0, 0x7); } -static void 
create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, - uint64_t mem_size, const char *cmdline, bool is_32_bit) +static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, + char *clust_name, uint32_t *phandle, + bool is_32_bit, uint32_t *intc_phandles) { - void *fdt; - int i, cpu, socket; + int cpu; + uint32_t cpu_phandle; MachineState *mc = MACHINE(s); + char *name, *cpu_name, *core_name, *intc_name; + + for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { + cpu_phandle = (*phandle)++; + + cpu_name = g_strdup_printf("/cpus/cpu@%d", + s->soc[socket].hartid_base + cpu); + qemu_fdt_add_subnode(mc->fdt, cpu_name); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "mmu-type", + (is_32_bit) ? "riscv,sv32" : "riscv,sv48"); + name = riscv_isa_string(&s->soc[socket].harts[cpu]); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "riscv,isa", name); + g_free(name); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "compatible", "riscv"); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "status", "okay"); + qemu_fdt_setprop_cell(mc->fdt, cpu_name, "reg", + s->soc[socket].hartid_base + cpu); + qemu_fdt_setprop_string(mc->fdt, cpu_name, "device_type", "cpu"); + riscv_socket_fdt_write_id(mc, mc->fdt, cpu_name, socket); + qemu_fdt_setprop_cell(mc->fdt, cpu_name, "phandle", cpu_phandle); + + intc_phandles[cpu] = (*phandle)++; + + intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name); + qemu_fdt_add_subnode(mc->fdt, intc_name); + qemu_fdt_setprop_cell(mc->fdt, intc_name, "phandle", + intc_phandles[cpu]); + qemu_fdt_setprop_string(mc->fdt, intc_name, "compatible", + "riscv,cpu-intc"); + qemu_fdt_setprop(mc->fdt, intc_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, intc_name, "#interrupt-cells", 1); + + core_name = g_strdup_printf("%s/core%d", clust_name, cpu); + qemu_fdt_add_subnode(mc->fdt, core_name); + qemu_fdt_setprop_cell(mc->fdt, core_name, "cpu", cpu_phandle); + + g_free(core_name); + g_free(intc_name); + g_free(cpu_name); + } +} + +static void create_fdt_socket_memory(RISCVVirtState *s, + const MemMapEntry *memmap, int socket) +{ + char *mem_name; uint64_t addr, size; - uint32_t *clint_cells, *plic_cells; - unsigned long clint_addr, plic_addr; - uint32_t plic_phandle[MAX_NODES]; - uint32_t cpu_phandle, intc_phandle, test_phandle; - uint32_t phandle = 1, plic_mmio_phandle = 1; - uint32_t plic_pcie_phandle = 1, plic_virtio_phandle = 1; - char *mem_name, *cpu_name, *core_name, *intc_name; - char *name, *clint_name, *plic_name, *clust_name; - hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; - hwaddr flashbase = virt_memmap[VIRT_FLASH].base; + MachineState *mc = MACHINE(s); - if (mc->dtb) { - fdt = mc->fdt = load_device_tree(mc->dtb, &s->fdt_size); - if (!fdt) { - error_report("load_device_tree() failed"); - exit(1); - } - goto update_bootargs; - } else { - fdt = mc->fdt = create_device_tree(&s->fdt_size); - if (!fdt) { - error_report("create_device_tree() failed"); - exit(1); - } + addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(mc, socket); + size = riscv_socket_mem_size(mc, socket); + mem_name = g_strdup_printf("/memory@%lx", (long)addr); + qemu_fdt_add_subnode(mc->fdt, mem_name); + qemu_fdt_setprop_cells(mc->fdt, mem_name, "reg", + addr >> 32, addr, size >> 32, size); + qemu_fdt_setprop_string(mc->fdt, mem_name, "device_type", "memory"); + riscv_socket_fdt_write_id(mc, mc->fdt, mem_name, socket); + g_free(mem_name); +} + +static void create_fdt_socket_clint(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t *intc_phandles) +{ + int 
cpu; + char *clint_name; + uint32_t *clint_cells; + unsigned long clint_addr; + MachineState *mc = MACHINE(s); + static const char * const clint_compat[2] = { + "sifive,clint0", "riscv,clint0" + }; + + clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]); + clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); + clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]); + clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); } - qemu_fdt_setprop_string(fdt, "/", "model", "riscv-virtio,qemu"); - qemu_fdt_setprop_string(fdt, "/", "compatible", "riscv-virtio"); - qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2); - qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2); + clint_addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); + qemu_fdt_add_subnode(mc->fdt, clint_name); + qemu_fdt_setprop_string_array(mc->fdt, clint_name, "compatible", + (char **)&clint_compat, + ARRAY_SIZE(clint_compat)); + qemu_fdt_setprop_cells(mc->fdt, clint_name, "reg", + 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); + qemu_fdt_setprop(mc->fdt, clint_name, "interrupts-extended", + clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + riscv_socket_fdt_write_id(mc, mc->fdt, clint_name, socket); + g_free(clint_name); + + g_free(clint_cells); +} + +static void create_fdt_socket_aclint(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t *intc_phandles) +{ + int cpu; + char *name; + unsigned long addr; + uint32_t aclint_cells_size; + uint32_t *aclint_mswi_cells; + uint32_t *aclint_sswi_cells; + uint32_t *aclint_mtimer_cells; + MachineState *mc = MACHINE(s); - qemu_fdt_add_subnode(fdt, "/soc"); - qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0); - qemu_fdt_setprop_string(fdt, "/soc", "compatible", "simple-bus"); - qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x2); - qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2); + aclint_mswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + aclint_mtimer_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + aclint_sswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aclint_mswi_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aclint_mswi_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_SOFT); + aclint_mtimer_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aclint_mtimer_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_TIMER); + aclint_sswi_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aclint_sswi_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_SOFT); + } + aclint_cells_size = s->soc[socket].num_harts * sizeof(uint32_t) * 2; - qemu_fdt_add_subnode(fdt, "/cpus"); - qemu_fdt_setprop_cell(fdt, "/cpus", "timebase-frequency", - SIFIVE_CLINT_TIMEBASE_FREQ); - qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0x0); - qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); - qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); + addr = memmap[VIRT_CLINT].base + (memmap[VIRT_CLINT].size * socket); + name = g_strdup_printf("/soc/mswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-mswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_mswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, 
"interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + + addr = memmap[VIRT_CLINT].base + RISCV_ACLINT_SWI_SIZE + + (memmap[VIRT_CLINT].size * socket); + name = g_strdup_printf("/soc/mtimer@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "riscv,aclint-mtimer"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME, + 0x0, memmap[VIRT_CLINT].size - RISCV_ACLINT_SWI_SIZE - + RISCV_ACLINT_DEFAULT_MTIME, + 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP, + 0x0, RISCV_ACLINT_DEFAULT_MTIME); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_mtimer_cells, aclint_cells_size); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + + addr = memmap[VIRT_ACLINT_SSWI].base + + (memmap[VIRT_ACLINT_SSWI].size * socket); + name = g_strdup_printf("/soc/sswi@%lx", addr); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "riscv,aclint-sswi"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, addr, 0x0, memmap[VIRT_ACLINT_SSWI].size); + qemu_fdt_setprop(mc->fdt, name, "interrupts-extended", + aclint_sswi_cells, aclint_cells_size); + qemu_fdt_setprop(mc->fdt, name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", 0); + riscv_socket_fdt_write_id(mc, mc->fdt, name, socket); + g_free(name); + + g_free(aclint_mswi_cells); + g_free(aclint_mtimer_cells); + g_free(aclint_sswi_cells); +} + +static void create_fdt_socket_plic(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t *phandle, uint32_t *intc_phandles, + uint32_t *plic_phandles) +{ + int cpu; + char *plic_name; + uint32_t *plic_cells; + unsigned long plic_addr; + MachineState *mc = MACHINE(s); + static const char * const plic_compat[2] = { + "sifive,plic-1.0.0", "riscv,plic0" + }; + + plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandles[cpu]); + plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); + plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandles[cpu]); + plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); + } + + plic_phandles[socket] = (*phandle)++; + plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); + plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); + qemu_fdt_add_subnode(mc->fdt, plic_name); + qemu_fdt_setprop_cell(mc->fdt, plic_name, + "#address-cells", FDT_PLIC_ADDR_CELLS); + qemu_fdt_setprop_cell(mc->fdt, plic_name, + "#interrupt-cells", FDT_PLIC_INT_CELLS); + qemu_fdt_setprop_string_array(mc->fdt, plic_name, "compatible", + (char **)&plic_compat, + ARRAY_SIZE(plic_compat)); + qemu_fdt_setprop(mc->fdt, plic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop(mc->fdt, plic_name, "interrupts-extended", + plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); + qemu_fdt_setprop_cells(mc->fdt, plic_name, "reg", + 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); + qemu_fdt_setprop_cell(mc->fdt, plic_name, "riscv,ndev", VIRTIO_NDEV); + riscv_socket_fdt_write_id(mc, mc->fdt, plic_name, socket); + qemu_fdt_setprop_cell(mc->fdt, plic_name, "phandle", + plic_phandles[socket]); + g_free(plic_name); + + g_free(plic_cells); +} + +static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, + bool is_32_bit, uint32_t *phandle, + uint32_t 
*irq_mmio_phandle, + uint32_t *irq_pcie_phandle, + uint32_t *irq_virtio_phandle) +{ + int socket; + char *clust_name; + uint32_t *intc_phandles; + MachineState *mc = MACHINE(s); + uint32_t xplic_phandles[MAX_NODES]; + + qemu_fdt_add_subnode(mc->fdt, "/cpus"); + qemu_fdt_setprop_cell(mc->fdt, "/cpus", "timebase-frequency", + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ); + qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#size-cells", 0x0); + qemu_fdt_setprop_cell(mc->fdt, "/cpus", "#address-cells", 0x1); + qemu_fdt_add_subnode(mc->fdt, "/cpus/cpu-map"); for (socket = (riscv_socket_count(mc) - 1); socket >= 0; socket--) { clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket); - qemu_fdt_add_subnode(fdt, clust_name); - - plic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); - clint_cells = g_new0(uint32_t, s->soc[socket].num_harts * 4); - - for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { - cpu_phandle = phandle++; - - cpu_name = g_strdup_printf("/cpus/cpu@%d", - s->soc[socket].hartid_base + cpu); - qemu_fdt_add_subnode(fdt, cpu_name); - if (is_32_bit) { - qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv32"); - } else { - qemu_fdt_setprop_string(fdt, cpu_name, "mmu-type", "riscv,sv48"); - } - name = riscv_isa_string(&s->soc[socket].harts[cpu]); - qemu_fdt_setprop_string(fdt, cpu_name, "riscv,isa", name); - g_free(name); - qemu_fdt_setprop_string(fdt, cpu_name, "compatible", "riscv"); - qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay"); - qemu_fdt_setprop_cell(fdt, cpu_name, "reg", - s->soc[socket].hartid_base + cpu); - qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu"); - riscv_socket_fdt_write_id(mc, fdt, cpu_name, socket); - qemu_fdt_setprop_cell(fdt, cpu_name, "phandle", cpu_phandle); - - intc_name = g_strdup_printf("%s/interrupt-controller", cpu_name); - qemu_fdt_add_subnode(fdt, intc_name); - intc_phandle = phandle++; - qemu_fdt_setprop_cell(fdt, intc_name, "phandle", intc_phandle); - qemu_fdt_setprop_string(fdt, intc_name, "compatible", - "riscv,cpu-intc"); - qemu_fdt_setprop(fdt, intc_name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop_cell(fdt, intc_name, "#interrupt-cells", 1); - - clint_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - clint_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_SOFT); - clint_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - clint_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_M_TIMER); - - plic_cells[cpu * 4 + 0] = cpu_to_be32(intc_phandle); - plic_cells[cpu * 4 + 1] = cpu_to_be32(IRQ_M_EXT); - plic_cells[cpu * 4 + 2] = cpu_to_be32(intc_phandle); - plic_cells[cpu * 4 + 3] = cpu_to_be32(IRQ_S_EXT); - - core_name = g_strdup_printf("%s/core%d", clust_name, cpu); - qemu_fdt_add_subnode(fdt, core_name); - qemu_fdt_setprop_cell(fdt, core_name, "cpu", cpu_phandle); - - g_free(core_name); - g_free(intc_name); - g_free(cpu_name); + qemu_fdt_add_subnode(mc->fdt, clust_name); + + intc_phandles = g_new0(uint32_t, s->soc[socket].num_harts); + + create_fdt_socket_cpus(s, socket, clust_name, phandle, + is_32_bit, intc_phandles); + + create_fdt_socket_memory(s, memmap, socket); + + if (s->have_aclint) { + create_fdt_socket_aclint(s, memmap, socket, intc_phandles); + } else { + create_fdt_socket_clint(s, memmap, socket, intc_phandles); } - addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(mc, socket); - size = riscv_socket_mem_size(mc, socket); - mem_name = g_strdup_printf("/memory@%lx", (long)addr); - qemu_fdt_add_subnode(fdt, mem_name); - qemu_fdt_setprop_cells(fdt, mem_name, "reg", - addr >> 32, addr, size >> 32, size); - 
qemu_fdt_setprop_string(fdt, mem_name, "device_type", "memory"); - riscv_socket_fdt_write_id(mc, fdt, mem_name, socket); - g_free(mem_name); - - clint_addr = memmap[VIRT_CLINT].base + - (memmap[VIRT_CLINT].size * socket); - clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr); - qemu_fdt_add_subnode(fdt, clint_name); - qemu_fdt_setprop_string(fdt, clint_name, "compatible", "riscv,clint0"); - qemu_fdt_setprop_cells(fdt, clint_name, "reg", - 0x0, clint_addr, 0x0, memmap[VIRT_CLINT].size); - qemu_fdt_setprop(fdt, clint_name, "interrupts-extended", - clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); - riscv_socket_fdt_write_id(mc, fdt, clint_name, socket); - g_free(clint_name); - - plic_phandle[socket] = phandle++; - plic_addr = memmap[VIRT_PLIC].base + (memmap[VIRT_PLIC].size * socket); - plic_name = g_strdup_printf("/soc/plic@%lx", plic_addr); - qemu_fdt_add_subnode(fdt, plic_name); - qemu_fdt_setprop_cell(fdt, plic_name, - "#address-cells", FDT_PLIC_ADDR_CELLS); - qemu_fdt_setprop_cell(fdt, plic_name, - "#interrupt-cells", FDT_PLIC_INT_CELLS); - qemu_fdt_setprop_string(fdt, plic_name, "compatible", "riscv,plic0"); - qemu_fdt_setprop(fdt, plic_name, "interrupt-controller", NULL, 0); - qemu_fdt_setprop(fdt, plic_name, "interrupts-extended", - plic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4); - qemu_fdt_setprop_cells(fdt, plic_name, "reg", - 0x0, plic_addr, 0x0, memmap[VIRT_PLIC].size); - qemu_fdt_setprop_cell(fdt, plic_name, "riscv,ndev", VIRTIO_NDEV); - riscv_socket_fdt_write_id(mc, fdt, plic_name, socket); - qemu_fdt_setprop_cell(fdt, plic_name, "phandle", plic_phandle[socket]); - g_free(plic_name); - - g_free(clint_cells); - g_free(plic_cells); + create_fdt_socket_plic(s, memmap, socket, phandle, + intc_phandles, xplic_phandles); + + g_free(intc_phandles); g_free(clust_name); } for (socket = 0; socket < riscv_socket_count(mc); socket++) { if (socket == 0) { - plic_mmio_phandle = plic_phandle[socket]; - plic_virtio_phandle = plic_phandle[socket]; - plic_pcie_phandle = plic_phandle[socket]; + *irq_mmio_phandle = xplic_phandles[socket]; + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; } if (socket == 1) { - plic_virtio_phandle = plic_phandle[socket]; - plic_pcie_phandle = plic_phandle[socket]; + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; } if (socket == 2) { - plic_pcie_phandle = plic_phandle[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; } } - riscv_socket_fdt_write_distance_matrix(mc, fdt); + riscv_socket_fdt_write_distance_matrix(mc, mc->fdt); +} + +static void create_fdt_virtio(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_virtio_phandle) +{ + int i; + char *name; + MachineState *mc = MACHINE(s); for (i = 0; i < VIRTIO_COUNT; i++) { name = g_strdup_printf("/soc/virtio_mmio@%lx", (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size)); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "virtio,mmio"); - qemu_fdt_setprop_cells(fdt, name, "reg", + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "virtio,mmio"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0x0, memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size, 0x0, memmap[VIRT_VIRTIO].size); - qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", - plic_virtio_phandle); - qemu_fdt_setprop_cell(fdt, name, "interrupts", VIRTIO_IRQ + i); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", + 
irq_virtio_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", VIRTIO_IRQ + i); g_free(name); } +} + +static void create_fdt_pcie(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_pcie_phandle) +{ + char *name; + MachineState *mc = MACHINE(s); name = g_strdup_printf("/soc/pci@%lx", (long) memmap[VIRT_PCIE_ECAM].base); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_cell(fdt, name, "#address-cells", FDT_PCI_ADDR_CELLS); - qemu_fdt_setprop_cell(fdt, name, "#interrupt-cells", FDT_PCI_INT_CELLS); - qemu_fdt_setprop_cell(fdt, name, "#size-cells", 0x2); - qemu_fdt_setprop_string(fdt, name, "compatible", "pci-host-ecam-generic"); - qemu_fdt_setprop_string(fdt, name, "device_type", "pci"); - qemu_fdt_setprop_cell(fdt, name, "linux,pci-domain", 0); - qemu_fdt_setprop_cells(fdt, name, "bus-range", 0, + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_cell(mc->fdt, name, "#address-cells", + FDT_PCI_ADDR_CELLS); + qemu_fdt_setprop_cell(mc->fdt, name, "#interrupt-cells", + FDT_PCI_INT_CELLS); + qemu_fdt_setprop_cell(mc->fdt, name, "#size-cells", 0x2); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "pci-host-ecam-generic"); + qemu_fdt_setprop_string(mc->fdt, name, "device_type", "pci"); + qemu_fdt_setprop_cell(mc->fdt, name, "linux,pci-domain", 0); + qemu_fdt_setprop_cells(mc->fdt, name, "bus-range", 0, memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); - qemu_fdt_setprop(fdt, name, "dma-coherent", NULL, 0); - qemu_fdt_setprop_cells(fdt, name, "reg", 0, + qemu_fdt_setprop(mc->fdt, name, "dma-coherent", NULL, 0); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0, memmap[VIRT_PCIE_ECAM].base, 0, memmap[VIRT_PCIE_ECAM].size); - qemu_fdt_setprop_sized_cells(fdt, name, "ranges", + qemu_fdt_setprop_sized_cells(mc->fdt, name, "ranges", 1, FDT_PCI_RANGE_IOPORT, 2, 0, 2, memmap[VIRT_PCIE_PIO].base, 2, memmap[VIRT_PCIE_PIO].size, 1, FDT_PCI_RANGE_MMIO, @@ -390,65 +525,98 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.base, 2, virt_high_pcie_memmap.size); - create_pcie_irq_map(fdt, name, plic_pcie_phandle); + create_pcie_irq_map(mc->fdt, name, irq_pcie_phandle); g_free(name); +} + +static void create_fdt_reset(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t *phandle) +{ + char *name; + uint32_t test_phandle; + MachineState *mc = MACHINE(s); - test_phandle = phandle++; + test_phandle = (*phandle)++; name = g_strdup_printf("/soc/test@%lx", (long)memmap[VIRT_TEST].base); - qemu_fdt_add_subnode(fdt, name); + qemu_fdt_add_subnode(mc->fdt, name); { - const char compat[] = "sifive,test1\0sifive,test0\0syscon"; - qemu_fdt_setprop(fdt, name, "compatible", compat, sizeof(compat)); + static const char * const compat[3] = { + "sifive,test1", "sifive,test0", "syscon" + }; + qemu_fdt_setprop_string_array(mc->fdt, name, "compatible", + (char **)&compat, ARRAY_SIZE(compat)); } - qemu_fdt_setprop_cells(fdt, name, "reg", - 0x0, memmap[VIRT_TEST].base, - 0x0, memmap[VIRT_TEST].size); - qemu_fdt_setprop_cell(fdt, name, "phandle", test_phandle); - test_phandle = qemu_fdt_get_phandle(fdt, name); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, memmap[VIRT_TEST].base, 0x0, memmap[VIRT_TEST].size); + qemu_fdt_setprop_cell(mc->fdt, name, "phandle", test_phandle); + test_phandle = qemu_fdt_get_phandle(mc->fdt, name); g_free(name); name = g_strdup_printf("/soc/reboot"); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-reboot"); - 
qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); - qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); - qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_RESET); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "syscon-reboot"); + qemu_fdt_setprop_cell(mc->fdt, name, "regmap", test_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "offset", 0x0); + qemu_fdt_setprop_cell(mc->fdt, name, "value", FINISHER_RESET); g_free(name); name = g_strdup_printf("/soc/poweroff"); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "syscon-poweroff"); - qemu_fdt_setprop_cell(fdt, name, "regmap", test_phandle); - qemu_fdt_setprop_cell(fdt, name, "offset", 0x0); - qemu_fdt_setprop_cell(fdt, name, "value", FINISHER_PASS); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "syscon-poweroff"); + qemu_fdt_setprop_cell(mc->fdt, name, "regmap", test_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "offset", 0x0); + qemu_fdt_setprop_cell(mc->fdt, name, "value", FINISHER_PASS); g_free(name); +} + +static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_mmio_phandle) +{ + char *name; + MachineState *mc = MACHINE(s); name = g_strdup_printf("/soc/uart@%lx", (long)memmap[VIRT_UART0].base); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "ns16550a"); - qemu_fdt_setprop_cells(fdt, name, "reg", + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", "ns16550a"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", 0x0, memmap[VIRT_UART0].base, 0x0, memmap[VIRT_UART0].size); - qemu_fdt_setprop_cell(fdt, name, "clock-frequency", 3686400); - qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); - qemu_fdt_setprop_cell(fdt, name, "interrupts", UART0_IRQ); + qemu_fdt_setprop_cell(mc->fdt, name, "clock-frequency", 3686400); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", irq_mmio_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", UART0_IRQ); - qemu_fdt_add_subnode(fdt, "/chosen"); - qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", name); + qemu_fdt_add_subnode(mc->fdt, "/chosen"); + qemu_fdt_setprop_string(mc->fdt, "/chosen", "stdout-path", name); g_free(name); +} + +static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t irq_mmio_phandle) +{ + char *name; + MachineState *mc = MACHINE(s); name = g_strdup_printf("/soc/rtc@%lx", (long)memmap[VIRT_RTC].base); - qemu_fdt_add_subnode(fdt, name); - qemu_fdt_setprop_string(fdt, name, "compatible", "google,goldfish-rtc"); - qemu_fdt_setprop_cells(fdt, name, "reg", - 0x0, memmap[VIRT_RTC].base, - 0x0, memmap[VIRT_RTC].size); - qemu_fdt_setprop_cell(fdt, name, "interrupt-parent", plic_mmio_phandle); - qemu_fdt_setprop_cell(fdt, name, "interrupts", RTC_IRQ); + qemu_fdt_add_subnode(mc->fdt, name); + qemu_fdt_setprop_string(mc->fdt, name, "compatible", + "google,goldfish-rtc"); + qemu_fdt_setprop_cells(mc->fdt, name, "reg", + 0x0, memmap[VIRT_RTC].base, 0x0, memmap[VIRT_RTC].size); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupt-parent", + irq_mmio_phandle); + qemu_fdt_setprop_cell(mc->fdt, name, "interrupts", RTC_IRQ); g_free(name); +} + +static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap) +{ + char *name; + MachineState *mc = MACHINE(s); + hwaddr flashsize = virt_memmap[VIRT_FLASH].size / 2; + hwaddr flashbase = virt_memmap[VIRT_FLASH].base; - name = 
g_strdup_printf("/soc/flash@%" PRIx64, flashbase); + name = g_strdup_printf("/flash@%" PRIx64, flashbase); qemu_fdt_add_subnode(mc->fdt, name); qemu_fdt_setprop_string(mc->fdt, name, "compatible", "cfi-flash"); qemu_fdt_setprop_sized_cells(mc->fdt, name, "reg", @@ -456,10 +624,59 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, 2, flashbase + flashsize, 2, flashsize); qemu_fdt_setprop_cell(mc->fdt, name, "bank-width", 4); g_free(name); +} + +static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap, + uint64_t mem_size, const char *cmdline, bool is_32_bit) +{ + MachineState *mc = MACHINE(s); + uint32_t phandle = 1, irq_mmio_phandle = 1; + uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; + + if (mc->dtb) { + mc->fdt = load_device_tree(mc->dtb, &s->fdt_size); + if (!mc->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + goto update_bootargs; + } else { + mc->fdt = create_device_tree(&s->fdt_size); + if (!mc->fdt) { + error_report("create_device_tree() failed"); + exit(1); + } + } + + qemu_fdt_setprop_string(mc->fdt, "/", "model", "riscv-virtio,qemu"); + qemu_fdt_setprop_string(mc->fdt, "/", "compatible", "riscv-virtio"); + qemu_fdt_setprop_cell(mc->fdt, "/", "#size-cells", 0x2); + qemu_fdt_setprop_cell(mc->fdt, "/", "#address-cells", 0x2); + + qemu_fdt_add_subnode(mc->fdt, "/soc"); + qemu_fdt_setprop(mc->fdt, "/soc", "ranges", NULL, 0); + qemu_fdt_setprop_string(mc->fdt, "/soc", "compatible", "simple-bus"); + qemu_fdt_setprop_cell(mc->fdt, "/soc", "#size-cells", 0x2); + qemu_fdt_setprop_cell(mc->fdt, "/soc", "#address-cells", 0x2); + + create_fdt_sockets(s, memmap, is_32_bit, &phandle, + &irq_mmio_phandle, &irq_pcie_phandle, &irq_virtio_phandle); + + create_fdt_virtio(s, memmap, irq_virtio_phandle); + + create_fdt_pcie(s, memmap, irq_pcie_phandle); + + create_fdt_reset(s, memmap, &phandle); + + create_fdt_uart(s, memmap, irq_mmio_phandle); + + create_fdt_rtc(s, memmap, irq_mmio_phandle); + + create_fdt_flash(s, memmap); update_bootargs: if (cmdline) { - qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline); + qemu_fdt_setprop_string(mc->fdt, "/chosen", "bootargs", cmdline); } } @@ -539,16 +756,14 @@ static void virt_machine_init(MachineState *machine) const MemMapEntry *memmap = virt_memmap; RISCVVirtState *s = RISCV_VIRT_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); char *plic_hart_config, *soc_name; - size_t plic_hart_config_len; target_ulong start_addr = memmap[VIRT_DRAM].base; target_ulong firmware_end_addr, kernel_start_addr; uint32_t fdt_load_addr; uint64_t kernel_entry; DeviceState *mmio_plic, *virtio_plic, *pcie_plic; - int i, j, base_hartid, hart_count; + int i, base_hartid, hart_count; /* Check socket count limit */ if (VIRT_SOCKETS_MAX < riscv_socket_count(machine)) { @@ -590,29 +805,31 @@ static void virt_machine_init(MachineState *machine) sysbus_realize(SYS_BUS_DEVICE(&s->soc[i]), &error_abort); /* Per-socket CLINT */ - sifive_clint_create( + riscv_aclint_swi_create( memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size, - memmap[VIRT_CLINT].size, base_hartid, hart_count, - SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE, - SIFIVE_CLINT_TIMEBASE_FREQ, true); + base_hartid, hart_count, false); + riscv_aclint_mtimer_create( + memmap[VIRT_CLINT].base + i * memmap[VIRT_CLINT].size + + RISCV_ACLINT_SWI_SIZE, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE, base_hartid, hart_count, + 
RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, true); + + /* Per-socket ACLINT SSWI */ + if (s->have_aclint) { + riscv_aclint_swi_create( + memmap[VIRT_ACLINT_SSWI].base + + i * memmap[VIRT_ACLINT_SSWI].size, + base_hartid, hart_count, true); + } /* Per-socket PLIC hart topology configuration string */ - plic_hart_config_len = - (strlen(VIRT_PLIC_HART_CONFIG) + 1) * hart_count; - plic_hart_config = g_malloc0(plic_hart_config_len); - for (j = 0; j < hart_count; j++) { - if (j != 0) { - strncat(plic_hart_config, ",", plic_hart_config_len); - } - strncat(plic_hart_config, VIRT_PLIC_HART_CONFIG, - plic_hart_config_len); - plic_hart_config_len -= (strlen(VIRT_PLIC_HART_CONFIG) + 1); - } + plic_hart_config = riscv_plic_hart_config_string(hart_count); /* Per-socket PLIC */ s->plic[i] = sifive_plic_create( memmap[VIRT_PLIC].base + i * memmap[VIRT_PLIC].size, - plic_hart_config, base_hartid, + plic_hart_config, hart_count, base_hartid, VIRT_PLIC_NUM_SOURCES, VIRT_PLIC_NUM_PRIORITIES, VIRT_PLIC_PRIORITY_BASE, @@ -657,13 +874,8 @@ static void virt_machine_init(MachineState *machine) } /* register system main memory (actual RAM) */ - memory_region_init_ram(main_mem, NULL, "riscv_virt_board.ram", - machine->ram_size, &error_fatal); memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, - main_mem); -#ifdef TARGET_CHERI - cheri_tag_init(main_mem, machine->ram_size); -#endif + machine->ram); /* create device tree */ create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline, @@ -682,7 +894,7 @@ static void virt_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI. */ "bbl-riscv32cheri-virt-fw_jump.bin", #else - "opensbi-riscv32-generic-fw_dynamic.bin", + RISCV32_BIOS_BIN, #endif start_addr, NULL); } else { @@ -692,7 +904,7 @@ static void virt_machine_init(MachineState *machine) /* Use a purecap BBL as the BIOS for CHERI. 
*/ "bbl-riscv64cheri-virt-fw_jump.bin", #else - "opensbi-riscv64-generic-fw_dynamic.bin", + RISCV64_BIOS_BIN, #endif start_addr, NULL); } @@ -787,6 +999,22 @@ static void virt_machine_instance_init(Object *obj) { } +static bool virt_get_aclint(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + RISCVVirtState *s = RISCV_VIRT_MACHINE(ms); + + return s->have_aclint; +} + +static void virt_set_aclint(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + RISCVVirtState *s = RISCV_VIRT_MACHINE(ms); + + s->have_aclint = value; +} + static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -800,8 +1028,15 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props; mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id; mc->numa_mem_supported = true; + mc->default_ram_id = "riscv_virt_board.ram"; machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + + object_class_property_add_bool(oc, "aclint", virt_get_aclint, + virt_set_aclint); + object_class_property_set_description(oc, "aclint", + "Set on/off to enable/disable " + "emulating ACLINT devices"); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/rtc/m48t59.c b/hw/rtc/m48t59.c index d54929e8612..690f4e071a1 100644 --- a/hw/rtc/m48t59.c +++ b/hw/rtc/m48t59.c @@ -32,7 +32,6 @@ #include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/sysbus.h" -#include "exec/address-spaces.h" #include "qapi/error.h" #include "qemu/bcd.h" #include "qemu/module.h" diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c index 5d0fcacd0c0..4fbafddb226 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,7 +42,6 @@ #include "qapi/error.h" #include "qapi/qapi-events-misc-target.h" #include "qapi/visitor.h" -#include "exec/address-spaces.h" #include "hw/rtc/mc146818rtc_regs.h" #ifdef TARGET_I386 @@ -872,22 +871,6 @@ static void rtc_notify_suspend(Notifier *notifier, void *data) rtc_set_memory(ISA_DEVICE(s), 0xF, 0xFE); } -static void rtc_reset(void *opaque) -{ - RTCState *s = opaque; - - s->cmos_data[RTC_REG_B] &= ~(REG_B_PIE | REG_B_AIE | REG_B_SQWE); - s->cmos_data[RTC_REG_C] &= ~(REG_C_UF | REG_C_IRQF | REG_C_PF | REG_C_AF); - check_update_timer(s); - - qemu_irq_lower(s->irq); - - if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { - s->irq_coalesced = 0; - s->irq_reinject_on_ack_count = 0; - } -} - static const MemoryRegionOps cmos_ops = { .read = cmos_ioport_read, .write = cmos_ioport_write, @@ -962,7 +945,6 @@ static void rtc_realizefn(DeviceState *dev, Error **errp) memory_region_add_coalescing(&s->coalesced_io, 0, 1); qdev_set_legacy_instance_id(dev, RTC_ISA_BASE, 3); - qemu_register_reset(rtc_reset, s); object_property_add_tm(OBJECT(s), "date", rtc_get_date); @@ -998,15 +980,32 @@ static Property mc146818rtc_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static void rtc_resetdev(DeviceState *d) +static void rtc_reset_enter(Object *obj, ResetType type) { - RTCState *s = MC146818_RTC(d); + RTCState *s = MC146818_RTC(obj); /* Reason: VM do suspend self will set 0xfe * Reset any values other than 0xfe(Guest suspend case) */ if (s->cmos_data[0x0f] != 0xfe) { s->cmos_data[0x0f] = 0x00; } + + s->cmos_data[RTC_REG_B] &= ~(REG_B_PIE | REG_B_AIE | REG_B_SQWE); + s->cmos_data[RTC_REG_C] &= ~(REG_C_UF | REG_C_IRQF | REG_C_PF | REG_C_AF); + check_update_timer(s); + + + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { + s->irq_coalesced = 0; + 
s->irq_reinject_on_ack_count = 0;
+    }
+}
+
+static void rtc_reset_hold(Object *obj)
+{
+    RTCState *s = MC146818_RTC(obj);
+
+    qemu_irq_lower(s->irq);
 }
 static void rtc_build_aml(ISADevice *isadev, Aml *scope)
@@ -1033,13 +1032,16 @@ static void rtc_build_aml(ISADevice *isadev, Aml *scope)
 static void rtc_class_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
+    ResettableClass *rc = RESETTABLE_CLASS(klass);
     ISADeviceClass *isa = ISA_DEVICE_CLASS(klass);
     dc->realize = rtc_realizefn;
-    dc->reset = rtc_resetdev;
     dc->vmsd = &vmstate_rtc;
+    rc->phases.enter = rtc_reset_enter;
+    rc->phases.hold = rtc_reset_hold;
     isa->build_aml = rtc_build_aml;
     device_class_set_props(dc, mc146818rtc_properties);
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 static const TypeInfo mc146818rtc_info = {
diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build
index 7cecdee5ddb..8fd8d8f9a71 100644
--- a/hw/rtc/meson.build
+++ b/hw/rtc/meson.build
@@ -2,7 +2,7 @@
 softmmu_ss.add(when: 'CONFIG_DS1338', if_true: files('ds1338.c'))
 softmmu_ss.add(when: 'CONFIG_M41T80', if_true: files('m41t80.c'))
 softmmu_ss.add(when: 'CONFIG_M48T59', if_true: files('m48t59.c'))
-softmmu_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c'))
+specific_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c'))
 softmmu_ss.add(when: 'CONFIG_TWL92230', if_true: files('twl92230.c'))
 softmmu_ss.add(when: ['CONFIG_ISA_BUS', 'CONFIG_M48T59'], if_true: files('m48t59-isa.c'))
 softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP', if_true: files('xlnx-zynqmp-rtc.c'))
diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c
index 2bbb2062ac8..e7ced90b025 100644
--- a/hw/rtc/pl031.c
+++ b/hw/rtc/pl031.c
@@ -24,6 +24,7 @@
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "trace.h"
+#include "qapi/qapi-events-misc-target.h"
 #define RTC_DR 0x00 /* Data read register */
 #define RTC_MR 0x04 /* Match register */
@@ -136,10 +137,17 @@ static void pl031_write(void * opaque, hwaddr offset,
     trace_pl031_write(offset, value);
     switch (offset) {
-    case RTC_LR:
+    case RTC_LR: {
+        struct tm tm;
+
         s->tick_offset += value - pl031_get_count(s);
+
+        qemu_get_timedate(&tm, s->tick_offset);
+        qapi_event_send_rtc_change(qemu_timedate_diff(&tm));
+
         pl031_set_alarm(s);
         break;
+    }
     case RTC_MR:
         s->mr = value;
         pl031_set_alarm(s);
diff --git a/hw/rtc/trace-events b/hw/rtc/trace-events
index 8bdcf742640..ebb311a5b0e 100644
--- a/hw/rtc/trace-events
+++ b/hw/rtc/trace-events
@@ -1,4 +1,4 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
# allwinner-rtc.c
allwinner_rtc_read(uint64_t addr, uint64_t value) "addr 0x%" PRIx64 " value 0x%" PRIx64
diff --git a/hw/rx/rx-gdbsim.c b/hw/rx/rx-gdbsim.c
index b1d7c2488ff..75d1fec6ca4 100644
--- a/hw/rx/rx-gdbsim.c
+++ b/hw/rx/rx-gdbsim.c
@@ -21,12 +21,8 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "cpu.h"
-#include "hw/hw.h"
-#include "hw/sysbus.h"
 #include "hw/loader.h"
 #include "hw/rx/rx62n.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/qtest.h"
 #include "sysemu/device_tree.h"
 #include "hw/boards.h"
@@ -93,6 +89,7 @@ static void rx_gdbsim_init(MachineState *machine)
         char *sz = size_to_str(mc->default_ram_size);
         error_report("Invalid RAM size, should be more than %s", sz);
         g_free(sz);
+        exit(1);
     }
     /* Allocate memory space */
diff --git a/hw/rx/rx62n.c b/hw/rx/rx62n.c
index 9c34ce14de6..fa5add9f9db 100644
--- a/hw/rx/rx62n.c
+++ b/hw/rx/rx62n.c
@@ -23,13 +23,11 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
-#include "hw/hw.h"
 #include "hw/rx/rx62n.h"
 #include "hw/loader.h"
 #include "hw/sysbus.h"
 #include "hw/qdev-properties.h"
 #include "sysemu/sysemu.h"
-#include "cpu.h"
 #include "qom/object.h"
 /*
diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c
index f3e7342b1e8..69e6783ade5 100644
--- a/hw/s390x/3270-ccw.c
+++ b/hw/s390x/3270-ccw.c
@@ -13,7 +13,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
-#include "cpu.h"
 #include "hw/s390x/css.h"
 #include "hw/s390x/css-bridge.h"
 #include "hw/qdev-properties.h"
@@ -130,6 +129,7 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp)
                            EMULATED_CCW_3270_CHPID_TYPE);
     sch->do_subchannel_work = do_subchannel_work_virtual;
     sch->ccw_cb = emulated_ccw_3270_cb;
+    sch->irb_cb = build_irb_virtual;
     ck->init(dev, &err);
     if (err) {
@@ -159,7 +159,6 @@ static void emulated_ccw_3270_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     device_class_set_props(dc, emulated_ccw_3270_properties);
-    dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
     dc->realize = emulated_ccw_3270_realize;
     dc->hotpluggable = false;
     set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c
index 8bcf8ece9dd..ef8fa2b15be 100644
--- a/hw/s390x/ap-bridge.c
+++ b/hw/s390x/ap-bridge.c
@@ -55,7 +55,7 @@ void s390_init_ap(void)
     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
     /* Create bus on bridge device */
-    bus = qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS);
+    bus = qbus_new(TYPE_AP_BUS, dev, TYPE_AP_BUS);
     /* Enable hotplugging */
     qbus_set_hotplug_handler(bus, OBJECT(dev));
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index c9707110e9c..95f269ab441 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -59,6 +59,7 @@ static void ccw_device_class_init(ObjectClass *klass, void *data)
     k->refill_ids = ccw_device_refill_ids;
     device_class_set_props(dc, ccw_device_properties);
     dc->reset = ccw_device_reset;
+    dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
 }
 const VMStateDescription vmstate_ccw_dev = {
diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h
index 832c78cd421..6dff95225df 100644
--- a/hw/s390x/ccw-device.h
+++ b/hw/s390x/ccw-device.h
@@ -14,6 +14,7 @@
 #include "qom/object.h"
 #include "hw/qdev-core.h"
 #include "hw/s390x/css.h"
+#include "hw/s390x/css-bridge.h"
 struct CcwDevice {
     DeviceState parent_obj;
diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c
index 9d793d671e1..4017081d49c 100644
--- a/hw/s390x/css-bridge.c
+++ b/hw/s390x/css-bridge.c
@@
-20,7 +20,6 @@ #include "hw/s390x/css.h" #include "ccw-device.h" #include "hw/s390x/css-bridge.h" -#include "cpu.h" /* * Invoke device-specific unplug handler, disable the subchannel @@ -107,7 +106,7 @@ VirtualCssBus *virtual_css_bus_init(void) sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* Create bus on bridge device */ - bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); + bus = qbus_new(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); cbus = VIRTUAL_CSS_BUS(bus); /* Enable hotplugging */ diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 4149b8e5a79..7d9523f8113 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -15,7 +15,6 @@ #include "qemu/bitops.h" #include "qemu/error-report.h" #include "exec/address-spaces.h" -#include "cpu.h" #include "hw/s390x/ioinst.h" #include "hw/qdev-properties.h" #include "hw/s390x/css.h" @@ -1207,23 +1206,53 @@ static void sch_handle_start_func_virtual(SubchDev *sch) } -static void sch_handle_halt_func_passthrough(SubchDev *sch) +static IOInstEnding sch_handle_halt_func_passthrough(SubchDev *sch) { int ret; ret = s390_ccw_halt(sch); if (ret == -ENOSYS) { sch_handle_halt_func(sch); + return IOINST_CC_EXPECTED; + } + /* + * Some conditions may have been detected prior to starting the halt + * function; map them to the correct cc. + * Note that we map both -ENODEV and -EACCES to cc 3 (there's not really + * anything else we can do.) + */ + switch (ret) { + case -EBUSY: + return IOINST_CC_BUSY; + case -ENODEV: + case -EACCES: + return IOINST_CC_NOT_OPERATIONAL; + default: + return IOINST_CC_EXPECTED; } } -static void sch_handle_clear_func_passthrough(SubchDev *sch) +static IOInstEnding sch_handle_clear_func_passthrough(SubchDev *sch) { int ret; ret = s390_ccw_clear(sch); if (ret == -ENOSYS) { sch_handle_clear_func(sch); + return IOINST_CC_EXPECTED; + } + /* + * Some conditions may have been detected prior to starting the clear + * function; map them to the correct cc. + * Note that we map both -ENODEV and -EACCES to cc 3 (there's not really + * anything else we can do.) 
+ */ + switch (ret) { + case -ENODEV: + case -EACCES: + return IOINST_CC_NOT_OPERATIONAL; + default: + return IOINST_CC_EXPECTED; } } @@ -1266,9 +1295,9 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sch) SCHIB *schib = &sch->curr_status; if (schib->scsw.ctrl & SCSW_FCTL_CLEAR_FUNC) { - sch_handle_clear_func_passthrough(sch); + return sch_handle_clear_func_passthrough(sch); } else if (schib->scsw.ctrl & SCSW_FCTL_HALT_FUNC) { - sch_handle_halt_func_passthrough(sch); + return sch_handle_halt_func_passthrough(sch); } else if (schib->scsw.ctrl & SCSW_FCTL_START_FUNC) { return sch_handle_start_func_passthrough(sch); } @@ -1336,6 +1365,14 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) } } +void copy_esw_to_guest(ESW *dest, const ESW *src) +{ + dest->word0 = cpu_to_be32(src->word0); + dest->erw = cpu_to_be32(src->erw); + dest->word2 = cpu_to_be64(src->word2); + dest->word4 = cpu_to_be32(src->word4); +} + IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) { int ret; @@ -1605,9 +1642,8 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, copy_scsw_to_guest(&dest->scsw, &src->scsw); - for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { - dest->esw[i] = cpu_to_be32(src->esw[i]); - } + copy_esw_to_guest(&dest->esw, &src->esw); + for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) { dest->ecw[i] = cpu_to_be32(src->ecw[i]); } @@ -1633,6 +1669,55 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, *irb_len = sizeof(*dest); } +static void build_irb_sense_data(SubchDev *sch, IRB *irb) +{ + int i; + + /* Attention: sense_data is already BE! */ + memcpy(irb->ecw, sch->sense_data, sizeof(sch->sense_data)); + for (i = 0; i < ARRAY_SIZE(irb->ecw); i++) { + irb->ecw[i] = be32_to_cpu(irb->ecw[i]); + } +} + +void build_irb_passthrough(SubchDev *sch, IRB *irb) +{ + /* Copy ESW from hardware */ + irb->esw = sch->esw; + + /* + * If (irb->esw.erw & ESW_ERW_SENSE) is true, then the contents + * of the ECW is sense data. If false, then it is model-dependent + * information. Either way, copy it into the IRB for the guest to + * read/decide what to do with. + */ + build_irb_sense_data(sch, irb); +} + +void build_irb_virtual(SubchDev *sch, IRB *irb) +{ + SCHIB *schib = &sch->curr_status; + uint16_t stctl = schib->scsw.ctrl & SCSW_CTRL_MASK_STCTL; + + if (stctl & SCSW_STCTL_STATUS_PEND) { + if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | + SCSW_CSTAT_CHN_CTRL_CHK | + SCSW_CSTAT_INTF_CTRL_CHK)) { + irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF; + irb->esw.word0 = 0x04804000; + } else { + irb->esw.word0 = 0x00800000; + } + /* If a unit check is pending, copy sense data. */ + if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && + (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { + irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; + build_irb_sense_data(sch, irb); + irb->esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); + } + } +} + int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) { SCHIB *schib = &sch->curr_status; @@ -1651,29 +1736,12 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) /* Copy scsw from current status. */ irb.scsw = schib->scsw; - if (stctl & SCSW_STCTL_STATUS_PEND) { - if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | - SCSW_CSTAT_CHN_CTRL_CHK | - SCSW_CSTAT_INTF_CTRL_CHK)) { - irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; - irb.esw[0] = 0x04804000; - } else { - irb.esw[0] = 0x00800000; - } - /* If a unit check is pending, copy sense data. 
*/ - if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && - (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { - int i; - irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; - /* Attention: sense_data is already BE! */ - memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); - for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { - irb.ecw[i] = be32_to_cpu(irb.ecw[i]); - } - irb.esw[1] = 0x01000000 | (sizeof(sch->sense_data) << 8); - } + /* Build other IRB data, if necessary */ + if (sch->irb_cb) { + sch->irb_cb(sch, &irb); } + /* Store the irb to the guest. */ p = schib->pmcw; copy_irb_to_guest(target_irb, &irb, &p, irb_len); diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c index ed92ce510d9..6fa47b889ca 100644 --- a/hw/s390x/event-facility.c +++ b/hw/s390x/event-facility.c @@ -427,8 +427,8 @@ static void init_event_facility(Object *obj) sclp_event_set_allow_all_mask_sizes); /* Spawn a new bus for SCLP events */ - qbus_create_inplace(&event_facility->sbus, sizeof(event_facility->sbus), - TYPE_SCLP_EVENTS_BUS, sdev, NULL); + qbus_init(&event_facility->sbus, sizeof(event_facility->sbus), + TYPE_SCLP_EVENTS_BUS, sdev, NULL); object_initialize_child(obj, TYPE_SCLP_QUIESCE, &event_facility->quiesce, diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index ff6b55e8167..7ddca0127fc 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -18,9 +18,7 @@ #include "qapi/error.h" #include "sysemu/reset.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "sysemu/tcg.h" -#include "cpu.h" #include "elf.h" #include "hw/loader.h" #include "hw/qdev-properties.h" @@ -40,6 +38,7 @@ #define KERN_IMAGE_START 0x010000UL #define LINUX_MAGIC_ADDR 0x010008UL #define KERN_PARM_AREA 0x010480UL +#define KERN_PARM_AREA_SIZE 0x000380UL #define INITRD_START 0x800000UL #define INITRD_PARM_START 0x010408UL #define PARMFILE_START 0x001000UL @@ -192,10 +191,19 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) * loader) and it won't work. For this case we force it to 0x10000, too. 
*/ if (pentry == KERN_IMAGE_START || pentry == 0x800) { - char *parm_area = rom_ptr(KERN_PARM_AREA, strlen(ipl->cmdline) + 1); + size_t cmdline_size = strlen(ipl->cmdline) + 1; + char *parm_area = rom_ptr(KERN_PARM_AREA, cmdline_size); + ipl->start_addr = KERN_IMAGE_START; /* Overwrite parameters in the kernel image, which are "rom" */ if (parm_area) { + if (cmdline_size > KERN_PARM_AREA_SIZE) { + error_setg(errp, + "kernel command line exceeds maximum size: %zu > %lu", + cmdline_size, KERN_PARM_AREA_SIZE); + return; + } + strcpy(parm_area, ipl->cmdline); } } else { @@ -713,7 +721,6 @@ int s390_ipl_pv_unpack(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); - Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -725,10 +732,7 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) } } if (ipl->netboot) { - if (load_netboot_image(&err) < 0) { - error_report_err(err); - exit(1); - } + load_netboot_image(&error_fatal); ipl->qipl.netboot_start_addr = cpu_to_be64(ipl->start_addr); } s390_ipl_set_boot_menu(ipl); diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build index 327e9c93afa..28484256ec0 100644 --- a/hw/s390x/meson.build +++ b/hw/s390x/meson.build @@ -16,7 +16,6 @@ s390x_ss.add(files( 'sclp.c', 'sclpcpu.c', 'sclpquiesce.c', - 'tod-qemu.c', 'tod.c', )) s390x_ss.add(when: 'CONFIG_KVM', if_true: files( @@ -25,6 +24,9 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 's390-stattrib-kvm.c', 'pv.c', )) +s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'tod-tcg.c', +)) s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files('s390-virtio-ccw.c')) s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c')) s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c')) diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c index 93eccfc05d5..401b63d6cb6 100644 --- a/hw/s390x/pv.c +++ b/hw/s390x/pv.c @@ -13,7 +13,6 @@ #include -#include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c index b497571863f..2fc8bb9c232 100644 --- a/hw/s390x/s390-ccw.c +++ b/hw/s390x/s390-ccw.c @@ -15,7 +15,6 @@ #include #include "qapi/error.h" #include "qemu/module.h" -#include "hw/sysbus.h" #include "hw/s390x/css.h" #include "hw/s390x/css-bridge.h" #include "hw/s390x/s390-ccw.h" @@ -125,6 +124,7 @@ static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) } sch->driver_data = cdev; sch->do_subchannel_work = do_subchannel_work_passthrough; + sch->irb_cb = build_irb_passthrough; ccw_dev->sch = sch; ret = css_sch_build_schib(sch, &cdev->hostid); @@ -177,10 +177,8 @@ static void s390_ccw_instance_init(Object *obj) static void s390_ccw_class_init(ObjectClass *klass, void *data) { - DeviceClass *dc = DEVICE_CLASS(klass); S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); - dc->bus_type = TYPE_VIRTUAL_CSS_BUS; cdc->realize = s390_ccw_realize; cdc->unrealize = s390_ccw_unrealize; } diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index dd138dae94a..1b51a728385 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -14,7 +14,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "cpu.h" #include "hw/s390x/s390-pci-bus.h" #include "hw/s390x/s390-pci-inst.h" #include "hw/s390x/s390-pci-vfio.h" @@ -331,7 +330,7 @@ static unsigned int calc_sx(dma_addr_t ptr) static unsigned int calc_px(dma_addr_t ptr) { - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; + return 
((unsigned long) ptr >> TARGET_PAGE_BITS) & ZPCI_PT_MASK; } static uint64_t get_rt_sto(uint64_t entry) @@ -507,7 +506,7 @@ uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr, int8_t ett = 1; uint16_t error = 0; - entry->iova = addr & PAGE_MASK; + entry->iova = addr & TARGET_PAGE_MASK; entry->translated_addr = 0; entry->perm = IOMMU_RW; @@ -527,7 +526,7 @@ static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr, { S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr); S390IOTLBEntry *entry; - uint64_t iova = addr & PAGE_MASK; + uint64_t iova = addr & TARGET_PAGE_MASK; uint16_t error = 0; IOMMUTLBEntry ret = { .target_as = &address_space_memory, @@ -563,7 +562,7 @@ static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr, ret.perm = entry->perm; } else { ret.iova = iova; - ret.addr_mask = ~PAGE_MASK; + ret.addr_mask = ~TARGET_PAGE_MASK; ret.perm = IOMMU_NONE; } @@ -814,7 +813,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) qbus_set_hotplug_handler(bus, OBJECT(dev)); phb->bus = b; - s->bus = S390_PCI_BUS(qbus_create(TYPE_S390_PCI_BUS, dev, NULL)); + s->bus = S390_PCI_BUS(qbus_new(TYPE_S390_PCI_BUS, dev, NULL)); qbus_set_hotplug_handler(BUS(s->bus), OBJECT(dev)); s->iommu_table = g_hash_table_new_full(g_int64_hash, g_int64_equal, @@ -869,7 +868,7 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev) name = g_strdup_printf("msix-s390-%04x", pbdev->uid); memory_region_init_io(&pbdev->msix_notify_mr, OBJECT(pbdev), - &s390_msi_ctrl_ops, pbdev, name, PAGE_SIZE); + &s390_msi_ctrl_ops, pbdev, name, TARGET_PAGE_SIZE); memory_region_add_subregion(&pbdev->iommu->mr, pbdev->pci_group->zpci_group.msia, &pbdev->msix_notify_mr); @@ -1164,8 +1163,7 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, } /* Assign numbers to all child bridges. The last is the highest number. */ - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - s390_pci_enumerate_bridge, s); + pci_for_each_device_under_bus(sec_bus, s390_pci_enumerate_bridge, s); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); } @@ -1194,7 +1192,7 @@ static void s390_pcihost_reset(DeviceState *dev) * on every system reset, we also have to reassign numbers. 
*/ s->bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), s390_pci_enumerate_bridge, s); + pci_for_each_device_under_bus(bus, s390_pci_enumerate_bridge, s); } static void s390_pcihost_class_init(ObjectClass *klass, void *data) diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index 4b8326afa4f..1c8ad91175b 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -12,7 +12,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/memop.h" #include "exec/memory-internal.h" #include "qemu/error-report.h" @@ -614,7 +613,7 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, .iova = entry->iova, .translated_addr = entry->translated_addr, .perm = entry->perm, - .addr_mask = ~PAGE_MASK, + .addr_mask = ~TARGET_PAGE_MASK, }, }; @@ -641,7 +640,7 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, cache = g_new(S390IOTLBEntry, 1); cache->iova = entry->iova; cache->translated_addr = entry->translated_addr; - cache->len = PAGE_SIZE; + cache->len = TARGET_PAGE_SIZE; cache->perm = entry->perm; g_hash_table_replace(iommu->iotlb, &cache->iova, cache); dec_dma_avail(iommu); @@ -726,8 +725,8 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) while (entry.iova < start && entry.iova < end && (dma_avail > 0 || entry.perm == IOMMU_NONE)) { dma_avail = s390_pci_update_iotlb(iommu, &entry); - entry.iova += PAGE_SIZE; - entry.translated_addr += PAGE_SIZE; + entry.iova += TARGET_PAGE_SIZE; + entry.translated_addr += TARGET_PAGE_SIZE; } } err: diff --git a/hw/s390x/s390-skeys-kvm.c b/hw/s390x/s390-skeys-kvm.c index 1c4d805ad8f..3ff9d94b802 100644 --- a/hw/s390x/s390-skeys-kvm.c +++ b/hw/s390x/s390-skeys-kvm.c @@ -15,7 +15,7 @@ #include "qemu/error-report.h" #include "qemu/module.h" -static int kvm_s390_skeys_enabled(S390SKeysState *ss) +static bool kvm_s390_skeys_are_enabled(S390SKeysState *ss) { S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); uint8_t single_key; @@ -57,7 +57,7 @@ static void kvm_s390_skeys_class_init(ObjectClass *oc, void *data) S390SKeysClass *skeyclass = S390_SKEYS_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - skeyclass->skeys_enabled = kvm_s390_skeys_enabled; + skeyclass->skeys_are_enabled = kvm_s390_skeys_are_enabled; skeyclass->get_skeys = kvm_s390_skeys_get; skeyclass->set_skeys = kvm_s390_skeys_set; diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index 9a8d60d1d9a..5024faf4113 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -17,6 +17,8 @@ #include "qapi/qapi-commands-misc-target.h" #include "qapi/qmp/qdict.h" #include "qemu/error-report.h" +#include "sysemu/memory_mapping.h" +#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "migration/qemu-file-types.h" #include "migration/register.h" @@ -80,11 +82,18 @@ void hmp_info_skeys(Monitor *mon, const QDict *qdict) int r; /* Quick check to see if guest is using storage keys*/ - if (!skeyclass->skeys_enabled(ss)) { + if (!skeyclass->skeys_are_enabled(ss)) { monitor_printf(mon, "Error: This guest is not using storage keys\n"); return; } + if (!address_space_access_valid(&address_space_memory, + addr & TARGET_PAGE_MASK, TARGET_PAGE_SIZE, + false, MEMTXATTRS_UNSPECIFIED)) { + monitor_printf(mon, "Error: The given address is not valid\n"); + return; + } + r = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key); if (r < 0) { monitor_printf(mon, "Error: %s\n", strerror(-r)); @@ -109,18 +118,17 @@ void qmp_dump_skeys(const char *filename, Error **errp) { S390SKeysState *ss = s390_get_skeys_device(); 
S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); - MachineState *ms = MACHINE(qdev_get_machine()); - const uint64_t total_count = ms->ram_size / TARGET_PAGE_SIZE; - uint64_t handled_count = 0, cur_count; + GuestPhysBlockList guest_phys_blocks; + GuestPhysBlock *block; + uint64_t pages, gfn; Error *lerr = NULL; - vaddr cur_gfn = 0; uint8_t *buf; int ret; int fd; FILE *f; /* Quick check to see if guest is using storage keys*/ - if (!skeyclass->skeys_enabled(ss)) { + if (!skeyclass->skeys_are_enabled(ss)) { error_setg(errp, "This guest is not using storage keys - " "nothing to dump"); return; @@ -144,53 +152,86 @@ void qmp_dump_skeys(const char *filename, Error **errp) goto out; } - /* we'll only dump initial memory for now */ - while (handled_count < total_count) { - /* Calculate how many keys to ask for & handle overflow case */ - cur_count = MIN(total_count - handled_count, S390_SKEYS_BUFFER_SIZE); + assert(qemu_mutex_iothread_locked()); + guest_phys_blocks_init(&guest_phys_blocks); + guest_phys_blocks_append(&guest_phys_blocks); - ret = skeyclass->get_skeys(ss, cur_gfn, cur_count, buf); - if (ret < 0) { - error_setg(errp, "get_keys error %d", ret); - goto out_free; - } + QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) { + assert(QEMU_IS_ALIGNED(block->target_start, TARGET_PAGE_SIZE)); + assert(QEMU_IS_ALIGNED(block->target_end, TARGET_PAGE_SIZE)); - /* write keys to stream */ - write_keys(f, buf, cur_gfn, cur_count, &lerr); - if (lerr) { - goto out_free; - } + gfn = block->target_start / TARGET_PAGE_SIZE; + pages = (block->target_end - block->target_start) / TARGET_PAGE_SIZE; + + while (pages) { + const uint64_t cur_pages = MIN(pages, S390_SKEYS_BUFFER_SIZE); - cur_gfn += cur_count; - handled_count += cur_count; + ret = skeyclass->get_skeys(ss, gfn, cur_pages, buf); + if (ret < 0) { + error_setg_errno(errp, -ret, "get_keys error"); + goto out_free; + } + + /* write keys to stream */ + write_keys(f, buf, gfn, cur_pages, &lerr); + if (lerr) { + goto out_free; + } + + gfn += cur_pages; + pages -= cur_pages; + } } out_free: + guest_phys_blocks_free(&guest_phys_blocks); error_propagate(errp, lerr); g_free(buf); out: fclose(f); } -static void qemu_s390_skeys_init(Object *obj) +static bool qemu_s390_skeys_are_enabled(S390SKeysState *ss) { - QEMUS390SKeysState *skeys = QEMU_S390_SKEYS(obj); - MachineState *machine = MACHINE(qdev_get_machine()); + QEMUS390SKeysState *skeys = QEMU_S390_SKEYS(ss); - skeys->key_count = machine->ram_size / TARGET_PAGE_SIZE; - skeys->keydata = g_malloc0(skeys->key_count); + /* Lockless check is sufficient. */ + return !!skeys->keydata; } -static int qemu_s390_skeys_enabled(S390SKeysState *ss) +static bool qemu_s390_enable_skeys(S390SKeysState *ss) { - return 1; + QEMUS390SKeysState *skeys = QEMU_S390_SKEYS(ss); + static gsize initialized; + + if (likely(skeys->keydata)) { + return true; + } + + /* + * TODO: Modern Linux doesn't use storage keys unless running KVM guests + * that use storage keys. Therefore, we keep it simple for now. + * + * 1) We should initialize to "referenced+changed" for an initial + * over-indication. Let's avoid touching megabytes of data for now and + * assume that any sane user will issue a storage key instruction before + * actually relying on this data. + * 2) Relying on ram_size and allocating a big array is ugly. We should + * allocate and manage storage key data per RAMBlock or optimally using + * some sparse data structure. + * 3) We only ever have a single S390SKeysState, so relying on + * g_once_init_enter() is good enough. 
+ */ + if (g_once_init_enter(&initialized)) { + MachineState *machine = MACHINE(qdev_get_machine()); + + skeys->key_count = machine->ram_size / TARGET_PAGE_SIZE; + skeys->keydata = g_malloc0(skeys->key_count); + g_once_init_leave(&initialized, 1); + } + return false; } -/* - * TODO: for memory hotplug support qemu_s390_skeys_set and qemu_s390_skeys_get - * will have to make sure that the given gfn belongs to a memory region and not - * a memory hole. - */ static int qemu_s390_skeys_set(S390SKeysState *ss, uint64_t start_gfn, uint64_t count, uint8_t *keys) { @@ -198,9 +239,10 @@ static int qemu_s390_skeys_set(S390SKeysState *ss, uint64_t start_gfn, int i; /* Check for uint64 overflow and access beyond end of key data */ - if (start_gfn + count > skeydev->key_count || start_gfn + count < count) { - error_report("Error: Setting storage keys for page beyond the end " - "of memory: gfn=%" PRIx64 " count=%" PRId64, + if (unlikely(!skeydev->keydata || start_gfn + count > skeydev->key_count || + start_gfn + count < count)) { + error_report("Error: Setting storage keys for pages with unallocated " + "storage key memory: gfn=%" PRIx64 " count=%" PRId64, start_gfn, count); return -EINVAL; } @@ -218,9 +260,10 @@ static int qemu_s390_skeys_get(S390SKeysState *ss, uint64_t start_gfn, int i; /* Check for uint64 overflow and access beyond end of key data */ - if (start_gfn + count > skeydev->key_count || start_gfn + count < count) { - error_report("Error: Getting storage keys for page beyond the end " - "of memory: gfn=%" PRIx64 " count=%" PRId64, + if (unlikely(!skeydev->keydata || start_gfn + count > skeydev->key_count || + start_gfn + count < count)) { + error_report("Error: Getting storage keys for pages with unallocated " + "storage key memory: gfn=%" PRIx64 " count=%" PRId64, start_gfn, count); return -EINVAL; } @@ -236,7 +279,8 @@ static void qemu_s390_skeys_class_init(ObjectClass *oc, void *data) S390SKeysClass *skeyclass = S390_SKEYS_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); - skeyclass->skeys_enabled = qemu_s390_skeys_enabled; + skeyclass->skeys_are_enabled = qemu_s390_skeys_are_enabled; + skeyclass->enable_skeys = qemu_s390_enable_skeys; skeyclass->get_skeys = qemu_s390_skeys_get; skeyclass->set_skeys = qemu_s390_skeys_set; @@ -247,7 +291,6 @@ static void qemu_s390_skeys_class_init(ObjectClass *oc, void *data) static const TypeInfo qemu_s390_skeys_info = { .name = TYPE_QEMU_S390_SKEYS, .parent = TYPE_S390_SKEYS, - .instance_init = qemu_s390_skeys_init, .instance_size = sizeof(QEMUS390SKeysState), .class_init = qemu_s390_skeys_class_init, .class_size = sizeof(S390SKeysClass), @@ -257,14 +300,13 @@ static void s390_storage_keys_save(QEMUFile *f, void *opaque) { S390SKeysState *ss = S390_SKEYS(opaque); S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); - MachineState *ms = MACHINE(qdev_get_machine()); - uint64_t pages_left = ms->ram_size / TARGET_PAGE_SIZE; - uint64_t read_count, eos = S390_SKEYS_SAVE_FLAG_EOS; - vaddr cur_gfn = 0; + GuestPhysBlockList guest_phys_blocks; + GuestPhysBlock *block; + uint64_t pages, gfn; int error = 0; uint8_t *buf; - if (!skeyclass->skeys_enabled(ss)) { + if (!skeyclass->skeys_are_enabled(ss)) { goto end_stream; } @@ -274,36 +316,52 @@ static void s390_storage_keys_save(QEMUFile *f, void *opaque) goto end_stream; } - /* We only support initial memory. Standby memory is not handled yet. 
*/ - qemu_put_be64(f, (cur_gfn * TARGET_PAGE_SIZE) | S390_SKEYS_SAVE_FLAG_SKEYS); - qemu_put_be64(f, pages_left); - - while (pages_left) { - read_count = MIN(pages_left, S390_SKEYS_BUFFER_SIZE); - - if (!error) { - error = skeyclass->get_skeys(ss, cur_gfn, read_count, buf); - if (error) { - /* - * If error: we want to fill the stream with valid data instead - * of stopping early so we pad the stream with 0x00 values and - * use S390_SKEYS_SAVE_FLAG_ERROR to indicate failure to the - * reading side. - */ - error_report("S390_GET_KEYS error %d", error); - memset(buf, 0, S390_SKEYS_BUFFER_SIZE); - eos = S390_SKEYS_SAVE_FLAG_ERROR; + guest_phys_blocks_init(&guest_phys_blocks); + guest_phys_blocks_append(&guest_phys_blocks); + + /* Send each contiguous physical memory range separately. */ + QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) { + assert(QEMU_IS_ALIGNED(block->target_start, TARGET_PAGE_SIZE)); + assert(QEMU_IS_ALIGNED(block->target_end, TARGET_PAGE_SIZE)); + + gfn = block->target_start / TARGET_PAGE_SIZE; + pages = (block->target_end - block->target_start) / TARGET_PAGE_SIZE; + qemu_put_be64(f, block->target_start | S390_SKEYS_SAVE_FLAG_SKEYS); + qemu_put_be64(f, pages); + + while (pages) { + const uint64_t cur_pages = MIN(pages, S390_SKEYS_BUFFER_SIZE); + + if (!error) { + error = skeyclass->get_skeys(ss, gfn, cur_pages, buf); + if (error) { + /* + * Create a valid stream with all 0x00 and indicate + * S390_SKEYS_SAVE_FLAG_ERROR to the destination. + */ + error_report("S390_GET_KEYS error %d", error); + memset(buf, 0, S390_SKEYS_BUFFER_SIZE); + } } + + qemu_put_buffer(f, buf, cur_pages); + gfn += cur_pages; + pages -= cur_pages; } - qemu_put_buffer(f, buf, read_count); - cur_gfn += read_count; - pages_left -= read_count; + if (error) { + break; + } } + guest_phys_blocks_free(&guest_phys_blocks); g_free(buf); end_stream: - qemu_put_be64(f, eos); + if (error) { + qemu_put_be64(f, S390_SKEYS_SAVE_FLAG_ERROR); + } else { + qemu_put_be64(f, S390_SKEYS_SAVE_FLAG_EOS); + } } static int s390_storage_keys_load(QEMUFile *f, void *opaque, int version_id) @@ -312,6 +370,14 @@ static int s390_storage_keys_load(QEMUFile *f, void *opaque, int version_id) S390SKeysClass *skeyclass = S390_SKEYS_GET_CLASS(ss); int ret = 0; + /* + * Make sure to lazy-enable if required to be done explicitly. No need to + * flush any TLB as the VM is not running yet. 
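The reworked s390_storage_keys_save() above emits, per physical block, a header (the start address tagged with S390_SKEYS_SAVE_FLAG_SKEYS followed by the page count) and then the key bytes in buffer-sized chunks, and it closes the whole stream with a single EOS or ERROR marker. A generic sketch of that framing, collapsed to one range and using plain stdio in place of QEMUFile; all names and flag values here are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define CHUNK_SIZE  512u
#define FLAG_DATA   0x1u    /* stand-in for the per-range "keys follow" tag */
#define FLAG_EOS    0x2u    /* stand-in for the end-of-stream marker        */

static void put_u64(FILE *f, uint64_t v)
{
    fwrite(&v, sizeof(v), 1, f);
}

/* Write one range as: tagged start, length, payload in bounded chunks,
 * then terminate the stream. A full implementation would loop over all
 * ranges before emitting the terminator. */
static void save_range(FILE *f, uint64_t start, const uint8_t *data,
                       uint64_t len)
{
    put_u64(f, start | FLAG_DATA);
    put_u64(f, len);

    while (len) {
        uint64_t cur = len < CHUNK_SIZE ? len : CHUNK_SIZE;

        fwrite(data, 1, cur, f);
        data += cur;
        len -= cur;
    }
    put_u64(f, FLAG_EOS);
}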
+ */ + if (skeyclass->enable_skeys) { + skeyclass->enable_skeys(ss); + } + while (!ret) { ram_addr_t addr; int flags; diff --git a/hw/s390x/s390-stattrib-kvm.c b/hw/s390x/s390-stattrib-kvm.c index f89d8d9d169..24cd01382e2 100644 --- a/hw/s390x/s390-stattrib-kvm.c +++ b/hw/s390x/s390-stattrib-kvm.c @@ -16,8 +16,7 @@ #include "qemu/error-report.h" #include "sysemu/kvm.h" #include "exec/ram_addr.h" -#include "cpu.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" Object *kvm_s390_stattrib_create(void) { diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c index 4441e1d331c..9eda1c3b2a2 100644 --- a/hw/s390x/s390-stattrib.c +++ b/hw/s390x/s390-stattrib.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "cpu.h" #include "migration/qemu-file.h" #include "migration/register.h" #include "hw/s390x/storage-attributes.h" diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 2972b607f36..653587ea62f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -13,11 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" -#include "hw/boards.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" -#include "hw/boards.h" #include "hw/s390x/s390-virtio-hcall.h" #include "hw/s390x/sclp.h" #include "hw/s390x/s390_flic.h" @@ -795,14 +791,47 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_6_2_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_6_2_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(6_2, "6.2", true); + +static void ccw_machine_6_1_instance_options(MachineState *machine) +{ + ccw_machine_6_2_instance_options(machine); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); +} + +static void ccw_machine_6_1_class_options(MachineClass *mc) +{ + ccw_machine_6_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + mc->smp_props.prefer_sockets = true; +} +DEFINE_CCW_MACHINE(6_1, "6.1", false); + static void ccw_machine_6_0_instance_options(MachineState *machine) { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + + ccw_machine_6_1_instance_options(machine); + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); } static void ccw_machine_6_0_class_options(MachineClass *mc) { + ccw_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); } -DEFINE_CCW_MACHINE(6_0, "6.0", true); +DEFINE_CCW_MACHINE(6_0, "6.0", false); static void ccw_machine_5_2_instance_options(MachineState *machine) { diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index 0cf22908267..89c30a8a91a 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -15,8 +15,6 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" -#include "cpu.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/s390x/sclp.h" #include "hw/s390x/event-facility.h" @@ -53,7 +51,7 @@ static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len, uint32_t code) { uint64_t sccb_max_addr = sccb_addr + sccb_len - 1; - uint64_t sccb_boundary = (sccb_addr & PAGE_MASK) + PAGE_SIZE; + uint64_t sccb_boundary = (sccb_addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; switch (code & SCLP_CMD_CODE_MASK) { case 
SCLP_CMDW_READ_SCP_INFO: diff --git a/hw/s390x/sclpcpu.c b/hw/s390x/sclpcpu.c index 62806d32737..f2b1a4b0371 100644 --- a/hw/s390x/sclpcpu.c +++ b/hw/s390x/sclpcpu.c @@ -17,7 +17,6 @@ #include "hw/s390x/sclp.h" #include "qemu/module.h" #include "hw/s390x/event-facility.h" -#include "cpu.h" #include "sysemu/cpus.h" typedef struct ConfigMgtData { diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c index 0b944774861..ec855811aeb 100644 --- a/hw/s390x/tod-kvm.c +++ b/hw/s390x/tod-kvm.c @@ -13,7 +13,7 @@ #include "qemu/module.h" #include "sysemu/runstate.h" #include "hw/s390x/tod.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) { diff --git a/hw/s390x/tod-qemu.c b/hw/s390x/tod-qemu.c deleted file mode 100644 index e91b9590f58..00000000000 --- a/hw/s390x/tod-qemu.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * TOD (Time Of Day) clock - QEMU implementation - * - * Copyright 2018 Red Hat, Inc. - * Author(s): David Hildenbrand - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qapi/error.h" -#include "hw/s390x/tod.h" -#include "qemu/timer.h" -#include "qemu/cutils.h" -#include "qemu/module.h" -#include "cpu.h" -#include "tcg_s390x.h" - -static void qemu_s390_tod_get(const S390TODState *td, S390TOD *tod, - Error **errp) -{ - *tod = td->base; - - tod->low += time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - if (tod->low < td->base.low) { - tod->high++; - } -} - -static void qemu_s390_tod_set(S390TODState *td, const S390TOD *tod, - Error **errp) -{ - CPUState *cpu; - - td->base = *tod; - - td->base.low -= time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - if (td->base.low > tod->low) { - td->base.high--; - } - - /* - * The TOD has been changed and we have to recalculate the CKC values - * for all CPUs. We do this asynchronously, as "SET CLOCK should be - * issued only while all other activity on all CPUs .. has been - * suspended". - */ - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, tcg_s390_tod_updated, RUN_ON_CPU_NULL); - } -} - -static void qemu_s390_tod_class_init(ObjectClass *oc, void *data) -{ - S390TODClass *tdc = S390_TOD_CLASS(oc); - - tdc->get = qemu_s390_tod_get; - tdc->set = qemu_s390_tod_set; -} - -static void qemu_s390_tod_init(Object *obj) -{ - S390TODState *td = S390_TOD(obj); - struct tm tm; - - qemu_get_timedate(&tm, 0); - td->base.high = 0; - td->base.low = TOD_UNIX_EPOCH + (time2tod(mktimegm(&tm)) * 1000000000ULL); - if (td->base.low < TOD_UNIX_EPOCH) { - td->base.high += 1; - } -} - -static TypeInfo qemu_s390_tod_info = { - .name = TYPE_QEMU_S390_TOD, - .parent = TYPE_S390_TOD, - .instance_size = sizeof(S390TODState), - .instance_init = qemu_s390_tod_init, - .class_init = qemu_s390_tod_class_init, - .class_size = sizeof(S390TODClass), -}; - -static void register_types(void) -{ - type_register_static(&qemu_s390_tod_info); -} -type_init(register_types); diff --git a/hw/s390x/tod-tcg.c b/hw/s390x/tod-tcg.c new file mode 100644 index 00000000000..9bb94ff72bc --- /dev/null +++ b/hw/s390x/tod-tcg.c @@ -0,0 +1,89 @@ +/* + * TOD (Time Of Day) clock - TCG implementation + * + * Copyright 2018 Red Hat, Inc. + * Author(s): David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/s390x/tod.h" +#include "qemu/timer.h" +#include "qemu/cutils.h" +#include "qemu/module.h" +#include "cpu.h" +#include "tcg/tcg_s390x.h" + +static void qemu_s390_tod_get(const S390TODState *td, S390TOD *tod, + Error **errp) +{ + *tod = td->base; + + tod->low += time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + if (tod->low < td->base.low) { + tod->high++; + } +} + +static void qemu_s390_tod_set(S390TODState *td, const S390TOD *tod, + Error **errp) +{ + CPUState *cpu; + + td->base = *tod; + + td->base.low -= time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + if (td->base.low > tod->low) { + td->base.high--; + } + + /* + * The TOD has been changed and we have to recalculate the CKC values + * for all CPUs. We do this asynchronously, as "SET CLOCK should be + * issued only while all other activity on all CPUs .. has been + * suspended". + */ + CPU_FOREACH(cpu) { + async_run_on_cpu(cpu, tcg_s390_tod_updated, RUN_ON_CPU_NULL); + } +} + +static void qemu_s390_tod_class_init(ObjectClass *oc, void *data) +{ + S390TODClass *tdc = S390_TOD_CLASS(oc); + + tdc->get = qemu_s390_tod_get; + tdc->set = qemu_s390_tod_set; +} + +static void qemu_s390_tod_init(Object *obj) +{ + S390TODState *td = S390_TOD(obj); + struct tm tm; + + qemu_get_timedate(&tm, 0); + td->base.high = 0; + td->base.low = TOD_UNIX_EPOCH + (time2tod(mktimegm(&tm)) * 1000000000ULL); + if (td->base.low < TOD_UNIX_EPOCH) { + td->base.high += 1; + } +} + +static TypeInfo qemu_s390_tod_info = { + .name = TYPE_QEMU_S390_TOD, + .parent = TYPE_S390_TOD, + .instance_size = sizeof(S390TODState), + .instance_init = qemu_s390_tod_init, + .class_init = qemu_s390_tod_class_init, + .class_size = sizeof(S390TODClass), +}; + +static void register_types(void) +{ + type_register_static(&qemu_s390_tod_info); +} +type_init(register_types); diff --git a/hw/s390x/tod.c b/hw/s390x/tod.c index 3c2979175ef..fd5a36bf24e 100644 --- a/hw/s390x/tod.c +++ b/hw/s390x/tod.c @@ -14,6 +14,8 @@ #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/kvm.h" +#include "sysemu/tcg.h" +#include "sysemu/qtest.h" #include "migration/qemu-file-types.h" #include "migration/register.h" @@ -23,8 +25,13 @@ void s390_init_tod(void) if (kvm_enabled()) { obj = object_new(TYPE_KVM_S390_TOD); - } else { + } else if (tcg_enabled()) { obj = object_new(TYPE_QEMU_S390_TOD); + } else if (qtest_enabled()) { + return; + } else { + error_report("current accelerator not handled in s390_init_tod!"); + abort(); } object_property_add_child(qdev_get_machine(), TYPE_S390_TOD, obj); object_unref(obj); diff --git a/hw/s390x/trace-events b/hw/s390x/trace-events index 8156693749d..8b9213eab90 100644 --- a/hw/s390x/trace-events +++ b/hw/s390x/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
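In the TOD code moved to tod-tcg.c above, qemu_s390_tod_get() adds the elapsed virtual-clock time to the 128-bit TOD value and detects a carry out of the low word by checking whether the addition wrapped (the set path does the mirror-image borrow). The carry handling in isolation, with a hypothetical struct in place of S390TOD:

#include <stdint.h>

typedef struct U128 {
    uint64_t high;
    uint64_t low;
} U128;

/* Add a 64-bit delta to a 128-bit value. If the low word wrapped around,
 * the new value is smaller than the old one, so propagate a carry. */
static void u128_add_u64(U128 *v, uint64_t delta)
{
    uint64_t old_low = v->low;

    v->low += delta;
    if (v->low < old_low) {
        v->high++;
    }
}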
# css.c css_enable_facility(const char *facility) "CSS: enable %s" diff --git a/hw/s390x/virtio-ccw-gpu.c b/hw/s390x/virtio-ccw-gpu.c index 75a9e4bb390..5868a2a0709 100644 --- a/hw/s390x/virtio-ccw-gpu.c +++ b/hw/s390x/virtio-ccw-gpu.c @@ -59,6 +59,7 @@ static const TypeInfo virtio_ccw_gpu = { .instance_init = virtio_ccw_gpu_instance_init, .class_init = virtio_ccw_gpu_class_init, }; +module_obj(TYPE_VIRTIO_GPU_CCW); static void virtio_ccw_gpu_register(void) { @@ -68,3 +69,5 @@ static void virtio_ccw_gpu_register(void) } type_init(virtio_ccw_gpu_register) + +module_arch("s390x"); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 8195f3546e4..c845a92c3a8 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -17,7 +17,6 @@ #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" #include "hw/virtio/virtio-net.h" -#include "hw/sysbus.h" #include "qemu/bitops.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -32,6 +31,7 @@ #include "trace.h" #include "hw/s390x/css-bridge.h" #include "hw/s390x/s390-virtio-ccw.h" +#include "sysemu/replay.h" #define NR_CLASSIC_INDICATOR_BITS 64 @@ -754,6 +754,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->id.reserved = 0xff; sch->id.cu_type = VIRTIO_CCW_CU_TYPE; sch->do_subchannel_work = do_subchannel_work_virtual; + sch->irb_cb = build_irb_virtual; ccw_dev->sch = sch; dev->indicators = NULL; dev->revision = -1; @@ -770,6 +771,11 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; } + /* fd-based ioevents can't be synchronized in record/replay */ + if (replay_mode != REPLAY_MODE_NONE) { + dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; + } + if (k->realize) { k->realize(dev, &err); if (err) { @@ -1235,7 +1241,6 @@ static void virtio_ccw_device_class_init(ObjectClass *klass, void *data) k->unplug = virtio_ccw_busdev_unplug; dc->realize = virtio_ccw_busdev_realize; dc->unrealize = virtio_ccw_busdev_unrealize; - dc->bus_type = TYPE_VIRTUAL_CSS_BUS; device_class_set_parent_reset(dc, virtio_ccw_reset, &vdc->parent_reset); } @@ -1256,8 +1261,7 @@ static void virtio_ccw_bus_new(VirtioBusState *bus, size_t bus_size, DeviceState *qdev = DEVICE(dev); char virtio_bus_name[] = "virtio-bus"; - qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_CCW_BUS, - qdev, virtio_bus_name); + qbus_init(bus, bus_size, TYPE_VIRTIO_CCW_BUS, qdev, virtio_bus_name); } static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c index 9db10b1a487..dac054aeed4 100644 --- a/hw/scsi/esp-pci.c +++ b/hw/scsi/esp-pci.c @@ -388,7 +388,7 @@ static void esp_pci_scsi_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &pci->io); s->irq = pci_allocate_irq(dev); - scsi_bus_new(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), d, &esp_pci_scsi_info); } static void esp_pci_scsi_exit(PCIDevice *d) diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index b668acef82d..58d0edbd56d 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -204,16 +204,11 @@ static int esp_select(ESPState *s) s->ti_size = 0; fifo8_reset(&s->fifo); - if (s->current_req) { - /* Started a new command before the old one finished. Cancel it. 
*/ - scsi_req_cancel(s->current_req); - } - s->current_dev = scsi_device_find(&s->bus, 0, target, 0); if (!s->current_dev) { /* No such drive */ s->rregs[ESP_RSTAT] = 0; - s->rregs[ESP_RINTR] |= INTR_DC; + s->rregs[ESP_RINTR] = INTR_DC; s->rregs[ESP_RSEQ] = SEQ_0; esp_raise_irq(s); return -1; @@ -221,7 +216,7 @@ static int esp_select(ESPState *s) /* * Note that we deliberately don't raise the IRQ here: this will be done - * either in do_busid_cmd() for DATA OUT transfers or by the deferred + * either in do_command_phase() for DATA OUT transfers or by the deferred * IRQ mechanism in esp_transfer_data() for DATA IN transfers */ s->rregs[ESP_RINTR] |= INTR_FC; @@ -235,6 +230,11 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen) uint32_t dmalen, n; int target; + if (s->current_req) { + /* Started a new command before the old one finished. Cancel it. */ + scsi_req_cancel(s->current_req); + } + target = s->wregs[ESP_WBUSID] & BUSID_DID; if (s->dma) { dmalen = MIN(esp_get_tc(s), maxlen); @@ -260,9 +260,6 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen) return 0; } n = esp_fifo_pop_buf(&s->fifo, buf, dmalen); - if (n >= 3) { - buf[0] = buf[2] >> 5; - } n = MIN(fifo8_num_free(&s->cmdfifo), n); fifo8_push_all(&s->cmdfifo, buf, n); } @@ -275,24 +272,22 @@ static uint32_t get_cmd(ESPState *s, uint32_t maxlen) return dmalen; } -static void do_busid_cmd(ESPState *s, uint8_t busid) +static void do_command_phase(ESPState *s) { uint32_t cmdlen; int32_t datalen; - int lun; SCSIDevice *current_lun; uint8_t buf[ESP_CMDFIFO_SZ]; - trace_esp_do_busid_cmd(busid); - lun = busid & 7; + trace_esp_do_command_phase(s->lun); cmdlen = fifo8_num_used(&s->cmdfifo); if (!cmdlen || !s->current_dev) { return; } esp_fifo_pop_buf(&s->cmdfifo, buf, cmdlen); - current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, lun); - s->current_req = scsi_req_new(current_lun, 0, lun, buf, s); + current_lun = scsi_device_find(&s->bus, 0, s->current_dev->id, s->lun); + s->current_req = scsi_req_new(current_lun, 0, s->lun, buf, s); datalen = scsi_req_enqueue(s->current_req); s->ti_size = datalen; fifo8_reset(&s->cmdfifo); @@ -319,28 +314,36 @@ static void do_busid_cmd(ESPState *s, uint8_t busid) } } -static void do_cmd(ESPState *s) +static void do_message_phase(ESPState *s) { - uint8_t busid = esp_fifo_pop(&s->cmdfifo); - int len; + if (s->cmdfifo_cdb_offset) { + uint8_t message = esp_fifo_pop(&s->cmdfifo); - s->cmdfifo_cdb_offset--; + trace_esp_do_identify(message); + s->lun = message & 7; + s->cmdfifo_cdb_offset--; + } /* Ignore extended messages for now */ if (s->cmdfifo_cdb_offset) { - len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo)); + int len = MIN(s->cmdfifo_cdb_offset, fifo8_num_used(&s->cmdfifo)); esp_fifo_pop_buf(&s->cmdfifo, NULL, len); s->cmdfifo_cdb_offset = 0; } +} - do_busid_cmd(s, busid); +static void do_cmd(ESPState *s) +{ + do_message_phase(s); + assert(s->cmdfifo_cdb_offset == 0); + do_command_phase(s); } static void satn_pdma_cb(ESPState *s) { - s->do_cmd = 0; - if (!fifo8_is_empty(&s->cmdfifo)) { + if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) { s->cmdfifo_cdb_offset = 1; + s->do_cmd = 0; do_cmd(s); } } @@ -369,13 +372,10 @@ static void handle_satn(ESPState *s) static void s_without_satn_pdma_cb(ESPState *s) { - uint32_t len; - - s->do_cmd = 0; - len = fifo8_num_used(&s->cmdfifo); - if (len) { + if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) { s->cmdfifo_cdb_offset = 0; - do_busid_cmd(s, 0); + s->do_cmd = 0; + do_cmd(s); } } @@ -392,7 +392,7 @@ static void 
handle_s_without_atn(ESPState *s) if (cmdlen > 0) { s->cmdfifo_cdb_offset = 0; s->do_cmd = 0; - do_busid_cmd(s, 0); + do_cmd(s); } else if (cmdlen == 0) { s->do_cmd = 1; /* Target present, but no cmd yet - switch to command phase */ @@ -403,8 +403,7 @@ static void handle_s_without_atn(ESPState *s) static void satn_stop_pdma_cb(ESPState *s) { - s->do_cmd = 0; - if (!fifo8_is_empty(&s->cmdfifo)) { + if (!esp_get_tc(s) && !fifo8_is_empty(&s->cmdfifo)) { trace_esp_handle_satn_stop(fifo8_num_used(&s->cmdfifo)); s->do_cmd = 1; s->cmdfifo_cdb_offset = 1; @@ -481,7 +480,6 @@ static void esp_dma_done(ESPState *s) { s->rregs[ESP_RSTAT] |= STAT_TC; s->rregs[ESP_RINTR] |= INTR_BS; - s->rregs[ESP_RSEQ] = 0; s->rregs[ESP_RFLAGS] = 0; esp_set_tc(s, 0); esp_raise_irq(s); @@ -494,10 +492,32 @@ static void do_dma_pdma_cb(ESPState *s) uint32_t n; if (s->do_cmd) { + /* Ensure we have received complete command after SATN and stop */ + if (esp_get_tc(s) || fifo8_is_empty(&s->cmdfifo)) { + return; + } + s->ti_size = 0; - s->do_cmd = 0; - do_cmd(s); - esp_lower_drq(s); + if ((s->rregs[ESP_RSTAT] & 7) == STAT_CD) { + /* No command received */ + if (s->cmdfifo_cdb_offset == fifo8_num_used(&s->cmdfifo)) { + return; + } + + /* Command has been received */ + s->do_cmd = 0; + do_cmd(s); + } else { + /* + * Extra message out bytes received: update cmdfifo_cdb_offset + * and then switch to commmand phase + */ + s->cmdfifo_cdb_offset = fifo8_num_used(&s->cmdfifo); + s->rregs[ESP_RSTAT] = STAT_TC | STAT_CD; + s->rregs[ESP_RSEQ] = SEQ_CD; + s->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(s); + } return; } @@ -740,20 +760,17 @@ static void esp_do_nodma(ESPState *s) s->async_len -= len; s->ti_size += len; } else { - len = MIN(s->ti_size, s->async_len); - len = MIN(len, fifo8_num_free(&s->fifo)); - fifo8_push_all(&s->fifo, s->async_buf, len); - s->async_buf += len; - s->async_len -= len; - s->ti_size -= len; + if (fifo8_is_empty(&s->fifo)) { + fifo8_push(&s->fifo, s->async_buf[0]); + s->async_buf++; + s->async_len--; + s->ti_size--; + } } if (s->async_len == 0) { scsi_req_continue(s->current_req); - - if (to_device || s->ti_size == 0) { - return; - } + return; } s->rregs[ESP_RINTR] |= INTR_BS; @@ -763,20 +780,37 @@ static void esp_do_nodma(ESPState *s) void esp_command_complete(SCSIRequest *req, size_t resid) { ESPState *s = req->hba_private; + int to_device = ((s->rregs[ESP_RSTAT] & 7) == STAT_DO); trace_esp_command_complete(); - if (s->ti_size != 0) { - trace_esp_command_complete_unexpected(); + + /* + * Non-DMA transfers from the target will leave the last byte in + * the FIFO so don't reset ti_size in this case + */ + if (s->dma || to_device) { + if (s->ti_size != 0) { + trace_esp_command_complete_unexpected(); + } + s->ti_size = 0; } - s->ti_size = 0; + s->async_len = 0; if (req->status) { trace_esp_command_complete_fail(); } s->status = req->status; - s->rregs[ESP_RSTAT] = STAT_ST; - esp_dma_done(s); - esp_lower_drq(s); + + /* + * If the transfer is finished, switch to status phase. For non-DMA + * transfers from the target the last byte is still in the FIFO + */ + if (s->ti_size == 0) { + s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; + esp_dma_done(s); + esp_lower_drq(s); + } + if (s->current_req) { scsi_req_unref(s->current_req); s->current_req = NULL; @@ -804,16 +838,6 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) s->rregs[ESP_RSTAT] |= STAT_TC; s->rregs[ESP_RINTR] |= INTR_BS; esp_raise_irq(s); - - /* - * If data is ready to transfer and the TI command has already - * been executed, start DMA immediately. 
Otherwise DMA will start - * when host sends the TI command - */ - if (s->ti_size && (s->rregs[ESP_CMD] == (CMD_TI | CMD_DMA))) { - esp_do_dma(s); - } - return; } if (s->ti_cmd == 0) { @@ -827,7 +851,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) return; } - if (s->ti_cmd & CMD_DMA) { + if (s->ti_cmd == (CMD_TI | CMD_DMA)) { if (dmalen) { esp_do_dma(s); } else if (s->ti_size <= 0) { @@ -838,7 +862,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len) esp_dma_done(s); esp_lower_drq(s); } - } else { + } else if (s->ti_cmd == CMD_TI) { esp_do_nodma(s); } } @@ -870,6 +894,7 @@ void esp_hard_reset(ESPState *s) memset(s->wregs, 0, ESP_REGS); s->tchi_written = 0; s->ti_size = 0; + s->async_len = 0; fifo8_reset(&s->fifo); fifo8_reset(&s->cmdfifo); s->dma = 0; @@ -905,6 +930,17 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) qemu_log_mask(LOG_UNIMP, "esp: PIO data read not implemented\n"); s->rregs[ESP_FIFO] = 0; } else { + if ((s->rregs[ESP_RSTAT] & 0x7) == STAT_DI) { + if (s->ti_size) { + esp_do_nodma(s); + } else { + /* + * The last byte of a non-DMA transfer has been read out + * of the FIFO so switch to status phase + */ + s->rregs[ESP_RSTAT] = STAT_TC | STAT_ST; + } + } s->rregs[ESP_FIFO] = esp_fifo_pop(&s->fifo); } val = s->rregs[ESP_FIFO]; @@ -917,7 +953,15 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) val = s->rregs[ESP_RINTR]; s->rregs[ESP_RINTR] = 0; s->rregs[ESP_RSTAT] &= ~STAT_TC; - s->rregs[ESP_RSEQ] = SEQ_0; + /* + * According to the datasheet ESP_RSEQ should be cleared, but as the + * emulation currently defers information transfers to the next TI + * command leave it for now so that pedantic guests such as the old + * Linux 2.6 driver see the correct flags before the next SCSI phase + * transition. + * + * s->rregs[ESP_RSEQ] = SEQ_0; + */ esp_lower_irq(s); break; case ESP_TCHI: @@ -955,15 +999,18 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) case ESP_FIFO: if (s->do_cmd) { esp_fifo_push(&s->cmdfifo, val); + + /* + * If any unexpected message out/command phase data is + * transferred using non-DMA, raise the interrupt + */ + if (s->rregs[ESP_CMD] == CMD_TI) { + s->rregs[ESP_RINTR] |= INTR_BS; + esp_raise_irq(s); + } } else { esp_fifo_push(&s->fifo, val); } - - /* Non-DMA transfers raise an interrupt after every byte */ - if (s->rregs[ESP_CMD] == CMD_TI) { - s->rregs[ESP_RINTR] |= INTR_FC | INTR_BS; - esp_raise_irq(s); - } break; case ESP_CMD: s->rregs[saddr] = val; @@ -1088,7 +1135,15 @@ static bool esp_is_version_5(void *opaque, int version_id) ESPState *s = ESP(opaque); version_id = MIN(version_id, s->mig_version_id); - return version_id == 5; + return version_id >= 5; +} + +static bool esp_is_version_6(void *opaque, int version_id) +{ + ESPState *s = ESP(opaque); + + version_id = MIN(version_id, s->mig_version_id); + return version_id >= 6; } int esp_pre_save(void *opaque) @@ -1128,7 +1183,7 @@ static int esp_post_load(void *opaque, int version_id) const VMStateDescription vmstate_esp = { .name = "esp", - .version_id = 5, + .version_id = 6, .minimum_version_id = 3, .post_load = esp_post_load, .fields = (VMStateField[]) { @@ -1157,6 +1212,7 @@ const VMStateDescription vmstate_esp = { VMSTATE_FIFO8_TEST(fifo, ESPState, esp_is_version_5), VMSTATE_FIFO8_TEST(cmdfifo, ESPState, esp_is_version_5), VMSTATE_UINT8_TEST(ti_cmd, ESPState, esp_is_version_5), + VMSTATE_UINT8_TEST(lun, ESPState, esp_is_version_6), VMSTATE_END_OF_LIST() }, }; @@ -1195,7 +1251,6 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr, { SysBusESPState 
*sysbus = opaque; ESPState *s = ESP(&sysbus->esp); - uint32_t dmalen; trace_esp_pdma_write(size); @@ -1208,10 +1263,7 @@ static void sysbus_esp_pdma_write(void *opaque, hwaddr addr, esp_pdma_write(s, val); break; } - dmalen = esp_get_tc(s); - if (dmalen == 0 || fifo8_num_free(&s->fifo) < 2) { - s->pdma_cb(s); - } + s->pdma_cb(s); } static uint64_t sysbus_esp_pdma_read(void *opaque, hwaddr addr, @@ -1297,7 +1349,7 @@ static void sysbus_esp_realize(DeviceState *dev, Error **errp) qdev_init_gpio_in(dev, sysbus_esp_gpio_demux, 2); - scsi_bus_new(&s->bus, sizeof(s->bus), dev, &esp_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), dev, &esp_scsi_info); } static void sysbus_esp_hard_reset(DeviceState *dev) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index e2c19180a0d..85e907a7854 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -2309,7 +2309,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ram_io); QTAILQ_INIT(&s->queue); - scsi_bus_new(&s->bus, sizeof(s->bus), d, &lsi_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), d, &lsi_scsi_info); } static void lsi_scsi_exit(PCIDevice *dev) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 8f2389d2c6a..4ff51221d4c 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -2416,8 +2416,7 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp) s->frames[i].state = s; } - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &megasas_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &megasas_scsi_info); } static Property megasas_properties_gen1[] = { diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index db3219e7d20..f6c77655443 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1315,7 +1315,7 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); - scsi_bus_new(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); } static void mptsas_scsi_uninit(PCIDevice *dev) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 2a0a98cac91..77325d8cc7a 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -134,10 +134,10 @@ void scsi_device_unit_attention_reported(SCSIDevice *s) } /* Create a scsi bus, and attach devices to it. 
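Further up, vmstate_esp is bumped to version 6 and the newly migrated lun byte is guarded by a field-test callback, so streams produced by older QEMU binaries (which never sent the field) still load. A minimal sketch of that idiom for a hypothetical FooState device, omitting the mig_version_id clamping the real ESP device also applies; VMSTATE_UINT8_TEST and the callback signature are the regular migration API:

/* Sketch only: depends on QEMU's migration/vmstate.h. */
typedef struct FooState {
    uint8_t lun;
    /* ... */
} FooState;

static bool foo_is_version_6(void *opaque, int version_id)
{
    /* Send/accept the field only when the section version is >= 6. */
    return version_id >= 6;
}

static const VMStateDescription vmstate_foo = {
    .name = "foo",
    .version_id = 6,
    .minimum_version_id = 3,
    .fields = (VMStateField[]) {
        /* fields that existed before version 6 ... */
        VMSTATE_UINT8_TEST(lun, FooState, foo_is_version_6),
        VMSTATE_END_OF_LIST()
    },
};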
*/ -void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, - const SCSIBusInfo *info, const char *bus_name) +void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + const SCSIBusInfo *info, const char *bus_name) { - qbus_create_inplace(bus, bus_size, TYPE_SCSI_BUS, host, bus_name); + qbus_init(bus, bus_size, TYPE_SCSI_BUS, host, bus_name); bus->busnr = next_scsi_bus++; bus->info = info; qbus_set_bus_hotplug_handler(BUS(bus)); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 3580e7ee619..d4914178ea0 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -1087,6 +1087,7 @@ static int mode_sense_page(SCSIDiskState *s, int page, uint8_t **p_outbuf, uint8_t *p = *p_outbuf + 2; int length; + assert(page < ARRAY_SIZE(mode_sense_valid)); if ((mode_sense_valid[page] & (1 << s->qdev.type)) == 0) { return -1; } @@ -1428,6 +1429,11 @@ static int scsi_disk_check_mode_select(SCSIDiskState *s, int page, return -1; } + /* MODE_PAGE_ALLS is only valid for MODE SENSE commands */ + if (page == MODE_PAGE_ALLS) { + return -1; + } + p = mode_current; memset(mode_current, 0, inlen + 2); len = mode_sense_page(s, page, &p, 0); @@ -1582,6 +1588,7 @@ static void scsi_disk_emulate_mode_select(SCSIDiskReq *r, uint8_t *inbuf) scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); } +/* sector_num and nb_sectors expected to be in qdev blocksize */ static inline bool check_lba_range(SCSIDiskState *s, uint64_t sector_num, uint32_t nb_sectors) { @@ -1614,11 +1621,12 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret) assert(r->req.aiocb == NULL); if (data->count > 0) { - r->sector = ldq_be_p(&data->inbuf[0]) - * (s->qdev.blocksize / BDRV_SECTOR_SIZE); - r->sector_count = (ldl_be_p(&data->inbuf[8]) & 0xffffffffULL) - * (s->qdev.blocksize / BDRV_SECTOR_SIZE); - if (!check_lba_range(s, r->sector, r->sector_count)) { + uint64_t sector_num = ldq_be_p(&data->inbuf[0]); + uint32_t nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL; + r->sector = sector_num * (s->qdev.blocksize / BDRV_SECTOR_SIZE); + r->sector_count = nb_sectors * (s->qdev.blocksize / BDRV_SECTOR_SIZE); + + if (!check_lba_range(s, sector_num, nb_sectors)) { block_acct_invalid(blk_get_stats(s->qdev.conf.blk), BLOCK_ACCT_UNMAP); scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 98c30c5d5c3..0306ccc7b1e 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -147,7 +147,7 @@ static int execute_command(BlockBackend *blk, return 0; } -static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) +static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) { uint8_t page, page_idx; @@ -179,10 +179,12 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) (r->req.cmd.buf[1] & 0x01)) { page = r->req.cmd.buf[2]; if (page == 0xb0) { - uint32_t max_transfer = - blk_get_max_transfer(s->conf.blk) / s->blocksize; + uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); + uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); assert(max_transfer); + max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) + / s->blocksize; stl_be_p(&r->buf[8], max_transfer); /* Also take care of the opt xfer len. 
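The scsi-disk.c UNMAP change above makes the point explicit with a new comment on check_lba_range(): the bounds check runs on values in the device's logical-block units, and only afterwards are they scaled to 512-byte BDRV sectors for the actual request. A generic sketch of keeping those two unit systems apart; every name here is hypothetical:

#include <stdbool.h>
#include <stdint.h>

#define BDRV_SECTOR_SIZE 512u

/* Range check in logical-block units (blocksize may be 512, 4096, ...). */
static bool lba_range_ok(uint64_t lba, uint32_t nb_blocks, uint64_t max_lba)
{
    return lba <= max_lba && nb_blocks <= max_lba - lba + 1;
}

/* Validate first, convert to 512-byte sector units only on success. */
static bool prepare_discard(uint64_t lba, uint32_t nb_blocks, uint64_t max_lba,
                            uint32_t blocksize, uint64_t *sector,
                            uint64_t *nb_sectors)
{
    if (!lba_range_ok(lba, nb_blocks, max_lba)) {
        return false;
    }
    *sector = lba * (blocksize / BDRV_SECTOR_SIZE);
    *nb_sectors = (uint64_t)nb_blocks * (blocksize / BDRV_SECTOR_SIZE);
    return true;
}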
*/ stl_be_p(&r->buf[12], @@ -213,8 +215,13 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) r->buf[page_idx] = 0xb0; } stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1); + + if (len < r->buflen) { + len++; + } } } + return len; } static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) @@ -332,7 +339,7 @@ static void scsi_read_complete(void * opaque, int ret) } } if (r->req.cmd.buf[0] == INQUIRY) { - scsi_handle_inquiry_reply(r, s); + len = scsi_handle_inquiry_reply(r, s, len); } req_complete: diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c index ca5c13c4a8e..a07a8e1523f 100644 --- a/hw/scsi/spapr_vscsi.c +++ b/hw/scsi/spapr_vscsi.c @@ -34,7 +34,6 @@ #include "qemu/osdep.h" #include "qemu/module.h" -#include "cpu.h" #include "hw/scsi/scsi.h" #include "migration/vmstate.h" #include "scsi/constants.h" @@ -1217,8 +1216,7 @@ static void spapr_vscsi_realize(SpaprVioDevice *dev, Error **errp) dev->crq.SendFunc = vscsi_do_crq; - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &vscsi_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &vscsi_scsi_info); /* ibmvscsi SCSI bus does not allow hotplug. */ qbus_set_hotplug_handler(BUS(&s->bus), NULL); diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events index 1c331fb1896..92d5b40f892 100644 --- a/hw/scsi/trace-events +++ b/hw/scsi/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # scsi-bus.c scsi_req_alloc(int target, int lun, int tag) "target %d lun %d tag %d" @@ -166,7 +166,8 @@ esp_dma_disable(void) "Lower enable" esp_pdma_read(int size) "pDMA read %u bytes" esp_pdma_write(int size) "pDMA write %u bytes" esp_get_cmd(uint32_t dmalen, int target) "len %d target %d" -esp_do_busid_cmd(uint8_t busid) "busid 0x%x" +esp_do_command_phase(uint8_t busid) "busid 0x%x" +esp_do_identify(uint8_t byte) "0x%x" esp_handle_satn_stop(uint32_t cmdlen) "cmdlen %d" esp_write_response(uint32_t status) "Transfer status (status=%d)" esp_do_dma(uint32_t cmdlen, uint32_t len) "command len %d + %d" diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 4d70fa036bb..039caf2614e 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -208,7 +208,6 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) "target SCSI device state or use shared storage over network), " "set 'migratable' property to true to enable migration."); if (migrate_add_blocker(vsc->migration_blocker, errp) < 0) { - error_free(vsc->migration_blocker); goto free_virtio; } } @@ -219,10 +218,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) vsc->dev.backend_features = 0; ret = vhost_dev_init(&vsc->dev, (void *)(uintptr_t)vhostfd, - VHOST_BACKEND_TYPE_KERNEL, 0); + VHOST_BACKEND_TYPE_KERNEL, 0, errp); if (ret < 0) { - error_setg(errp, "vhost-scsi: vhost initialization failed: %s", - strerror(-ret)); goto free_vqs; } @@ -235,11 +232,12 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) return; free_vqs: + g_free(vsc->dev.vqs); if (!vsc->migratable) { migrate_del_blocker(vsc->migration_blocker); } - g_free(vsc->dev.vqs); free_virtio: + error_free(vsc->migration_blocker); virtio_scsi_common_unrealize(dev); close_fd: close(vhostfd); diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 46660194428..1b2f7eed988 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -122,10 +122,8 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) 
vqs = vsc->dev.vqs; ret = vhost_dev_init(&vsc->dev, &s->vhost_user, - VHOST_BACKEND_TYPE_USER, 0); + VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { - error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s", - strerror(-ret)); goto free_vhost; } diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 4ad87934064..18eb824c97f 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -94,8 +94,7 @@ static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev, return progress; } -static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, - VirtIOHandleAIOOutput fn) +static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); int rc; @@ -109,7 +108,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, return rc; } - virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, fn); return 0; } @@ -154,40 +152,63 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) goto fail_guest_notifiers; } - aio_context_acquire(s->ctx); - rc = virtio_scsi_vring_init(s, vs->ctrl_vq, 0, - virtio_scsi_data_plane_handle_ctrl); - if (rc) { - goto fail_vrings; + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ + memory_region_transaction_begin(); + + rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0); + if (rc != 0) { + goto fail_host_notifiers; } vq_init_count++; - rc = virtio_scsi_vring_init(s, vs->event_vq, 1, - virtio_scsi_data_plane_handle_event); - if (rc) { - goto fail_vrings; + rc = virtio_scsi_set_host_notifier(s, vs->event_vq, 1); + if (rc != 0) { + goto fail_host_notifiers; } vq_init_count++; + for (i = 0; i < vs->conf.num_queues; i++) { - rc = virtio_scsi_vring_init(s, vs->cmd_vqs[i], i + 2, - virtio_scsi_data_plane_handle_cmd); + rc = virtio_scsi_set_host_notifier(s, vs->cmd_vqs[i], i + 2); if (rc) { - goto fail_vrings; + goto fail_host_notifiers; } vq_init_count++; } + memory_region_transaction_commit(); + + aio_context_acquire(s->ctx); + virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, + virtio_scsi_data_plane_handle_ctrl); + virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, + virtio_scsi_data_plane_handle_event); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, + virtio_scsi_data_plane_handle_cmd); + } + s->dataplane_starting = false; s->dataplane_started = true; aio_context_release(s->ctx); return 0; -fail_vrings: - aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); - aio_context_release(s->ctx); +fail_host_notifiers: for (i = 0; i < vq_init_count; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < vq_init_count; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); @@ -225,8 +246,23 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) blk_drain_all(); /* ensure there are no in-flight requests */ + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). 
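As the new comments in virtio-scsi-dataplane.c spell out, both the start and the stop path now wrap the per-queue virtio_bus_set_host_notifier() calls in one memory-region transaction, so address_space_update_ioeventfds() runs once for the whole batch instead of once per queue, and the ioeventfds are cleaned up only after the transaction has committed. A sketch of the stop-side shape; the function and queue count are hypothetical, while the calls themselves are the QEMU-internal APIs used above:

/* Sketch only: relies on QEMU's memory and virtio-bus internals. */
static void stop_host_notifiers(VirtioBusState *bus, int nvqs)
{
    int i;

    /* Batch all ioeventfd deassignments into a single commit. */
    memory_region_transaction_begin();
    for (i = 0; i < nvqs; i++) {
        virtio_bus_set_host_notifier(bus, i, false);
    }
    /* The transaction expects the ioeventfds to still be open here. */
    memory_region_transaction_commit();

    /* Only now close and clean up the notifiers. */
    for (i = 0; i < nvqs; i++) {
        virtio_bus_cleanup_host_notifier(bus, i);
    }
}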
+ */ + memory_region_transaction_begin(); + for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + } + + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ + memory_region_transaction_commit(); + + for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 6d807302870..51fd09522ac 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -1019,8 +1019,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) return; } - scsi_bus_new(&s->bus, sizeof(s->bus), dev, - &virtio_scsi_scsi_info, vdev->bus_name); + scsi_bus_init_named(&s->bus, sizeof(s->bus), dev, + &virtio_scsi_scsi_info, vdev->bus_name); /* override default SCSI bus hotplug-handler, with virtio-scsi's one */ qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(dev)); diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 1f30cb020a1..cd76bd67ab7 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -1180,8 +1180,7 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(pci_dev), - &pvscsi_scsi_info, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); /* override default SCSI bus hotplug-handler, with pvscsi's one */ qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(s)); pvscsi_reset_state(s); diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c index bea6d97ef87..9166d6638de 100644 --- a/hw/sd/allwinner-sdhost.c +++ b/hw/sd/allwinner-sdhost.c @@ -738,8 +738,8 @@ static void allwinner_sdhost_init(Object *obj) { AwSdHostState *s = AW_SDHOST(obj); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_AW_SDHOST_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), + TYPE_AW_SDHOST_BUS, DEVICE(s), "sd-bus"); memory_region_init_io(&s->iomem, obj, &allwinner_sdhost_ops, s, TYPE_AW_SDHOST, 4 * KiB); diff --git a/hw/sd/aspeed_sdhci.c b/hw/sd/aspeed_sdhci.c index 3299844de6d..df1bdf1fa4e 100644 --- a/hw/sd/aspeed_sdhci.c +++ b/hw/sd/aspeed_sdhci.c @@ -14,6 +14,7 @@ #include "hw/irq.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" +#include "trace.h" #define ASPEED_SDHCI_INFO 0x00 #define ASPEED_SDHCI_INFO_SLOT1 (1 << 17) @@ -60,6 +61,8 @@ static uint64_t aspeed_sdhci_read(void *opaque, hwaddr addr, unsigned int size) } } + trace_aspeed_sdhci_read(addr, size, (uint64_t) val); + return (uint64_t)val; } @@ -68,6 +71,8 @@ static void aspeed_sdhci_write(void *opaque, hwaddr addr, uint64_t val, { AspeedSDHCIState *sdhci = opaque; + trace_aspeed_sdhci_write(addr, size, val); + switch (addr) { case ASPEED_SDHCI_INFO: /* The RESET bit automatically clears. 
*/ diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c index 50f5fdb88bc..088a7ac6ed4 100644 --- a/hw/sd/bcm2835_sdhost.c +++ b/hw/sd/bcm2835_sdhost.c @@ -403,8 +403,8 @@ static void bcm2835_sdhost_init(Object *obj) { BCM2835SDHostState *s = BCM2835_SDHOST(obj); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), + TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus"); memory_region_init_io(&s->iomem, obj, &bcm2835_sdhost_ops, s, TYPE_BCM2835_SDHOST, 0x1000); diff --git a/hw/sd/cadence_sdhci.c b/hw/sd/cadence_sdhci.c index 0b371c843d8..56b8bae1c3f 100644 --- a/hw/sd/cadence_sdhci.c +++ b/hw/sd/cadence_sdhci.c @@ -23,10 +23,8 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" #include "qemu/error-report.h" -#include "qemu/log.h" #include "qapi/error.h" #include "migration/vmstate.h" -#include "hw/irq.h" #include "hw/sd/cadence_sdhci.h" #include "sdhci-internal.h" diff --git a/hw/sd/meson.build b/hw/sd/meson.build index 9c29691e13e..807ca07b7cc 100644 --- a/hw/sd/meson.build +++ b/hw/sd/meson.build @@ -4,10 +4,10 @@ softmmu_ss.add(when: 'CONFIG_SDHCI', if_true: files('sdhci.c')) softmmu_ss.add(when: 'CONFIG_SDHCI_PCI', if_true: files('sdhci-pci.c')) softmmu_ss.add(when: 'CONFIG_SSI_SD', if_true: files('ssi-sd.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-memcard.c')) softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_mmc.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_mmci.c')) softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_sdhost.c')) softmmu_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_sdhci.c')) softmmu_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-sdhost.c')) +softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_sdhci.c')) softmmu_ss.add(when: 'CONFIG_CADENCE_SDHCI', if_true: files('cadence_sdhci.c')) diff --git a/hw/sd/milkymist-memcard.c b/hw/sd/milkymist-memcard.c deleted file mode 100644 index a1235aa46c1..00000000000 --- a/hw/sd/milkymist-memcard.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * QEMU model of the Milkymist SD Card Controller. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/memcard.pdf - */ - -#include "qemu/osdep.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qapi/error.h" -#include "sysemu/block-backend.h" -#include "sysemu/blockdev.h" -#include "hw/qdev-properties.h" -#include "hw/sd/sd.h" -#include "qom/object.h" - -enum { - ENABLE_CMD_TX = (1<<0), - ENABLE_CMD_RX = (1<<1), - ENABLE_DAT_TX = (1<<2), - ENABLE_DAT_RX = (1<<3), -}; - -enum { - PENDING_CMD_TX = (1<<0), - PENDING_CMD_RX = (1<<1), - PENDING_DAT_TX = (1<<2), - PENDING_DAT_RX = (1<<3), -}; - -enum { - START_CMD_TX = (1<<0), - START_DAT_RX = (1<<1), -}; - -enum { - R_CLK2XDIV = 0, - R_ENABLE, - R_PENDING, - R_START, - R_CMD, - R_DAT, - R_MAX -}; - -#define TYPE_MILKYMIST_MEMCARD "milkymist-memcard" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistMemcardState, MILKYMIST_MEMCARD) - -#define TYPE_MILKYMIST_SDBUS "milkymist-sdbus" - -struct MilkymistMemcardState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - SDBus sdbus; - - int command_write_ptr; - int response_read_ptr; - int response_len; - int ignore_next_cmd; - int enabled; - uint8_t command[6]; - uint8_t response[17]; - uint32_t regs[R_MAX]; -}; - -static void update_pending_bits(MilkymistMemcardState *s) -{ - /* transmits are instantaneous, thus tx pending bits are never set */ - s->regs[R_PENDING] = 0; - /* if rx is enabled the corresponding pending bits are always set */ - if (s->regs[R_ENABLE] & ENABLE_CMD_RX) { - s->regs[R_PENDING] |= PENDING_CMD_RX; - } - if (s->regs[R_ENABLE] & ENABLE_DAT_RX) { - s->regs[R_PENDING] |= PENDING_DAT_RX; - } -} - -static void memcard_sd_command(MilkymistMemcardState *s) -{ - SDRequest req; - - req.cmd = s->command[0] & 0x3f; - req.arg = ldl_be_p(s->command + 1); - req.crc = s->command[5]; - - s->response[0] = req.cmd; - s->response_len = sdbus_do_command(&s->sdbus, &req, s->response + 1); - s->response_read_ptr = 0; - - if (s->response_len == 16) { - /* R2 response */ - s->response[0] = 0x3f; - s->response_len += 1; - } else if (s->response_len == 4) { - /* no crc calculation, insert dummy byte */ - s->response[5] = 0; - s->response_len += 2; - } - - if (req.cmd == 0) { - /* next write is a dummy byte to clock the initialization of the sd - * card */ - s->ignore_next_cmd = 1; - } -} - -static uint64_t memcard_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistMemcardState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_CMD: - if (!s->enabled) { - r = 0xff; - } else { - r = s->response[s->response_read_ptr++]; - if (s->response_read_ptr > s->response_len) { - qemu_log_mask(LOG_GUEST_ERROR, "milkymist_memcard: " - "read more cmd bytes than available: clipping\n"); - s->response_read_ptr = 0; - } - } - break; - case R_DAT: - if (!s->enabled) { - r = 0xffffffff; - } else { - sdbus_read_data(&s->sdbus, &r, sizeof(r)); - be32_to_cpus(&r); - } - break; - case R_CLK2XDIV: - case R_ENABLE: - case R_PENDING: - case R_START: - r = s->regs[addr]; - break; - - default: - qemu_log_mask(LOG_UNIMP, "milkymist_memcard: " - "read access to unknown register 0x%" HWADDR_PRIx "\n", - addr << 2); - break; - } - - trace_milkymist_memcard_memory_read(addr << 2, r); - - return r; -} - -static void memcard_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistMemcardState *s = opaque; - uint32_t val32; - - trace_milkymist_memcard_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case 
R_PENDING: - /* clear rx pending bits */ - s->regs[R_PENDING] &= ~(value & (PENDING_CMD_RX | PENDING_DAT_RX)); - update_pending_bits(s); - break; - case R_CMD: - if (!s->enabled) { - break; - } - if (s->ignore_next_cmd) { - s->ignore_next_cmd = 0; - break; - } - s->command[s->command_write_ptr] = value & 0xff; - s->command_write_ptr = (s->command_write_ptr + 1) % 6; - if (s->command_write_ptr == 0) { - memcard_sd_command(s); - } - break; - case R_DAT: - if (!s->enabled) { - break; - } - val32 = cpu_to_be32(value); - sdbus_write_data(&s->sdbus, &val32, sizeof(val32)); - break; - case R_ENABLE: - s->regs[addr] = value; - update_pending_bits(s); - break; - case R_CLK2XDIV: - case R_START: - s->regs[addr] = value; - break; - - default: - qemu_log_mask(LOG_UNIMP, "milkymist_memcard: " - "write access to unknown register 0x%" HWADDR_PRIx " " - "(value 0x%" PRIx64 ")\n", addr << 2, value); - break; - } -} - -static const MemoryRegionOps memcard_mmio_ops = { - .read = memcard_read, - .write = memcard_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void milkymist_memcard_reset(DeviceState *d) -{ - MilkymistMemcardState *s = MILKYMIST_MEMCARD(d); - int i; - - s->command_write_ptr = 0; - s->response_read_ptr = 0; - s->response_len = 0; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } -} - -static void milkymist_memcard_set_readonly(DeviceState *dev, bool level) -{ - qemu_log_mask(LOG_UNIMP, - "milkymist_memcard: read-only mode not supported\n"); -} - -static void milkymist_memcard_set_inserted(DeviceState *dev, bool level) -{ - MilkymistMemcardState *s = MILKYMIST_MEMCARD(dev); - - s->enabled = !!level; -} - -static void milkymist_memcard_init(Object *obj) -{ - MilkymistMemcardState *s = MILKYMIST_MEMCARD(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - memory_region_init_io(&s->regs_region, OBJECT(s), &memcard_mmio_ops, s, - "milkymist-memcard", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); - - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, - DEVICE(obj), "sd-bus"); -} - -static const VMStateDescription vmstate_milkymist_memcard = { - .name = "milkymist-memcard", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT32(command_write_ptr, MilkymistMemcardState), - VMSTATE_INT32(response_read_ptr, MilkymistMemcardState), - VMSTATE_INT32(response_len, MilkymistMemcardState), - VMSTATE_INT32(ignore_next_cmd, MilkymistMemcardState), - VMSTATE_INT32(enabled, MilkymistMemcardState), - VMSTATE_UINT8_ARRAY(command, MilkymistMemcardState, 6), - VMSTATE_UINT8_ARRAY(response, MilkymistMemcardState, 17), - VMSTATE_UINT32_ARRAY(regs, MilkymistMemcardState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static void milkymist_memcard_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->reset = milkymist_memcard_reset; - dc->vmsd = &vmstate_milkymist_memcard; - /* Reason: output IRQs should be wired up */ - dc->user_creatable = false; -} - -static const TypeInfo milkymist_memcard_info = { - .name = TYPE_MILKYMIST_MEMCARD, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistMemcardState), - .instance_init = milkymist_memcard_init, - .class_init = milkymist_memcard_class_init, -}; - -static void milkymist_sdbus_class_init(ObjectClass *klass, void *data) -{ - SDBusClass *sbc = SD_BUS_CLASS(klass); - - sbc->set_inserted = milkymist_memcard_set_inserted; - sbc->set_readonly = milkymist_memcard_set_readonly; -} - -static 
const TypeInfo milkymist_sdbus_info = { - .name = TYPE_MILKYMIST_SDBUS, - .parent = TYPE_SD_BUS, - .instance_size = sizeof(SDBus), - .class_init = milkymist_sdbus_class_init, -}; - -static void milkymist_memcard_register_types(void) -{ - type_register_static(&milkymist_memcard_info); - type_register_static(&milkymist_sdbus_info); -} - -type_init(milkymist_memcard_register_types) diff --git a/hw/sd/npcm7xx_sdhci.c b/hw/sd/npcm7xx_sdhci.c new file mode 100644 index 00000000000..ef503365dfb --- /dev/null +++ b/hw/sd/npcm7xx_sdhci.c @@ -0,0 +1,182 @@ +/* + * NPCM7xx SD-3.0 / eMMC-4.51 Host Controller + * + * Copyright (c) 2021 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" + +#include "hw/sd/npcm7xx_sdhci.h" +#include "migration/vmstate.h" +#include "sdhci-internal.h" +#include "qemu/log.h" + +static uint64_t npcm7xx_sdhci_read(void *opaque, hwaddr addr, unsigned int size) +{ + NPCM7xxSDHCIState *s = opaque; + uint64_t val = 0; + + switch (addr) { + case NPCM7XX_PRSTVALS_0: + case NPCM7XX_PRSTVALS_1: + case NPCM7XX_PRSTVALS_2: + case NPCM7XX_PRSTVALS_3: + case NPCM7XX_PRSTVALS_4: + case NPCM7XX_PRSTVALS_5: + val = s->regs.prstvals[(addr - NPCM7XX_PRSTVALS_0) / 2]; + break; + case NPCM7XX_BOOTTOCTRL: + val = s->regs.boottoctrl; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "SDHCI read of nonexistent reg: 0x%02" + HWADDR_PRIx, addr); + break; + } + + return val; +} + +static void npcm7xx_sdhci_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int size) +{ + NPCM7xxSDHCIState *s = opaque; + + switch (addr) { + case NPCM7XX_BOOTTOCTRL: + s->regs.boottoctrl = val; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "SDHCI write of nonexistent reg: 0x%02" + HWADDR_PRIx, addr); + break; + } +} + +static bool npcm7xx_sdhci_check_mem_op(void *opaque, hwaddr addr, + unsigned size, bool is_write, + MemTxAttrs attrs) +{ + switch (addr) { + case NPCM7XX_PRSTVALS_0: + case NPCM7XX_PRSTVALS_1: + case NPCM7XX_PRSTVALS_2: + case NPCM7XX_PRSTVALS_3: + case NPCM7XX_PRSTVALS_4: + case NPCM7XX_PRSTVALS_5: + /* RO Word */ + return !is_write && size == 2; + case NPCM7XX_BOOTTOCTRL: + /* R/W Dword */ + return size == 4; + default: + return false; + } +} + +static const MemoryRegionOps npcm7xx_sdhci_ops = { + .read = npcm7xx_sdhci_read, + .write = npcm7xx_sdhci_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + .unaligned = false, + .accepts = npcm7xx_sdhci_check_mem_op, + }, +}; + +static void npcm7xx_sdhci_realize(DeviceState *dev, Error **errp) +{ + NPCM7xxSDHCIState *s = NPCM7XX_SDHCI(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + SysBusDevice *sbd_sdhci = SYS_BUS_DEVICE(&s->sdhci); + + memory_region_init(&s->container, OBJECT(s), + "npcm7xx.sdhci-container", 0x1000); + sysbus_init_mmio(sbd, &s->container); + + memory_region_init_io(&s->iomem, OBJECT(s), &npcm7xx_sdhci_ops, s, + TYPE_NPCM7XX_SDHCI, NPCM7XX_SDHCI_REGSIZE); + memory_region_add_subregion_overlap(&s->container, NPCM7XX_PRSTVALS, + &s->iomem, 1); + + sysbus_realize(sbd_sdhci, 
errp); + memory_region_add_subregion(&s->container, 0, + sysbus_mmio_get_region(sbd_sdhci, 0)); + + /* propagate irq and "sd-bus" from generic-sdhci */ + sysbus_pass_irq(sbd, sbd_sdhci); + s->bus = qdev_get_child_bus(DEVICE(sbd_sdhci), "sd-bus"); + + /* Set the read only preset values. */ + memset(s->regs.prstvals, 0, sizeof(s->regs.prstvals)); + s->regs.prstvals[0] = NPCM7XX_PRSTVALS_0_RESET; + s->regs.prstvals[1] = NPCM7XX_PRSTVALS_1_RESET; + s->regs.prstvals[3] = NPCM7XX_PRSTVALS_3_RESET; +} + +static void npcm7xx_sdhci_reset(DeviceState *dev) +{ + NPCM7xxSDHCIState *s = NPCM7XX_SDHCI(dev); + device_cold_reset(DEVICE(&s->sdhci)); + s->regs.boottoctrl = 0; + + s->sdhci.prnsts = NPCM7XX_PRSNTS_RESET; + s->sdhci.blkgap = NPCM7XX_BLKGAP_RESET; + s->sdhci.capareg = NPCM7XX_CAPAB_RESET; + s->sdhci.maxcurr = NPCM7XX_MAXCURR_RESET; + s->sdhci.version = NPCM7XX_HCVER_RESET; +} + +static const VMStateDescription vmstate_npcm7xx_sdhci = { + .name = TYPE_NPCM7XX_SDHCI, + .version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(regs.boottoctrl, NPCM7xxSDHCIState), + VMSTATE_END_OF_LIST(), + }, +}; + +static void npcm7xx_sdhci_class_init(ObjectClass *classp, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(classp); + + dc->desc = "NPCM7xx SD/eMMC Host Controller"; + dc->realize = npcm7xx_sdhci_realize; + dc->reset = npcm7xx_sdhci_reset; + dc->vmsd = &vmstate_npcm7xx_sdhci; +} + +static void npcm7xx_sdhci_instance_init(Object *obj) +{ + NPCM7xxSDHCIState *s = NPCM7XX_SDHCI(obj); + + object_initialize_child(OBJECT(s), "generic-sdhci", &s->sdhci, + TYPE_SYSBUS_SDHCI); +} + +static TypeInfo npcm7xx_sdhci_info = { + .name = TYPE_NPCM7XX_SDHCI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(NPCM7xxSDHCIState), + .instance_init = npcm7xx_sdhci_instance_init, + .class_init = npcm7xx_sdhci_class_init, +}; + +static void npcm7xx_sdhci_register_types(void) +{ + type_register_static(&npcm7xx_sdhci_info); +} + +type_init(npcm7xx_sdhci_register_types) diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c index 1f946908fe1..b67def63813 100644 --- a/hw/sd/omap_mmc.c +++ b/hw/sd/omap_mmc.c @@ -318,7 +318,7 @@ void omap_mmc_reset(struct omap_mmc_s *host) * into any bus, and we must reset it manually. When omap_mmc is * QOMified this must move into the QOM reset function. 
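The new NPCM7xx SDHCI wrapper above filters guest accesses through the MemoryRegionOps .valid.accepts hook: npcm7xx_sdhci_check_mem_op() rejects writes to the read-only preset-value words and any access of the wrong width before the read/write handlers are ever called. A trimmed sketch of such a hook; the register offsets are hypothetical, the callback signature is the one used above:

/* Sketch only: hwaddr and MemTxAttrs come from QEMU's memory API.
 * Installed via  .valid.accepts = regs_accepts  in the MemoryRegionOps. */
#define REG_RO_WORD    0x04   /* hypothetical read-only 16-bit register  */
#define REG_RW_DWORD   0x60   /* hypothetical read/write 32-bit register */

static bool regs_accepts(void *opaque, hwaddr addr, unsigned size,
                         bool is_write, MemTxAttrs attrs)
{
    switch (addr) {
    case REG_RO_WORD:
        return !is_write && size == 2;   /* word-sized reads only   */
    case REG_RW_DWORD:
        return size == 4;                /* dword reads and writes  */
    default:
        return false;                    /* reject everything else  */
    }
}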
*/ - device_legacy_reset(DEVICE(host->card)); + device_cold_reset(DEVICE(host->card)); } static uint64_t omap_mmc_read(void *opaque, hwaddr offset, diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c index 960f1550981..5e554bd4676 100644 --- a/hw/sd/pl181.c +++ b/hw/sd/pl181.c @@ -506,8 +506,7 @@ static void pl181_init(Object *obj) qdev_init_gpio_out_named(dev, &s->card_readonly, "card-read-only", 1); qdev_init_gpio_out_named(dev, &s->card_inserted, "card-inserted", 1); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_PL181_BUS, dev, "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_PL181_BUS, dev, "sd-bus"); } static void pl181_class_init(ObjectClass *klass, void *data) diff --git a/hw/sd/pxa2xx_mmci.c b/hw/sd/pxa2xx_mmci.c index 3dd2fc7a83f..124fbf8bbd4 100644 --- a/hw/sd/pxa2xx_mmci.c +++ b/hw/sd/pxa2xx_mmci.c @@ -560,8 +560,8 @@ static void pxa2xx_mmci_instance_init(Object *obj) qdev_init_gpio_out_named(dev, &s->rx_dma, "rx-dma", 1); qdev_init_gpio_out_named(dev, &s->tx_dma, "tx-dma", 1); - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_PXA2XX_MMCI_BUS, DEVICE(obj), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), + TYPE_PXA2XX_MMCI_BUS, DEVICE(obj), "sd-bus"); } static void pxa2xx_mmci_class_init(ObjectClass *klass, void *data) diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 282d39a7042..bb5dbff68c0 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -821,8 +821,15 @@ static uint32_t sd_wpbits(SDState *sd, uint64_t addr) wpnum = sd_addr_to_wpnum(addr); for (i = 0; i < 32; i++, wpnum++, addr += WPGROUP_SIZE) { + if (addr >= sd->size) { + /* + * If the addresses of the last groups are outside the valid range, + * then the corresponding write protection bits shall be set to 0. + */ + continue; + } assert(wpnum < sd->wpgrps_size); - if (addr < sd->size && test_bit(wpnum, sd->wp_groups)) { + if (test_bit(wpnum, sd->wp_groups)) { ret |= (1 << i); } } @@ -937,6 +944,19 @@ static void sd_lock_command(SDState *sd) sd->card_status &= ~CARD_IS_LOCKED; } +static bool address_in_range(SDState *sd, const char *desc, + uint64_t addr, uint32_t length) +{ + if (addr + length > sd->size) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s offset %"PRIu64" > card %"PRIu64" [%%%u]\n", + desc, addr, sd->size, length); + sd->card_status |= ADDRESS_ERROR; + return false; + } + return true; +} + static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) { uint32_t rca = 0x0000; @@ -1218,8 +1238,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr + sd->blk_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "READ_BLOCK", addr, sd->blk_len)) { return sd_r1; } @@ -1264,8 +1283,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr + sd->blk_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "WRITE_BLOCK", addr, sd->blk_len)) { return sd_r1; } @@ -1325,8 +1343,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr >= sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "SET_WRITE_PROT", addr, 1)) { return sd_r1b; } @@ -1348,8 +1365,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: - if (addr >= sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "CLR_WRITE_PROT", addr, 1)) { return sd_r1b; } @@ -1371,6 +1387,11 @@ 
static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) switch (sd->state) { case sd_transfer_state: + if (!address_in_range(sd, "SEND_WRITE_PROT", + req.arg, sd->blk_len)) { + return sd_r1; + } + sd->state = sd_sendingdata_state; *(uint32_t *) sd->data = sd_wpbits(sd, req.arg); sd->data_start = addr; @@ -1504,7 +1525,8 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req) return sd_illegal; } - qemu_log_mask(LOG_GUEST_ERROR, "SD: CMD%i in a wrong state\n", req.cmd); + qemu_log_mask(LOG_GUEST_ERROR, "SD: CMD%i in a wrong state: %s\n", + req.cmd, sd_state_name(sd->state)); return sd_illegal; } @@ -1825,8 +1847,8 @@ void sd_write_byte(SDState *sd, uint8_t value) case 25: /* CMD25: WRITE_MULTIPLE_BLOCK */ if (sd->data_offset == 0) { /* Start of the block - let's check the address is valid */ - if (sd->data_start + sd->blk_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "WRITE_MULTIPLE_BLOCK", + sd->data_start, sd->blk_len)) { break; } if (sd->size <= SDSC_MAX_CAPACITY) { @@ -1998,8 +2020,8 @@ uint8_t sd_read_byte(SDState *sd) case 18: /* CMD18: READ_MULTIPLE_BLOCK */ if (sd->data_offset == 0) { - if (sd->data_start + io_len > sd->size) { - sd->card_status |= ADDRESS_ERROR; + if (!address_in_range(sd, "READ_MULTIPLE_BLOCK", + sd->data_start, io_len)) { return 0x00; } BLK_READ_BLOCK(sd->data_start, io_len); diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 5b8678110b0..c9dc065cc52 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -1337,8 +1337,7 @@ static void sdhci_init_readonly_registers(SDHCIState *s, Error **errp) void sdhci_initfn(SDHCIState *s) { - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), - TYPE_SDHCI_BUS, DEVICE(s), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_SDHCI_BUS, DEVICE(s), "sd-bus"); s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_raise_insertion_irq, s); s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_data_transfer, s); diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c index 97ee58e20cf..e60854eeefc 100644 --- a/hw/sd/ssi-sd.c +++ b/hw/sd/ssi-sd.c @@ -373,8 +373,7 @@ static void ssi_sd_realize(SSIPeripheral *d, Error **errp) DeviceState *carddev; DriveInfo *dinfo; - qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, - DEVICE(d), "sd-bus"); + qbus_init(&s->sdbus, sizeof(s->sdbus), TYPE_SD_BUS, DEVICE(d), "sd-bus"); /* Create and plug in the sd card */ /* FIXME use a qdev drive property instead of drive_get_next() */ diff --git a/hw/sd/trace-events b/hw/sd/trace-events index 4140e485403..94a00557b26 100644 --- a/hw/sd/trace-events +++ b/hw/sd/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
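The hw/sd/sd.c hunks above replace the open-coded "addr + blk_len > size" / "addr >= size" checks with a single address_in_range() helper that both sets ADDRESS_ERROR and reports the offending command via LOG_GUEST_ERROR; SEND_WRITE_PROT also gains a bounds check it previously lacked. Below is a minimal, self-contained sketch of the same pattern; CardState, the status-bit value and the plain fprintf() logging are illustrative stand-ins, not QEMU's definitions.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t size;          /* card capacity in bytes */
    uint32_t card_status;   /* R1 card status bits */
} CardState;

#define ADDRESS_ERROR (1u << 30)    /* stand-in for the R1 status bit */

/* Returns true when [addr, addr + length) lies inside the card. */
static bool card_address_in_range(CardState *sd, const char *desc,
                                  uint64_t addr, uint32_t length)
{
    if (addr + length > sd->size) {
        fprintf(stderr, "%s: offset %" PRIu64 " beyond card size %" PRIu64 "\n",
                desc, addr, sd->size);
        sd->card_status |= ADDRESS_ERROR;  /* reported in the next response */
        return false;
    }
    return true;
}

int main(void)
{
    CardState card = { .size = 1024 * 1024, .card_status = 0 };

    /* A 512-byte read ending exactly at the card boundary is accepted... */
    bool ok = card_address_in_range(&card, "READ_BLOCK", card.size - 512, 512);
    /* ...but one block past the end trips the error path. */
    bool bad = card_address_in_range(&card, "READ_BLOCK", card.size, 512);

    printf("ok=%d bad=%d status=0x%08" PRIx32 "\n", ok, bad, card.card_status);
    return 0;
}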
# allwinner-sdhost.c allwinner_sdhost_set_inserted(bool inserted) "inserted %u" @@ -55,10 +55,6 @@ sdcard_write_data(const char *proto, const char *cmd_desc, uint8_t cmd, uint8_t sdcard_read_data(const char *proto, const char *cmd_desc, uint8_t cmd, uint32_t length) "%s %20s/ CMD%02d len %" PRIu32 sdcard_set_voltage(uint16_t millivolts) "%u mV" -# milkymist-memcard.c -milkymist_memcard_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_memcard_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" - # pxa2xx_mmci.c pxa2xx_mmci_read(uint8_t size, uint32_t addr, uint32_t value) "size %d addr 0x%02x value 0x%08x" pxa2xx_mmci_write(uint8_t size, uint32_t addr, uint32_t value) "size %d addr 0x%02x value 0x%08x" @@ -72,3 +68,7 @@ pl181_fifo_push(uint32_t data) "FIFO push 0x%08" PRIx32 pl181_fifo_pop(uint32_t data) "FIFO pop 0x%08" PRIx32 pl181_fifo_transfer_complete(void) "FIFO transfer complete" pl181_data_engine_idle(void) "data engine idle" + +# aspeed_sdhci.c +aspeed_sdhci_read(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64 +aspeed_sdhci_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64 diff --git a/hw/sensor/Kconfig b/hw/sensor/Kconfig new file mode 100644 index 00000000000..9c8a049b068 --- /dev/null +++ b/hw/sensor/Kconfig @@ -0,0 +1,23 @@ +config TMP105 + bool + depends on I2C + +config TMP421 + bool + depends on I2C + +config DPS310 + bool + depends on I2C + +config EMC141X + bool + depends on I2C + +config ADM1272 + bool + depends on I2C + +config MAX34451 + bool + depends on I2C diff --git a/hw/sensor/adm1272.c b/hw/sensor/adm1272.c new file mode 100644 index 00000000000..7310c769be2 --- /dev/null +++ b/hw/sensor/adm1272.c @@ -0,0 +1,543 @@ +/* + * Analog Devices ADM1272 High Voltage Positive Hot Swap Controller and Digital + * Power Monitor with PMBus + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include +#include "hw/i2c/pmbus_device.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/log.h" +#include "qemu/module.h" + +#define TYPE_ADM1272 "adm1272" +#define ADM1272(obj) OBJECT_CHECK(ADM1272State, (obj), TYPE_ADM1272) + +#define ADM1272_RESTART_TIME 0xCC +#define ADM1272_MFR_PEAK_IOUT 0xD0 +#define ADM1272_MFR_PEAK_VIN 0xD1 +#define ADM1272_MFR_PEAK_VOUT 0xD2 +#define ADM1272_MFR_PMON_CONTROL 0xD3 +#define ADM1272_MFR_PMON_CONFIG 0xD4 +#define ADM1272_MFR_ALERT1_CONFIG 0xD5 +#define ADM1272_MFR_ALERT2_CONFIG 0xD6 +#define ADM1272_MFR_PEAK_TEMPERATURE 0xD7 +#define ADM1272_MFR_DEVICE_CONFIG 0xD8 +#define ADM1272_MFR_POWER_CYCLE 0xD9 +#define ADM1272_MFR_PEAK_PIN 0xDA +#define ADM1272_MFR_READ_PIN_EXT 0xDB +#define ADM1272_MFR_READ_EIN_EXT 0xDC + +#define ADM1272_HYSTERESIS_LOW 0xF2 +#define ADM1272_HYSTERESIS_HIGH 0xF3 +#define ADM1272_STATUS_HYSTERESIS 0xF4 +#define ADM1272_STATUS_GPIO 0xF5 +#define ADM1272_STRT_UP_IOUT_LIM 0xF6 + +/* Defaults */ +#define ADM1272_OPERATION_DEFAULT 0x80 +#define ADM1272_CAPABILITY_DEFAULT 0xB0 +#define ADM1272_CAPABILITY_NO_PEC 0x30 +#define ADM1272_DIRECT_MODE 0x40 +#define ADM1272_HIGH_LIMIT_DEFAULT 0x0FFF +#define ADM1272_PIN_OP_DEFAULT 0x7FFF +#define ADM1272_PMBUS_REVISION_DEFAULT 0x22 +#define ADM1272_MFR_ID_DEFAULT "ADI" +#define ADM1272_MODEL_DEFAULT "ADM1272-A1" +#define ADM1272_MFR_DEFAULT_REVISION "25" +#define ADM1272_DEFAULT_DATE "160301" +#define ADM1272_RESTART_TIME_DEFAULT 
0x64 +#define ADM1272_PMON_CONTROL_DEFAULT 0x1 +#define ADM1272_PMON_CONFIG_DEFAULT 0x3F35 +#define ADM1272_DEVICE_CONFIG_DEFAULT 0x8 +#define ADM1272_HYSTERESIS_HIGH_DEFAULT 0xFFFF +#define ADM1272_STRT_UP_IOUT_LIM_DEFAULT 0x000F +#define ADM1272_VOLT_DEFAULT 12000 +#define ADM1272_IOUT_DEFAULT 25000 +#define ADM1272_PWR_DEFAULT 300 /* 12V 25A */ +#define ADM1272_SHUNT 300 /* micro-ohms */ +#define ADM1272_VOLTAGE_COEFF_DEFAULT 1 +#define ADM1272_CURRENT_COEFF_DEFAULT 3 +#define ADM1272_PWR_COEFF_DEFAULT 7 +#define ADM1272_IOUT_OFFSET 0x5000 +#define ADM1272_IOUT_OFFSET 0x5000 + + +typedef struct ADM1272State { + PMBusDevice parent; + + uint64_t ein_ext; + uint32_t pin_ext; + uint8_t restart_time; + + uint16_t peak_vin; + uint16_t peak_vout; + uint16_t peak_iout; + uint16_t peak_temperature; + uint16_t peak_pin; + + uint8_t pmon_control; + uint16_t pmon_config; + uint16_t alert1_config; + uint16_t alert2_config; + uint16_t device_config; + + uint16_t hysteresis_low; + uint16_t hysteresis_high; + uint8_t status_hysteresis; + uint8_t status_gpio; + + uint16_t strt_up_iout_lim; + +} ADM1272State; + +static const PMBusCoefficients adm1272_coefficients[] = { + [0] = { 6770, 0, -2 }, /* voltage, vrange 60V */ + [1] = { 4062, 0, -2 }, /* voltage, vrange 100V */ + [2] = { 1326, 20480, -1 }, /* current, vsense range 15mV */ + [3] = { 663, 20480, -1 }, /* current, vsense range 30mV */ + [4] = { 3512, 0, -2 }, /* power, vrange 60V, irange 15mV */ + [5] = { 21071, 0, -3 }, /* power, vrange 100V, irange 15mV */ + [6] = { 17561, 0, -3 }, /* power, vrange 60V, irange 30mV */ + [7] = { 10535, 0, -3 }, /* power, vrange 100V, irange 30mV */ + [8] = { 42, 31871, -1 }, /* temperature */ +}; + +static void adm1272_check_limits(ADM1272State *s) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(s); + + pmbus_check_limits(pmdev); + + if (pmdev->pages[0].read_vout > s->peak_vout) { + s->peak_vout = pmdev->pages[0].read_vout; + } + + if (pmdev->pages[0].read_vin > s->peak_vin) { + s->peak_vin = pmdev->pages[0].read_vin; + } + + if (pmdev->pages[0].read_iout > s->peak_iout) { + s->peak_iout = pmdev->pages[0].read_iout; + } + + if (pmdev->pages[0].read_temperature_1 > s->peak_temperature) { + s->peak_temperature = pmdev->pages[0].read_temperature_1; + } + + if (pmdev->pages[0].read_pin > s->peak_pin) { + s->peak_pin = pmdev->pages[0].read_pin; + } +} + +static uint16_t adm1272_millivolts_to_direct(uint32_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_VOLTAGE_COEFF_DEFAULT]; + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_data2direct_mode(c, value); +} + +static uint32_t adm1272_direct_to_millivolts(uint16_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_VOLTAGE_COEFF_DEFAULT]; + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_direct_mode2data(c, value); +} + +static uint16_t adm1272_milliamps_to_direct(uint32_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_CURRENT_COEFF_DEFAULT]; + /* Y = (m * r_sense * x - b) * 10^R */ + c.m = c.m * ADM1272_SHUNT / 1000; /* micro-ohms */ + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_data2direct_mode(c, value); +} + +static uint32_t adm1272_direct_to_milliamps(uint16_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_CURRENT_COEFF_DEFAULT]; + c.m = c.m * ADM1272_SHUNT / 1000; + c.b = c.b * 1000; + c.R = c.R - 3; + return pmbus_direct_mode2data(c, value); +} + +static uint16_t adm1272_watts_to_direct(uint32_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_PWR_COEFF_DEFAULT]; + c.m = c.m * ADM1272_SHUNT 
/ 1000; + return pmbus_data2direct_mode(c, value); +} + +static uint32_t adm1272_direct_to_watts(uint16_t value) +{ + PMBusCoefficients c = adm1272_coefficients[ADM1272_PWR_COEFF_DEFAULT]; + c.m = c.m * ADM1272_SHUNT / 1000; + return pmbus_direct_mode2data(c, value); +} + +static void adm1272_exit_reset(Object *obj) +{ + ADM1272State *s = ADM1272(obj); + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + + pmdev->page = 0; + pmdev->pages[0].operation = ADM1272_OPERATION_DEFAULT; + + + pmdev->capability = ADM1272_CAPABILITY_NO_PEC; + pmdev->pages[0].revision = ADM1272_PMBUS_REVISION_DEFAULT; + pmdev->pages[0].vout_mode = ADM1272_DIRECT_MODE; + pmdev->pages[0].vout_ov_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].vout_uv_warn_limit = 0; + pmdev->pages[0].iout_oc_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].ot_fault_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].ot_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].vin_ov_warn_limit = ADM1272_HIGH_LIMIT_DEFAULT; + pmdev->pages[0].vin_uv_warn_limit = 0; + pmdev->pages[0].pin_op_warn_limit = ADM1272_PIN_OP_DEFAULT; + + pmdev->pages[0].status_word = 0; + pmdev->pages[0].status_vout = 0; + pmdev->pages[0].status_iout = 0; + pmdev->pages[0].status_input = 0; + pmdev->pages[0].status_temperature = 0; + pmdev->pages[0].status_mfr_specific = 0; + + pmdev->pages[0].read_vin + = adm1272_millivolts_to_direct(ADM1272_VOLT_DEFAULT); + pmdev->pages[0].read_vout + = adm1272_millivolts_to_direct(ADM1272_VOLT_DEFAULT); + pmdev->pages[0].read_iout + = adm1272_milliamps_to_direct(ADM1272_IOUT_DEFAULT); + pmdev->pages[0].read_temperature_1 = 0; + pmdev->pages[0].read_pin = adm1272_watts_to_direct(ADM1272_PWR_DEFAULT); + pmdev->pages[0].revision = ADM1272_PMBUS_REVISION_DEFAULT; + pmdev->pages[0].mfr_id = ADM1272_MFR_ID_DEFAULT; + pmdev->pages[0].mfr_model = ADM1272_MODEL_DEFAULT; + pmdev->pages[0].mfr_revision = ADM1272_MFR_DEFAULT_REVISION; + pmdev->pages[0].mfr_date = ADM1272_DEFAULT_DATE; + + s->pin_ext = 0; + s->ein_ext = 0; + s->restart_time = ADM1272_RESTART_TIME_DEFAULT; + + s->peak_vin = 0; + s->peak_vout = 0; + s->peak_iout = 0; + s->peak_temperature = 0; + s->peak_pin = 0; + + s->pmon_control = ADM1272_PMON_CONTROL_DEFAULT; + s->pmon_config = ADM1272_PMON_CONFIG_DEFAULT; + s->alert1_config = 0; + s->alert2_config = 0; + s->device_config = ADM1272_DEVICE_CONFIG_DEFAULT; + + s->hysteresis_low = 0; + s->hysteresis_high = ADM1272_HYSTERESIS_HIGH_DEFAULT; + s->status_hysteresis = 0; + s->status_gpio = 0; + + s->strt_up_iout_lim = ADM1272_STRT_UP_IOUT_LIM_DEFAULT; +} + +static uint8_t adm1272_read_byte(PMBusDevice *pmdev) +{ + ADM1272State *s = ADM1272(pmdev); + + switch (pmdev->code) { + case ADM1272_RESTART_TIME: + pmbus_send8(pmdev, s->restart_time); + break; + + case ADM1272_MFR_PEAK_IOUT: + pmbus_send16(pmdev, s->peak_iout); + break; + + case ADM1272_MFR_PEAK_VIN: + pmbus_send16(pmdev, s->peak_vin); + break; + + case ADM1272_MFR_PEAK_VOUT: + pmbus_send16(pmdev, s->peak_vout); + break; + + case ADM1272_MFR_PMON_CONTROL: + pmbus_send8(pmdev, s->pmon_control); + break; + + case ADM1272_MFR_PMON_CONFIG: + pmbus_send16(pmdev, s->pmon_config); + break; + + case ADM1272_MFR_ALERT1_CONFIG: + pmbus_send16(pmdev, s->alert1_config); + break; + + case ADM1272_MFR_ALERT2_CONFIG: + pmbus_send16(pmdev, s->alert2_config); + break; + + case ADM1272_MFR_PEAK_TEMPERATURE: + pmbus_send16(pmdev, s->peak_temperature); + break; + + case ADM1272_MFR_DEVICE_CONFIG: + pmbus_send16(pmdev, s->device_config); + break; + + case ADM1272_MFR_PEAK_PIN: 
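/*
 * Worked example for the DIRECT-format helpers above (illustrative only;
 * the exact rounding is whatever pmbus_data2direct_mode() and
 * pmbus_direct_mode2data() implement).  PMBus DIRECT encoding is
 *
 *     Y = (m * X + b) * 10^R          (real-world value X -> register Y)
 *     X = (Y * 10^-R - b) / m         (register Y -> real-world value X)
 *
 * adm1272_millivolts_to_direct() uses coefficient set [1] = { 4062, 0, -2 }
 * rescaled for millivolts (b *= 1000, R -= 3), so a 12000 mV reading encodes
 * to about (4062 * 12000) * 10^-5 ~= 487, and a register value of 487 decodes
 * back to (487 * 10^5) / 4062 ~= 11990 mV.
 */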
+ pmbus_send16(pmdev, s->peak_pin); + break; + + case ADM1272_MFR_READ_PIN_EXT: + pmbus_send32(pmdev, s->pin_ext); + break; + + case ADM1272_MFR_READ_EIN_EXT: + pmbus_send64(pmdev, s->ein_ext); + break; + + case ADM1272_HYSTERESIS_LOW: + pmbus_send16(pmdev, s->hysteresis_low); + break; + + case ADM1272_HYSTERESIS_HIGH: + pmbus_send16(pmdev, s->hysteresis_high); + break; + + case ADM1272_STATUS_HYSTERESIS: + pmbus_send16(pmdev, s->status_hysteresis); + break; + + case ADM1272_STATUS_GPIO: + pmbus_send16(pmdev, s->status_gpio); + break; + + case ADM1272_STRT_UP_IOUT_LIM: + pmbus_send16(pmdev, s->strt_up_iout_lim); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: reading from unsupported register: 0x%02x\n", + __func__, pmdev->code); + return 0xFF; + break; + } + + return 0; +} + +static int adm1272_write_data(PMBusDevice *pmdev, const uint8_t *buf, + uint8_t len) +{ + ADM1272State *s = ADM1272(pmdev); + + if (len == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + pmdev->code = buf[0]; /* PMBus command code */ + + if (len == 1) { + return 0; + } + + /* Exclude command code from buffer */ + buf++; + len--; + + switch (pmdev->code) { + + case ADM1272_RESTART_TIME: + s->restart_time = pmbus_receive8(pmdev); + break; + + case ADM1272_MFR_PMON_CONTROL: + s->pmon_control = pmbus_receive8(pmdev); + break; + + case ADM1272_MFR_PMON_CONFIG: + s->pmon_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_ALERT1_CONFIG: + s->alert1_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_ALERT2_CONFIG: + s->alert2_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_DEVICE_CONFIG: + s->device_config = pmbus_receive16(pmdev); + break; + + case ADM1272_MFR_POWER_CYCLE: + adm1272_exit_reset((Object *)s); + break; + + case ADM1272_HYSTERESIS_LOW: + s->hysteresis_low = pmbus_receive16(pmdev); + break; + + case ADM1272_HYSTERESIS_HIGH: + s->hysteresis_high = pmbus_receive16(pmdev); + break; + + case ADM1272_STRT_UP_IOUT_LIM: + s->strt_up_iout_lim = pmbus_receive16(pmdev); + adm1272_check_limits(s); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to unsupported register: 0x%02x\n", + __func__, pmdev->code); + break; + } + return 0; +} + +static void adm1272_get(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + uint16_t value; + + if (strcmp(name, "vin") == 0 || strcmp(name, "vout") == 0) { + value = adm1272_direct_to_millivolts(*(uint16_t *)opaque); + } else if (strcmp(name, "iout") == 0) { + value = adm1272_direct_to_milliamps(*(uint16_t *)opaque); + } else if (strcmp(name, "pin") == 0) { + value = adm1272_direct_to_watts(*(uint16_t *)opaque); + } else { + value = *(uint16_t *)opaque; + } + + visit_type_uint16(v, name, &value, errp); +} + +static void adm1272_set(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + ADM1272State *s = ADM1272(obj); + uint16_t *internal = opaque; + uint16_t value; + + if (!visit_type_uint16(v, name, &value, errp)) { + return; + } + + if (strcmp(name, "vin") == 0 || strcmp(name, "vout") == 0) { + *internal = adm1272_millivolts_to_direct(value); + } else if (strcmp(name, "iout") == 0) { + *internal = adm1272_milliamps_to_direct(value); + } else if (strcmp(name, "pin") == 0) { + *internal = adm1272_watts_to_direct(value); + } else { + *internal = value; + } + + adm1272_check_limits(s); +} + +static const VMStateDescription vmstate_adm1272 = { + .name = "ADM1272", + .version_id = 0, + .minimum_version_id = 0, + .fields = 
(VMStateField[]){ + VMSTATE_PMBUS_DEVICE(parent, ADM1272State), + VMSTATE_UINT64(ein_ext, ADM1272State), + VMSTATE_UINT32(pin_ext, ADM1272State), + VMSTATE_UINT8(restart_time, ADM1272State), + + VMSTATE_UINT16(peak_vin, ADM1272State), + VMSTATE_UINT16(peak_vout, ADM1272State), + VMSTATE_UINT16(peak_iout, ADM1272State), + VMSTATE_UINT16(peak_temperature, ADM1272State), + VMSTATE_UINT16(peak_pin, ADM1272State), + + VMSTATE_UINT8(pmon_control, ADM1272State), + VMSTATE_UINT16(pmon_config, ADM1272State), + VMSTATE_UINT16(alert1_config, ADM1272State), + VMSTATE_UINT16(alert2_config, ADM1272State), + VMSTATE_UINT16(device_config, ADM1272State), + + VMSTATE_UINT16(hysteresis_low, ADM1272State), + VMSTATE_UINT16(hysteresis_high, ADM1272State), + VMSTATE_UINT8(status_hysteresis, ADM1272State), + VMSTATE_UINT8(status_gpio, ADM1272State), + + VMSTATE_UINT16(strt_up_iout_lim, ADM1272State), + VMSTATE_END_OF_LIST() + } +}; + +static void adm1272_init(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + uint64_t flags = PB_HAS_VOUT_MODE | PB_HAS_VOUT | PB_HAS_VIN | PB_HAS_IOUT | + PB_HAS_PIN | PB_HAS_TEMPERATURE | PB_HAS_MFR_INFO; + + pmbus_page_config(pmdev, 0, flags); + + object_property_add(obj, "vin", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_vin); + + object_property_add(obj, "vout", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_vout); + + object_property_add(obj, "iout", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_iout); + + object_property_add(obj, "pin", "uint16", + adm1272_get, + adm1272_set, NULL, &pmdev->pages[0].read_pin); + +} + +static void adm1272_class_init(ObjectClass *klass, void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + PMBusDeviceClass *k = PMBUS_DEVICE_CLASS(klass); + + dc->desc = "Analog Devices ADM1272 Hot Swap controller"; + dc->vmsd = &vmstate_adm1272; + k->write_data = adm1272_write_data; + k->receive_byte = adm1272_read_byte; + k->device_num_pages = 1; + + rc->phases.exit = adm1272_exit_reset; +} + +static const TypeInfo adm1272_info = { + .name = TYPE_ADM1272, + .parent = TYPE_PMBUS_DEVICE, + .instance_size = sizeof(ADM1272State), + .instance_init = adm1272_init, + .class_init = adm1272_class_init, +}; + +static void adm1272_register_types(void) +{ + type_register_static(&adm1272_info); +} + +type_init(adm1272_register_types) diff --git a/hw/sensor/dps310.c b/hw/sensor/dps310.c new file mode 100644 index 00000000000..d60a18ac41b --- /dev/null +++ b/hw/sensor/dps310.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2017-2021 Joel Stanley , IBM Corporation + * + * Infineon DPS310 temperature and humidity sensor + * + * https://www.infineon.com/cms/en/product/sensor/pressure-sensors/pressure-sensors-for-iot/dps310/ + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/hw.h" +#include "hw/i2c/i2c.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "migration/vmstate.h" + +#define NUM_REGISTERS 0x33 + +typedef struct DPS310State { + /*< private >*/ + I2CSlave i2c; + + /*< public >*/ + uint8_t regs[NUM_REGISTERS]; + + uint8_t len; + uint8_t pointer; + +} DPS310State; + +#define TYPE_DPS310 "dps310" +#define DPS310(obj) OBJECT_CHECK(DPS310State, (obj), TYPE_DPS310) + +#define DPS310_PRS_B2 0x00 +#define DPS310_PRS_B1 0x01 +#define DPS310_PRS_B0 0x02 +#define DPS310_TMP_B2 0x03 +#define DPS310_TMP_B1 0x04 +#define DPS310_TMP_B0 0x05 +#define DPS310_PRS_CFG 0x06 +#define DPS310_TMP_CFG 
0x07 +#define DPS310_TMP_RATE_BITS (0x70) +#define DPS310_MEAS_CFG 0x08 +#define DPS310_MEAS_CTRL_BITS (0x07) +#define DPS310_PRESSURE_EN BIT(0) +#define DPS310_TEMP_EN BIT(1) +#define DPS310_BACKGROUND BIT(2) +#define DPS310_PRS_RDY BIT(4) +#define DPS310_TMP_RDY BIT(5) +#define DPS310_SENSOR_RDY BIT(6) +#define DPS310_COEF_RDY BIT(7) +#define DPS310_CFG_REG 0x09 +#define DPS310_RESET 0x0c +#define DPS310_RESET_MAGIC (BIT(0) | BIT(3)) +#define DPS310_COEF_BASE 0x10 +#define DPS310_COEF_LAST 0x21 +#define DPS310_COEF_SRC 0x28 + +static void dps310_reset(DeviceState *dev) +{ + DPS310State *s = DPS310(dev); + + static const uint8_t regs_reset_state[sizeof(s->regs)] = { + 0xfe, 0x2f, 0xee, 0x02, 0x69, 0xa6, 0x00, 0x80, 0xc7, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x0e, 0x1e, 0xdd, 0x13, 0xca, 0x5f, 0x21, 0x52, + 0xf9, 0xc6, 0x04, 0xd1, 0xdb, 0x47, 0x00, 0x5b, 0xfb, 0x3a, 0x00, 0x00, + 0x20, 0x49, 0x4e, 0xa5, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x60, 0x15, 0x02 + }; + + memcpy(s->regs, regs_reset_state, sizeof(s->regs)); + s->pointer = 0; + + /* TODO: assert these after some timeout ? */ + s->regs[DPS310_MEAS_CFG] = DPS310_COEF_RDY | DPS310_SENSOR_RDY + | DPS310_TMP_RDY | DPS310_PRS_RDY; +} + +static uint8_t dps310_read(DPS310State *s, uint8_t reg) +{ + if (reg >= sizeof(s->regs)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: register 0x%02x out of bounds\n", + __func__, s->pointer); + return 0xFF; + } + + switch (reg) { + case DPS310_PRS_B2: + case DPS310_PRS_B1: + case DPS310_PRS_B0: + case DPS310_TMP_B2: + case DPS310_TMP_B1: + case DPS310_TMP_B0: + case DPS310_PRS_CFG: + case DPS310_TMP_CFG: + case DPS310_MEAS_CFG: + case DPS310_CFG_REG: + case DPS310_COEF_BASE...DPS310_COEF_LAST: + case DPS310_COEF_SRC: + case 0x32: /* Undocumented register to indicate workaround not required */ + return s->regs[reg]; + default: + qemu_log_mask(LOG_UNIMP, "%s: register 0x%02x unimplemented\n", + __func__, reg); + return 0xFF; + } +} + +static void dps310_write(DPS310State *s, uint8_t reg, uint8_t data) +{ + if (reg >= sizeof(s->regs)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: register %d out of bounds\n", + __func__, s->pointer); + return; + } + + switch (reg) { + case DPS310_RESET: + if (data == DPS310_RESET_MAGIC) { + device_cold_reset(DEVICE(s)); + } + break; + case DPS310_PRS_CFG: + case DPS310_TMP_CFG: + case DPS310_MEAS_CFG: + case DPS310_CFG_REG: + s->regs[reg] = data; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: register 0x%02x unimplemented\n", + __func__, reg); + return; + } +} + +static uint8_t dps310_rx(I2CSlave *i2c) +{ + DPS310State *s = DPS310(i2c); + + if (s->len == 1) { + return dps310_read(s, s->pointer++); + } else { + return 0xFF; + } +} + +static int dps310_tx(I2CSlave *i2c, uint8_t data) +{ + DPS310State *s = DPS310(i2c); + + if (s->len == 0) { + /* + * first byte is the register pointer for a read or write + * operation + */ + s->pointer = data; + s->len++; + } else if (s->len == 1) { + dps310_write(s, s->pointer++, data); + } + + return 0; +} + +static int dps310_event(I2CSlave *i2c, enum i2c_event event) +{ + DPS310State *s = DPS310(i2c); + + switch (event) { + case I2C_START_SEND: + s->pointer = 0xFF; + s->len = 0; + break; + case I2C_START_RECV: + if (s->len != 1) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid recv sequence\n", + __func__); + } + break; + default: + break; + } + + return 0; +} + +static const VMStateDescription vmstate_dps310 = { + .name = "DPS310", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + 
VMSTATE_UINT8(len, DPS310State), + VMSTATE_UINT8_ARRAY(regs, DPS310State, NUM_REGISTERS), + VMSTATE_UINT8(pointer, DPS310State), + VMSTATE_I2C_SLAVE(i2c, DPS310State), + VMSTATE_END_OF_LIST() + } +}; + +static void dps310_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); + + k->event = dps310_event; + k->recv = dps310_rx; + k->send = dps310_tx; + dc->reset = dps310_reset; + dc->vmsd = &vmstate_dps310; +} + +static const TypeInfo dps310_info = { + .name = TYPE_DPS310, + .parent = TYPE_I2C_SLAVE, + .instance_size = sizeof(DPS310State), + .class_init = dps310_class_init, +}; + +static void dps310_register_types(void) +{ + type_register_static(&dps310_info); +} + +type_init(dps310_register_types) diff --git a/hw/misc/emc141x.c b/hw/sensor/emc141x.c similarity index 99% rename from hw/misc/emc141x.c rename to hw/sensor/emc141x.c index f7c53d48a42..7ce8f4e9794 100644 --- a/hw/misc/emc141x.c +++ b/hw/sensor/emc141x.c @@ -25,7 +25,7 @@ #include "qapi/visitor.h" #include "qemu/module.h" #include "qom/object.h" -#include "hw/misc/emc141x_regs.h" +#include "hw/sensor/emc141x_regs.h" #define SENSORS_COUNT_MAX 4 diff --git a/hw/sensor/max34451.c b/hw/sensor/max34451.c new file mode 100644 index 00000000000..a91d8bd487c --- /dev/null +++ b/hw/sensor/max34451.c @@ -0,0 +1,775 @@ +/* + * Maxim MAX34451 PMBus 16-Channel V/I monitor and 12-Channel Sequencer/Marginer + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/i2c/pmbus_device.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/log.h" +#include "qemu/module.h" + +#define TYPE_MAX34451 "max34451" +#define MAX34451(obj) OBJECT_CHECK(MAX34451State, (obj), TYPE_MAX34451) + +#define MAX34451_MFR_MODE 0xD1 +#define MAX34451_MFR_PSEN_CONFIG 0xD2 +#define MAX34451_MFR_VOUT_PEAK 0xD4 +#define MAX34451_MFR_IOUT_PEAK 0xD5 +#define MAX34451_MFR_TEMPERATURE_PEAK 0xD6 +#define MAX34451_MFR_VOUT_MIN 0xD7 +#define MAX34451_MFR_NV_LOG_CONFIG 0xD8 +#define MAX34451_MFR_FAULT_RESPONSE 0xD9 +#define MAX34451_MFR_FAULT_RETRY 0xDA +#define MAX34451_MFR_NV_FAULT_LOG 0xDC +#define MAX34451_MFR_TIME_COUNT 0xDD +#define MAX34451_MFR_MARGIN_CONFIG 0xDF +#define MAX34451_MFR_FW_SERIAL 0xE0 +#define MAX34451_MFR_IOUT_AVG 0xE2 +#define MAX34451_MFR_CHANNEL_CONFIG 0xE4 +#define MAX34451_MFR_TON_SEQ_MAX 0xE6 +#define MAX34451_MFR_PWM_CONFIG 0xE7 +#define MAX34451_MFR_SEQ_CONFIG 0xE8 +#define MAX34451_MFR_STORE_ALL 0xEE +#define MAX34451_MFR_RESTORE_ALL 0xEF +#define MAX34451_MFR_TEMP_SENSOR_CONFIG 0xF0 +#define MAX34451_MFR_STORE_SINGLE 0xFC +#define MAX34451_MFR_CRC 0xFE + +#define MAX34451_NUM_MARGINED_PSU 12 +#define MAX34451_NUM_PWR_DEVICES 16 +#define MAX34451_NUM_TEMP_DEVICES 5 +#define MAX34451_NUM_PAGES 21 + +#define DEFAULT_OP_ON 0x80 +#define DEFAULT_CAPABILITY 0x20 +#define DEFAULT_ON_OFF_CONFIG 0x1a +#define DEFAULT_VOUT_MODE 0x40 +#define DEFAULT_TEMPERATURE 2500 +#define DEFAULT_SCALE 0x7FFF +#define DEFAULT_OV_LIMIT 0x7FFF +#define DEFAULT_OC_LIMIT 0x7FFF +#define DEFAULT_OT_LIMIT 0x7FFF +#define DEFAULT_VMIN 0x7FFF +#define DEFAULT_TON_FAULT_LIMIT 0xFFFF +#define DEFAULT_CHANNEL_CONFIG 0x20 +#define DEFAULT_TEXT 0x3130313031303130 + +/** + * MAX34451State: + * @code: The command code received + * @page: Each page corresponds to a device monitored by the Max 34451 + * The page register determines the available commands depending on device 
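Before the full per-page table that follows, the page ranges can be summarized in code form; this mirrors the "pmdev->page < 12", "< 16" and "15 < page && page < 21" guards used by the command handlers later in this file. The helper below is purely illustrative and not part of the device model.

typedef enum {
    MAX34451_PAGE_MARGINED_PSU,   /* pages 0-11: RS0-RS11, PSENx, PWM/DS4424 */
    MAX34451_PAGE_ADC_OR_GPI,     /* pages 12-15: ADC channels 12-15 or GPI */
    MAX34451_PAGE_TEMP_SENSOR,    /* pages 16-20: internal sensor + 4x DS75LV */
    MAX34451_PAGE_RESERVED,       /* pages 21-254: reserved */
    MAX34451_PAGE_ALL,            /* page 255: applies to every page */
} Max34451PageKind;

static Max34451PageKind max34451_page_kind(uint8_t page)
{
    if (page < 12) {
        return MAX34451_PAGE_MARGINED_PSU;
    } else if (page < 16) {
        return MAX34451_PAGE_ADC_OR_GPI;
    } else if (page < 21) {
        return MAX34451_PAGE_TEMP_SENSOR;
    } else if (page < 255) {
        return MAX34451_PAGE_RESERVED;
    }
    return MAX34451_PAGE_ALL;
}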
+ ___________________________________________________________________________ + | 0 | Power supply monitored by RS0, controlled by PSEN0, and | + | | margined with PWM0. | + |_______|___________________________________________________________________| + | 1 | Power supply monitored by RS1, controlled by PSEN1, and | + | | margined with PWM1. | + |_______|___________________________________________________________________| + | 2 | Power supply monitored by RS2, controlled by PSEN2, and | + | | margined with PWM2. | + |_______|___________________________________________________________________| + | 3 | Power supply monitored by RS3, controlled by PSEN3, and | + | | margined with PWM3. | + |_______|___________________________________________________________________| + | 4 | Power supply monitored by RS4, controlled by PSEN4, and | + | | margined with PWM4. | + |_______|___________________________________________________________________| + | 5 | Power supply monitored by RS5, controlled by PSEN5, and | + | | margined with PWM5. | + |_______|___________________________________________________________________| + | 6 | Power supply monitored by RS6, controlled by PSEN6, and | + | | margined with PWM6. | + |_______|___________________________________________________________________| + | 7 | Power supply monitored by RS7, controlled by PSEN7, and | + | | margined with PWM7. | + |_______|___________________________________________________________________| + | 8 | Power supply monitored by RS8, controlled by PSEN8, and | + | | optionally margined by OUT0 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 9 | Power supply monitored by RS9, controlled by PSEN9, and | + | | optionally margined by OUT1 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 10 | Power supply monitored by RS10, controlled by PSEN10, and | + | | optionally margined by OUT2 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 11 | Power supply monitored by RS11, controlled by PSEN11, and | + | | optionally margined by OUT3 of external DS4424 at I2C address A0h.| + |_______|___________________________________________________________________| + | 12 | ADC channel 12 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 13 | ADC channel 13 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 14 | ADC channel 14 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 15 | ADC channel 15 (monitors voltage or current) or GPI. | + |_______|___________________________________________________________________| + | 16 | Internal temperature sensor. | + |_______|___________________________________________________________________| + | 17 | External DS75LV temperature sensor with I2C address 90h. | + |_______|___________________________________________________________________| + | 18 | External DS75LV temperature sensor with I2C address 92h. | + |_______|___________________________________________________________________| + | 19 | External DS75LV temperature sensor with I2C address 94h. 
| + |_______|___________________________________________________________________| + | 20 | External DS75LV temperature sensor with I2C address 96h. | + |_______|___________________________________________________________________| + | 21=E2=80=93254| Reserved. | + |_______|___________________________________________________________________| + | 255 | Applies to all pages. | + |_______|___________________________________________________________________| + * + * @operation: Turn on and off power supplies + * @on_off_config: Configure the power supply on and off transition behaviour + * @write_protect: protect against changes to the device's memory + * @vout_margin_high: the voltage when OPERATION is set to margin high + * @vout_margin_low: the voltage when OPERATION is set to margin low + * @vout_scale: scale ADC reading to actual device reading if different + * @iout_cal_gain: set ratio of the voltage at the ADC input to sensed current + */ +typedef struct MAX34451State { + PMBusDevice parent; + + uint16_t power_good_on[MAX34451_NUM_PWR_DEVICES]; + uint16_t power_good_off[MAX34451_NUM_PWR_DEVICES]; + uint16_t ton_delay[MAX34451_NUM_MARGINED_PSU]; + uint16_t ton_max_fault_limit[MAX34451_NUM_MARGINED_PSU]; + uint16_t toff_delay[MAX34451_NUM_MARGINED_PSU]; + uint8_t status_mfr_specific[MAX34451_NUM_PWR_DEVICES]; + /* Manufacturer specific function */ + uint64_t mfr_location; + uint64_t mfr_date; + uint64_t mfr_serial; + uint16_t mfr_mode; + uint32_t psen_config[MAX34451_NUM_MARGINED_PSU]; + uint16_t vout_peak[MAX34451_NUM_PWR_DEVICES]; + uint16_t iout_peak[MAX34451_NUM_PWR_DEVICES]; + uint16_t temperature_peak[MAX34451_NUM_TEMP_DEVICES]; + uint16_t vout_min[MAX34451_NUM_PWR_DEVICES]; + uint16_t nv_log_config; + uint32_t fault_response[MAX34451_NUM_PWR_DEVICES]; + uint16_t fault_retry; + uint32_t fault_log; + uint32_t time_count; + uint16_t margin_config[MAX34451_NUM_MARGINED_PSU]; + uint16_t fw_serial; + uint16_t iout_avg[MAX34451_NUM_PWR_DEVICES]; + uint16_t channel_config[MAX34451_NUM_PWR_DEVICES]; + uint16_t ton_seq_max[MAX34451_NUM_MARGINED_PSU]; + uint32_t pwm_config[MAX34451_NUM_MARGINED_PSU]; + uint32_t seq_config[MAX34451_NUM_MARGINED_PSU]; + uint16_t temp_sensor_config[MAX34451_NUM_TEMP_DEVICES]; + uint16_t store_single; + uint16_t crc; +} MAX34451State; + + +static void max34451_check_limits(MAX34451State *s) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(s); + + pmbus_check_limits(pmdev); + + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + if (pmdev->pages[i].read_vout == 0) { /* PSU disabled */ + continue; + } + + if (pmdev->pages[i].read_vout > s->vout_peak[i]) { + s->vout_peak[i] = pmdev->pages[i].read_vout; + } + + if (pmdev->pages[i].read_vout < s->vout_min[i]) { + s->vout_min[i] = pmdev->pages[i].read_vout; + } + + if (pmdev->pages[i].read_iout > s->iout_peak[i]) { + s->iout_peak[i] = pmdev->pages[i].read_iout; + } + } + + for (int i = 0; i < MAX34451_NUM_TEMP_DEVICES; i++) { + if (pmdev->pages[i + 16].read_temperature_1 > s->temperature_peak[i]) { + s->temperature_peak[i] = pmdev->pages[i + 16].read_temperature_1; + } + } +} + +static uint8_t max34451_read_byte(PMBusDevice *pmdev) +{ + MAX34451State *s = MAX34451(pmdev); + switch (pmdev->code) { + + case PMBUS_POWER_GOOD_ON: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->power_good_on[pmdev->page]); + } + break; + + case PMBUS_POWER_GOOD_OFF: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->power_good_off[pmdev->page]); + } + break; + + case PMBUS_TON_DELAY: + if (pmdev->page < 12) { + pmbus_send16(pmdev, 
s->ton_delay[pmdev->page]); + } + break; + + case PMBUS_TON_MAX_FAULT_LIMIT: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->ton_max_fault_limit[pmdev->page]); + } + break; + + case PMBUS_TOFF_DELAY: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->toff_delay[pmdev->page]); + } + break; + + case PMBUS_STATUS_MFR_SPECIFIC: + if (pmdev->page < 16) { + pmbus_send8(pmdev, s->status_mfr_specific[pmdev->page]); + } + break; + + case PMBUS_MFR_ID: + pmbus_send8(pmdev, 0x4d); /* Maxim */ + break; + + case PMBUS_MFR_MODEL: + pmbus_send8(pmdev, 0x59); + break; + + case PMBUS_MFR_LOCATION: + pmbus_send64(pmdev, s->mfr_location); + break; + + case PMBUS_MFR_DATE: + pmbus_send64(pmdev, s->mfr_date); + break; + + case PMBUS_MFR_SERIAL: + pmbus_send64(pmdev, s->mfr_serial); + break; + + case MAX34451_MFR_MODE: + pmbus_send16(pmdev, s->mfr_mode); + break; + + case MAX34451_MFR_PSEN_CONFIG: + if (pmdev->page < 12) { + pmbus_send32(pmdev, s->psen_config[pmdev->page]); + } + break; + + case MAX34451_MFR_VOUT_PEAK: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->vout_peak[pmdev->page]); + } + break; + + case MAX34451_MFR_IOUT_PEAK: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->iout_peak[pmdev->page]); + } + break; + + case MAX34451_MFR_TEMPERATURE_PEAK: + if (15 < pmdev->page && pmdev->page < 21) { + pmbus_send16(pmdev, s->temperature_peak[pmdev->page % 16]); + } else { + pmbus_send16(pmdev, s->temperature_peak[0]); + } + break; + + case MAX34451_MFR_VOUT_MIN: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->vout_min[pmdev->page]); + } + break; + + case MAX34451_MFR_NV_LOG_CONFIG: + pmbus_send16(pmdev, s->nv_log_config); + break; + + case MAX34451_MFR_FAULT_RESPONSE: + if (pmdev->page < 16) { + pmbus_send32(pmdev, s->fault_response[pmdev->page]); + } + break; + + case MAX34451_MFR_FAULT_RETRY: + pmbus_send32(pmdev, s->fault_retry); + break; + + case MAX34451_MFR_NV_FAULT_LOG: + pmbus_send32(pmdev, s->fault_log); + break; + + case MAX34451_MFR_TIME_COUNT: + pmbus_send32(pmdev, s->time_count); + break; + + case MAX34451_MFR_MARGIN_CONFIG: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->margin_config[pmdev->page]); + } + break; + + case MAX34451_MFR_FW_SERIAL: + if (pmdev->page == 255) { + pmbus_send16(pmdev, 1); /* Firmware revision */ + } + break; + + case MAX34451_MFR_IOUT_AVG: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->iout_avg[pmdev->page]); + } + break; + + case MAX34451_MFR_CHANNEL_CONFIG: + if (pmdev->page < 16) { + pmbus_send16(pmdev, s->channel_config[pmdev->page]); + } + break; + + case MAX34451_MFR_TON_SEQ_MAX: + if (pmdev->page < 12) { + pmbus_send16(pmdev, s->ton_seq_max[pmdev->page]); + } + break; + + case MAX34451_MFR_PWM_CONFIG: + if (pmdev->page < 12) { + pmbus_send32(pmdev, s->pwm_config[pmdev->page]); + } + break; + + case MAX34451_MFR_SEQ_CONFIG: + if (pmdev->page < 12) { + pmbus_send32(pmdev, s->seq_config[pmdev->page]); + } + break; + + case MAX34451_MFR_TEMP_SENSOR_CONFIG: + if (15 < pmdev->page && pmdev->page < 21) { + pmbus_send32(pmdev, s->temp_sensor_config[pmdev->page % 16]); + } + break; + + case MAX34451_MFR_STORE_SINGLE: + pmbus_send32(pmdev, s->store_single); + break; + + case MAX34451_MFR_CRC: + pmbus_send32(pmdev, s->crc); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: reading from unsupported register: 0x%02x\n", + __func__, pmdev->code); + break; + } + return 0xFF; +} + +static int max34451_write_data(PMBusDevice *pmdev, const uint8_t *buf, + uint8_t len) +{ + MAX34451State *s = MAX34451(pmdev); + + if (len == 0) { + 
qemu_log_mask(LOG_GUEST_ERROR, "%s: writing empty data\n", __func__); + return -1; + } + + pmdev->code = buf[0]; /* PMBus command code */ + + if (len == 1) { + return 0; + } + + /* Exclude command code from buffer */ + buf++; + len--; + uint8_t index = pmdev->page; + + switch (pmdev->code) { + case MAX34451_MFR_STORE_ALL: + case MAX34451_MFR_RESTORE_ALL: + case MAX34451_MFR_STORE_SINGLE: + /* + * TODO: hardware behaviour is to move the contents of volatile + * memory to non-volatile memory. + */ + break; + + case PMBUS_POWER_GOOD_ON: /* R/W word */ + if (pmdev->page < MAX34451_NUM_PWR_DEVICES) { + s->power_good_on[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_POWER_GOOD_OFF: /* R/W word */ + if (pmdev->page < MAX34451_NUM_PWR_DEVICES) { + s->power_good_off[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_TON_DELAY: /* R/W word */ + if (pmdev->page < 12) { + s->ton_delay[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_TON_MAX_FAULT_LIMIT: /* R/W word */ + if (pmdev->page < 12) { + s->ton_max_fault_limit[pmdev->page] + = pmbus_receive16(pmdev); + } + break; + + case PMBUS_TOFF_DELAY: /* R/W word */ + if (pmdev->page < 12) { + s->toff_delay[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case PMBUS_MFR_LOCATION: /* R/W 64 */ + s->mfr_location = pmbus_receive64(pmdev); + break; + + case PMBUS_MFR_DATE: /* R/W 64 */ + s->mfr_date = pmbus_receive64(pmdev); + break; + + case PMBUS_MFR_SERIAL: /* R/W 64 */ + s->mfr_serial = pmbus_receive64(pmdev); + break; + + case MAX34451_MFR_MODE: /* R/W word */ + s->mfr_mode = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_PSEN_CONFIG: /* R/W 32 */ + if (pmdev->page < 12) { + s->psen_config[pmdev->page] = pmbus_receive32(pmdev); + } + break; + + case MAX34451_MFR_VOUT_PEAK: /* R/W word */ + if (pmdev->page < 16) { + s->vout_peak[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_IOUT_PEAK: /* R/W word */ + if (pmdev->page < 16) { + s->iout_peak[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_TEMPERATURE_PEAK: /* R/W word */ + if (15 < pmdev->page && pmdev->page < 21) { + s->temperature_peak[pmdev->page % 16] + = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_VOUT_MIN: /* R/W word */ + if (pmdev->page < 16) { + s->vout_min[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_NV_LOG_CONFIG: /* R/W word */ + s->nv_log_config = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_FAULT_RESPONSE: /* R/W 32 */ + if (pmdev->page < 16) { + s->fault_response[pmdev->page] = pmbus_receive32(pmdev); + } + break; + + case MAX34451_MFR_FAULT_RETRY: /* R/W word */ + s->fault_retry = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_TIME_COUNT: /* R/W 32 */ + s->time_count = pmbus_receive32(pmdev); + break; + + case MAX34451_MFR_MARGIN_CONFIG: /* R/W word */ + if (pmdev->page < 12) { + s->margin_config[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_CHANNEL_CONFIG: /* R/W word */ + if (pmdev->page < 16) { + s->channel_config[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_TON_SEQ_MAX: /* R/W word */ + if (pmdev->page < 12) { + s->ton_seq_max[pmdev->page] = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_PWM_CONFIG: /* R/W 32 */ + if (pmdev->page < 12) { + s->pwm_config[pmdev->page] = pmbus_receive32(pmdev); + } + break; + + case MAX34451_MFR_SEQ_CONFIG: /* R/W 32 */ + if (pmdev->page < 12) { + s->seq_config[pmdev->page] = pmbus_receive32(pmdev); + } + 
break; + + case MAX34451_MFR_TEMP_SENSOR_CONFIG: /* R/W word */ + if (15 < pmdev->page && pmdev->page < 21) { + s->temp_sensor_config[pmdev->page % 16] + = pmbus_receive16(pmdev); + } + break; + + case MAX34451_MFR_CRC: /* R/W word */ + s->crc = pmbus_receive16(pmdev); + break; + + case MAX34451_MFR_NV_FAULT_LOG: + case MAX34451_MFR_FW_SERIAL: + case MAX34451_MFR_IOUT_AVG: + /* Read only commands */ + pmdev->pages[index].status_word |= PMBUS_STATUS_CML; + pmdev->pages[index].status_cml |= PB_CML_FAULT_INVALID_DATA; + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to read-only register 0x%02x\n", + __func__, pmdev->code); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: writing to unsupported register: 0x%02x\n", + __func__, pmdev->code); + break; + } + + return 0; +} + +static void max34451_get(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint16(v, name, (uint16_t *)opaque, errp); +} + +static void max34451_set(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + MAX34451State *s = MAX34451(obj); + uint16_t *internal = opaque; + uint16_t value; + if (!visit_type_uint16(v, name, &value, errp)) { + return; + } + + *internal = value; + max34451_check_limits(s); +} + +/* used to init uint16_t arrays */ +static inline void *memset_word(void *s, uint16_t c, size_t n) +{ + size_t i; + uint16_t *p = s; + + for (i = 0; i < n; i++) { + p[i] = c; + } + + return s; +} + +static void max34451_exit_reset(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + MAX34451State *s = MAX34451(obj); + pmdev->capability = DEFAULT_CAPABILITY; + + for (int i = 0; i < MAX34451_NUM_PAGES; i++) { + pmdev->pages[i].operation = DEFAULT_OP_ON; + pmdev->pages[i].on_off_config = DEFAULT_ON_OFF_CONFIG; + pmdev->pages[i].revision = 0x11; + pmdev->pages[i].vout_mode = DEFAULT_VOUT_MODE; + } + + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + pmdev->pages[i].vout_scale_monitor = DEFAULT_SCALE; + pmdev->pages[i].vout_ov_fault_limit = DEFAULT_OV_LIMIT; + pmdev->pages[i].vout_ov_warn_limit = DEFAULT_OV_LIMIT; + pmdev->pages[i].iout_oc_warn_limit = DEFAULT_OC_LIMIT; + pmdev->pages[i].iout_oc_fault_limit = DEFAULT_OC_LIMIT; + } + + for (int i = 0; i < MAX34451_NUM_MARGINED_PSU; i++) { + pmdev->pages[i].ton_max_fault_limit = DEFAULT_TON_FAULT_LIMIT; + } + + for (int i = 16; i < MAX34451_NUM_TEMP_DEVICES + 16; i++) { + pmdev->pages[i].read_temperature_1 = DEFAULT_TEMPERATURE; + pmdev->pages[i].ot_warn_limit = DEFAULT_OT_LIMIT; + pmdev->pages[i].ot_fault_limit = DEFAULT_OT_LIMIT; + } + + memset_word(s->ton_max_fault_limit, DEFAULT_TON_FAULT_LIMIT, + MAX34451_NUM_MARGINED_PSU); + memset_word(s->channel_config, DEFAULT_CHANNEL_CONFIG, + MAX34451_NUM_PWR_DEVICES); + memset_word(s->vout_min, DEFAULT_VMIN, MAX34451_NUM_PWR_DEVICES); + + s->mfr_location = DEFAULT_TEXT; + s->mfr_date = DEFAULT_TEXT; + s->mfr_serial = DEFAULT_TEXT; +} + +static const VMStateDescription vmstate_max34451 = { + .name = TYPE_MAX34451, + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]){ + VMSTATE_PMBUS_DEVICE(parent, MAX34451State), + VMSTATE_UINT16_ARRAY(power_good_on, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(power_good_off, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(ton_delay, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(ton_max_fault_limit, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(toff_delay, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + 
VMSTATE_UINT8_ARRAY(status_mfr_specific, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT64(mfr_location, MAX34451State), + VMSTATE_UINT64(mfr_date, MAX34451State), + VMSTATE_UINT64(mfr_serial, MAX34451State), + VMSTATE_UINT16(mfr_mode, MAX34451State), + VMSTATE_UINT32_ARRAY(psen_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(vout_peak, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(iout_peak, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(temperature_peak, MAX34451State, + MAX34451_NUM_TEMP_DEVICES), + VMSTATE_UINT16_ARRAY(vout_min, MAX34451State, MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16(nv_log_config, MAX34451State), + VMSTATE_UINT32_ARRAY(fault_response, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16(fault_retry, MAX34451State), + VMSTATE_UINT32(fault_log, MAX34451State), + VMSTATE_UINT32(time_count, MAX34451State), + VMSTATE_UINT16_ARRAY(margin_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16(fw_serial, MAX34451State), + VMSTATE_UINT16_ARRAY(iout_avg, MAX34451State, MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(channel_config, MAX34451State, + MAX34451_NUM_PWR_DEVICES), + VMSTATE_UINT16_ARRAY(ton_seq_max, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT32_ARRAY(pwm_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT32_ARRAY(seq_config, MAX34451State, + MAX34451_NUM_MARGINED_PSU), + VMSTATE_UINT16_ARRAY(temp_sensor_config, MAX34451State, + MAX34451_NUM_TEMP_DEVICES), + VMSTATE_UINT16(store_single, MAX34451State), + VMSTATE_UINT16(crc, MAX34451State), + VMSTATE_END_OF_LIST() + } +}; + +static void max34451_init(Object *obj) +{ + PMBusDevice *pmdev = PMBUS_DEVICE(obj); + uint64_t psu_flags = PB_HAS_VOUT | PB_HAS_IOUT | PB_HAS_VOUT_MODE | + PB_HAS_IOUT_GAIN; + + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + pmbus_page_config(pmdev, i, psu_flags); + } + + for (int i = 0; i < MAX34451_NUM_MARGINED_PSU; i++) { + pmbus_page_config(pmdev, i, psu_flags | PB_HAS_VOUT_MARGIN); + } + + for (int i = 16; i < MAX34451_NUM_TEMP_DEVICES + 16; i++) { + pmbus_page_config(pmdev, i, PB_HAS_TEMPERATURE | PB_HAS_VOUT_MODE); + } + + /* get and set the voltage in millivolts, max is 32767 mV */ + for (int i = 0; i < MAX34451_NUM_PWR_DEVICES; i++) { + object_property_add(obj, "vout[*]", "uint16", + max34451_get, + max34451_set, NULL, &pmdev->pages[i].read_vout); + } + + /* + * get and set the temperature of the internal temperature sensor in + * centidegrees Celcius i.e.: 2500 -> 25.00 C, max is 327.67 C + */ + for (int i = 0; i < MAX34451_NUM_TEMP_DEVICES; i++) { + object_property_add(obj, "temperature[*]", "uint16", + max34451_get, + max34451_set, + NULL, + &pmdev->pages[i + 16].read_temperature_1); + } + +} + +static void max34451_class_init(ObjectClass *klass, void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + PMBusDeviceClass *k = PMBUS_DEVICE_CLASS(klass); + dc->desc = "Maxim MAX34451 16-Channel V/I monitor"; + dc->vmsd = &vmstate_max34451; + k->write_data = max34451_write_data; + k->receive_byte = max34451_read_byte; + k->device_num_pages = MAX34451_NUM_PAGES; + rc->phases.exit = max34451_exit_reset; +} + +static const TypeInfo max34451_info = { + .name = TYPE_MAX34451, + .parent = TYPE_PMBUS_DEVICE, + .instance_size = sizeof(MAX34451State), + .instance_init = max34451_init, + .class_init = max34451_class_init, +}; + +static void max34451_register_types(void) +{ + 
type_register_static(&max34451_info); +} + +type_init(max34451_register_types) diff --git a/hw/sensor/meson.build b/hw/sensor/meson.build new file mode 100644 index 00000000000..059c4ca935b --- /dev/null +++ b/hw/sensor/meson.build @@ -0,0 +1,6 @@ +softmmu_ss.add(when: 'CONFIG_TMP105', if_true: files('tmp105.c')) +softmmu_ss.add(when: 'CONFIG_TMP421', if_true: files('tmp421.c')) +softmmu_ss.add(when: 'CONFIG_DPS310', if_true: files('dps310.c')) +softmmu_ss.add(when: 'CONFIG_EMC141X', if_true: files('emc141x.c')) +softmmu_ss.add(when: 'CONFIG_ADM1272', if_true: files('adm1272.c')) +softmmu_ss.add(when: 'CONFIG_MAX34451', if_true: files('max34451.c')) diff --git a/hw/misc/tmp105.c b/hw/sensor/tmp105.c similarity index 99% rename from hw/misc/tmp105.c rename to hw/sensor/tmp105.c index d299d9b21b7..20564494899 100644 --- a/hw/misc/tmp105.c +++ b/hw/sensor/tmp105.c @@ -22,7 +22,7 @@ #include "hw/i2c/i2c.h" #include "hw/irq.h" #include "migration/vmstate.h" -#include "tmp105.h" +#include "hw/sensor/tmp105.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "qemu/module.h" diff --git a/hw/misc/tmp421.c b/hw/sensor/tmp421.c similarity index 100% rename from hw/misc/tmp421.c rename to hw/sensor/tmp421.c diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 443820901d4..72759413f37 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -26,6 +26,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "cpu.h" #include "hw/sysbus.h" #include "hw/sh4/sh.h" @@ -42,7 +43,6 @@ #include "hw/loader.h" #include "hw/usb.h" #include "hw/block/flash.h" -#include "exec/address-spaces.h" #define FLASH_BASE 0x00000000 #define FLASH_SIZE (16 * MiB) @@ -57,10 +57,10 @@ #define LINUX_LOAD_OFFSET 0x0800000 #define INITRD_LOAD_OFFSET 0x1800000 -#define PA_IRLMSK 0x00 -#define PA_POWOFF 0x30 -#define PA_VERREG 0x32 -#define PA_OUTPORT 0x36 +#define PA_IRLMSK 0x00 +#define PA_POWOFF 0x30 +#define PA_VERREG 0x32 +#define PA_OUTPORT 0x36 typedef struct { uint16_t bcr; @@ -97,38 +97,41 @@ enum r2d_fpga_irq { }; static const struct { short irl; uint16_t msk; } irqtab[NR_IRQS] = { - [CF_IDE] = { 1, 1<<9 }, - [CF_CD] = { 2, 1<<8 }, - [PCI_INTA] = { 9, 1<<14 }, - [PCI_INTB] = { 10, 1<<13 }, - [PCI_INTC] = { 3, 1<<12 }, - [PCI_INTD] = { 0, 1<<11 }, - [SM501] = { 4, 1<<10 }, - [KEY] = { 5, 1<<6 }, - [RTC_A] = { 6, 1<<5 }, - [RTC_T] = { 7, 1<<4 }, - [SDCARD] = { 8, 1<<7 }, - [EXT] = { 11, 1<<0 }, - [TP] = { 12, 1<<15 }, + [CF_IDE] = { 1, 1 << 9 }, + [CF_CD] = { 2, 1 << 8 }, + [PCI_INTA] = { 9, 1 << 14 }, + [PCI_INTB] = { 10, 1 << 13 }, + [PCI_INTC] = { 3, 1 << 12 }, + [PCI_INTD] = { 0, 1 << 11 }, + [SM501] = { 4, 1 << 10 }, + [KEY] = { 5, 1 << 6 }, + [RTC_A] = { 6, 1 << 5 }, + [RTC_T] = { 7, 1 << 4 }, + [SDCARD] = { 8, 1 << 7 }, + [EXT] = { 11, 1 << 0 }, + [TP] = { 12, 1 << 15 }, }; static void update_irl(r2d_fpga_t *fpga) { int i, irl = 15; - for (i = 0; i < NR_IRQS; i++) - if (fpga->irlmon & fpga->irlmsk & irqtab[i].msk) - if (irqtab[i].irl < irl) - irl = irqtab[i].irl; + for (i = 0; i < NR_IRQS; i++) { + if ((fpga->irlmon & fpga->irlmsk & irqtab[i].msk) && + irqtab[i].irl < irl) { + irl = irqtab[i].irl; + } + } qemu_set_irq(fpga->irl, irl ^ 15); } static void r2d_fpga_irq_set(void *opaque, int n, int level) { r2d_fpga_t *fpga = opaque; - if (level) + if (level) { fpga->irlmon |= irqtab[n].msk; - else + } else { fpga->irlmon &= ~irqtab[n].msk; + } update_irl(fpga); } @@ -307,7 +310,7 @@ static void r2d_init(MachineState *machine) /* NIC: rtl8139 on-board, and 2 slots. 
*/ for (i = 0; i < nb_nics; i++) pci_nic_init_nofail(&nd_table[i], pci_bus, - "rtl8139", i==0 ? "2" : NULL); + "rtl8139", i == 0 ? "2" : NULL); /* USB keyboard */ usb_create_simple(usb_bus_find(-1), "usb-kbd"); @@ -322,8 +325,8 @@ static void r2d_init(MachineState *machine) SDRAM_BASE + LINUX_LOAD_OFFSET, INITRD_LOAD_OFFSET - LINUX_LOAD_OFFSET); if (kernel_size < 0) { - fprintf(stderr, "qemu: could not load kernel '%s'\n", kernel_filename); - exit(1); + error_report("qemu: could not load kernel '%s'", kernel_filename); + exit(1); } /* initialization which should be done by firmware */ @@ -331,7 +334,8 @@ static void r2d_init(MachineState *machine) MEMTXATTRS_UNSPECIFIED, NULL); /* cs3 SDRAM */ address_space_stw(&address_space_memory, SH7750_BCR2, 3 << (3 * 2), MEMTXATTRS_UNSPECIFIED, NULL); /* cs3 32bit */ - reset_info->vector = (SDRAM_BASE + LINUX_LOAD_OFFSET) | 0xa0000000; /* Start from P2 area */ + /* Start from P2 area */ + reset_info->vector = (SDRAM_BASE + LINUX_LOAD_OFFSET) | 0xa0000000; } if (initrd_filename) { @@ -342,8 +346,8 @@ static void r2d_init(MachineState *machine) SDRAM_SIZE - INITRD_LOAD_OFFSET); if (initrd_size < 0) { - fprintf(stderr, "qemu: could not load initrd '%s'\n", initrd_filename); - exit(1); + error_report("qemu: could not load initrd '%s'", initrd_filename); + exit(1); } /* initialization which should be done by firmware */ @@ -353,8 +357,10 @@ static void r2d_init(MachineState *machine) } if (kernel_cmdline) { - /* I see no evidence that this .kernel_cmdline buffer requires - NUL-termination, so using strncpy should be ok. */ + /* + * I see no evidence that this .kernel_cmdline buffer requires + * NUL-termination, so using strncpy should be ok. + */ strncpy(boot_params.kernel_cmdline, kernel_cmdline, sizeof(boot_params.kernel_cmdline)); } diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c index f8ac3ec6e32..43dfb6497b5 100644 --- a/hw/sh4/sh7750.c +++ b/hw/sh4/sh7750.c @@ -24,15 +24,19 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/sysbus.h" #include "hw/irq.h" #include "hw/sh4/sh.h" #include "sysemu/sysemu.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "sh7750_regs.h" #include "sh7750_regnames.h" #include "hw/sh4/sh_intc.h" #include "hw/timer/tmu012.h" -#include "cpu.h" #include "exec/exec-all.h" +#include "trace.h" #define NB_DEVICES 4 @@ -61,17 +65,17 @@ typedef struct SH7750State { uint16_t gpioic; uint32_t pctra; uint32_t pctrb; - uint16_t portdira; /* Cached */ - uint16_t portpullupa; /* Cached */ - uint16_t portdirb; /* Cached */ - uint16_t portpullupb; /* Cached */ + uint16_t portdira; /* Cached */ + uint16_t portpullupa; /* Cached */ + uint16_t portdirb; /* Cached */ + uint16_t portpullupb; /* Cached */ uint16_t pdtra; uint16_t pdtrb; - uint16_t periph_pdtra; /* Imposed by the peripherals */ - uint16_t periph_portdira; /* Direction seen from the peripherals */ - uint16_t periph_pdtrb; /* Imposed by the peripherals */ - uint16_t periph_portdirb; /* Direction seen from the peripherals */ - sh7750_io_device *devices[NB_DEVICES]; /* External peripherals */ + uint16_t periph_pdtra; /* Imposed by the peripherals */ + uint16_t periph_portdira; /* Direction seen from the peripherals */ + uint16_t periph_pdtrb; /* Imposed by the peripherals */ + uint16_t periph_portdirb; /* Direction seen from the peripherals */ + sh7750_io_device *devices[NB_DEVICES]; /* External peripherals */ /* Cache */ uint32_t ccr; @@ -79,143 +83,145 @@ typedef struct SH7750State { struct intc_desc intc; } SH7750State; -static 
inline int has_bcr3_and_bcr4(SH7750State * s) +static inline int has_bcr3_and_bcr4(SH7750State *s) { return s->cpu->env.features & SH_FEATURE_BCR3_AND_BCR4; } -/********************************************************************** - I/O ports -**********************************************************************/ -int sh7750_register_io_device(SH7750State * s, sh7750_io_device * device) +/* + * I/O ports + */ + +int sh7750_register_io_device(SH7750State *s, sh7750_io_device *device) { int i; for (i = 0; i < NB_DEVICES; i++) { - if (s->devices[i] == NULL) { - s->devices[i] = device; - return 0; - } + if (s->devices[i] == NULL) { + s->devices[i] = device; + return 0; + } } return -1; } static uint16_t portdir(uint32_t v) { -#define EVENPORTMASK(n) ((v & (1<<((n)<<1))) >> (n)) +#define EVENPORTMASK(n) ((v & (1 << ((n) << 1))) >> (n)) return - EVENPORTMASK(15) | EVENPORTMASK(14) | EVENPORTMASK(13) | - EVENPORTMASK(12) | EVENPORTMASK(11) | EVENPORTMASK(10) | - EVENPORTMASK(9) | EVENPORTMASK(8) | EVENPORTMASK(7) | - EVENPORTMASK(6) | EVENPORTMASK(5) | EVENPORTMASK(4) | - EVENPORTMASK(3) | EVENPORTMASK(2) | EVENPORTMASK(1) | - EVENPORTMASK(0); + EVENPORTMASK(15) | EVENPORTMASK(14) | EVENPORTMASK(13) | + EVENPORTMASK(12) | EVENPORTMASK(11) | EVENPORTMASK(10) | + EVENPORTMASK(9) | EVENPORTMASK(8) | EVENPORTMASK(7) | + EVENPORTMASK(6) | EVENPORTMASK(5) | EVENPORTMASK(4) | + EVENPORTMASK(3) | EVENPORTMASK(2) | EVENPORTMASK(1) | + EVENPORTMASK(0); } static uint16_t portpullup(uint32_t v) { -#define ODDPORTMASK(n) ((v & (1<<(((n)<<1)+1))) >> (n)) +#define ODDPORTMASK(n) ((v & (1 << (((n) << 1) + 1))) >> (n)) return - ODDPORTMASK(15) | ODDPORTMASK(14) | ODDPORTMASK(13) | - ODDPORTMASK(12) | ODDPORTMASK(11) | ODDPORTMASK(10) | - ODDPORTMASK(9) | ODDPORTMASK(8) | ODDPORTMASK(7) | ODDPORTMASK(6) | - ODDPORTMASK(5) | ODDPORTMASK(4) | ODDPORTMASK(3) | ODDPORTMASK(2) | - ODDPORTMASK(1) | ODDPORTMASK(0); + ODDPORTMASK(15) | ODDPORTMASK(14) | ODDPORTMASK(13) | + ODDPORTMASK(12) | ODDPORTMASK(11) | ODDPORTMASK(10) | + ODDPORTMASK(9) | ODDPORTMASK(8) | ODDPORTMASK(7) | ODDPORTMASK(6) | + ODDPORTMASK(5) | ODDPORTMASK(4) | ODDPORTMASK(3) | ODDPORTMASK(2) | + ODDPORTMASK(1) | ODDPORTMASK(0); } -static uint16_t porta_lines(SH7750State * s) +static uint16_t porta_lines(SH7750State *s) { - return (s->portdira & s->pdtra) | /* CPU */ - (s->periph_portdira & s->periph_pdtra) | /* Peripherals */ - (~(s->portdira | s->periph_portdira) & s->portpullupa); /* Pullups */ + return (s->portdira & s->pdtra) | /* CPU */ + (s->periph_portdira & s->periph_pdtra) | /* Peripherals */ + (~(s->portdira | s->periph_portdira) & s->portpullupa); /* Pullups */ } -static uint16_t portb_lines(SH7750State * s) +static uint16_t portb_lines(SH7750State *s) { - return (s->portdirb & s->pdtrb) | /* CPU */ - (s->periph_portdirb & s->periph_pdtrb) | /* Peripherals */ - (~(s->portdirb | s->periph_portdirb) & s->portpullupb); /* Pullups */ + return (s->portdirb & s->pdtrb) | /* CPU */ + (s->periph_portdirb & s->periph_pdtrb) | /* Peripherals */ + (~(s->portdirb | s->periph_portdirb) & s->portpullupb); /* Pullups */ } -static void gen_port_interrupts(SH7750State * s) +static void gen_port_interrupts(SH7750State *s) { /* XXXXX interrupts not generated */ } -static void porta_changed(SH7750State * s, uint16_t prev) +static void porta_changed(SH7750State *s, uint16_t prev) { uint16_t currenta, changes; int i, r = 0; -#if 0 - fprintf(stderr, "porta changed from 0x%04x to 0x%04x\n", - prev, porta_lines(s)); - fprintf(stderr, "pdtra=0x%04x, 
pctra=0x%08x\n", s->pdtra, s->pctra); -#endif currenta = porta_lines(s); - if (currenta == prev) - return; + if (currenta == prev) { + return; + } + trace_sh7750_porta(prev, currenta, s->pdtra, s->pctra); changes = currenta ^ prev; for (i = 0; i < NB_DEVICES; i++) { - if (s->devices[i] && (s->devices[i]->portamask_trigger & changes)) { - r |= s->devices[i]->port_change_cb(currenta, portb_lines(s), - &s->periph_pdtra, - &s->periph_portdira, - &s->periph_pdtrb, - &s->periph_portdirb); - } + if (s->devices[i] && (s->devices[i]->portamask_trigger & changes)) { + r |= s->devices[i]->port_change_cb(currenta, portb_lines(s), + &s->periph_pdtra, + &s->periph_portdira, + &s->periph_pdtrb, + &s->periph_portdirb); + } } - if (r) - gen_port_interrupts(s); + if (r) { + gen_port_interrupts(s); + } } -static void portb_changed(SH7750State * s, uint16_t prev) +static void portb_changed(SH7750State *s, uint16_t prev) { uint16_t currentb, changes; int i, r = 0; currentb = portb_lines(s); - if (currentb == prev) - return; + if (currentb == prev) { + return; + } + trace_sh7750_portb(prev, currentb, s->pdtrb, s->pctrb); changes = currentb ^ prev; for (i = 0; i < NB_DEVICES; i++) { - if (s->devices[i] && (s->devices[i]->portbmask_trigger & changes)) { - r |= s->devices[i]->port_change_cb(portb_lines(s), currentb, - &s->periph_pdtra, - &s->periph_portdira, - &s->periph_pdtrb, - &s->periph_portdirb); - } + if (s->devices[i] && (s->devices[i]->portbmask_trigger & changes)) { + r |= s->devices[i]->port_change_cb(portb_lines(s), currentb, + &s->periph_pdtra, + &s->periph_portdira, + &s->periph_pdtrb, + &s->periph_portdirb); + } } - if (r) - gen_port_interrupts(s); + if (r) { + gen_port_interrupts(s); + } } -/********************************************************************** - Memory -**********************************************************************/ +/* + * Memory + */ static void error_access(const char *kind, hwaddr addr) { fprintf(stderr, "%s to %s (0x" TARGET_FMT_plx ") not supported\n", - kind, regname(addr), addr); + kind, regname(addr), addr); } static void ignore_access(const char *kind, hwaddr addr) { fprintf(stderr, "%s to %s (0x" TARGET_FMT_plx ") ignored\n", - kind, regname(addr), addr); + kind, regname(addr), addr); } static uint32_t sh7750_mem_readb(void *opaque, hwaddr addr) { switch (addr) { default: - error_access("byte read", addr); + error_access("byte read", addr); abort(); } } @@ -226,30 +232,31 @@ static uint32_t sh7750_mem_readw(void *opaque, hwaddr addr) switch (addr) { case SH7750_BCR2_A7: - return s->bcr2; + return s->bcr2; case SH7750_BCR3_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("word read", addr); - return s->bcr3; + if (!has_bcr3_and_bcr4(s)) { + error_access("word read", addr); + } + return s->bcr3; case SH7750_FRQCR_A7: - return 0; + return 0; case SH7750_PCR_A7: - return s->pcr; + return s->pcr; case SH7750_RFCR_A7: - fprintf(stderr, - "Read access to refresh count register, incrementing\n"); - return s->rfcr++; + fprintf(stderr, + "Read access to refresh count register, incrementing\n"); + return s->rfcr++; case SH7750_PDTRA_A7: - return porta_lines(s); + return porta_lines(s); case SH7750_PDTRB_A7: - return portb_lines(s); + return portb_lines(s); case SH7750_RTCOR_A7: case SH7750_RTCNT_A7: case SH7750_RTCSR_A7: - ignore_access("word read", addr); - return 0; + ignore_access("word read", addr); + return 0; default: - error_access("word read", addr); + error_access("word read", addr); abort(); } } @@ -261,11 +268,12 @@ static uint32_t sh7750_mem_readl(void *opaque, 
hwaddr addr) switch (addr) { case SH7750_BCR1_A7: - return s->bcr1; + return s->bcr1; case SH7750_BCR4_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("long read", addr); - return s->bcr4; + if (!has_bcr3_and_bcr4(s)) { + error_access("long read", addr); + } + return s->bcr4; case SH7750_WCR1_A7: case SH7750_WCR2_A7: case SH7750_WCR3_A7: @@ -289,31 +297,31 @@ static uint32_t sh7750_mem_readl(void *opaque, hwaddr addr) case SH7750_INTEVT_A7: return s->cpu->env.intevt; case SH7750_CCR_A7: - return s->ccr; - case 0x1f000030: /* Processor version */ + return s->ccr; + case 0x1f000030: /* Processor version */ scc = SUPERH_CPU_GET_CLASS(s->cpu); return scc->pvr; - case 0x1f000040: /* Cache version */ + case 0x1f000040: /* Cache version */ scc = SUPERH_CPU_GET_CLASS(s->cpu); return scc->cvr; - case 0x1f000044: /* Processor revision */ + case 0x1f000044: /* Processor revision */ scc = SUPERH_CPU_GET_CLASS(s->cpu); return scc->prr; default: - error_access("long read", addr); + error_access("long read", addr); abort(); } } #define is_in_sdrmx(a, x) (a >= SH7750_SDMR ## x ## _A7 \ - && a <= (SH7750_SDMR ## x ## _A7 + SH7750_SDMR ## x ## _REGNB)) + && a <= (SH7750_SDMR ## x ## _A7 + SH7750_SDMR ## x ## _REGNB)) static void sh7750_mem_writeb(void *opaque, hwaddr addr, - uint32_t mem_value) + uint32_t mem_value) { if (is_in_sdrmx(addr, 2) || is_in_sdrmx(addr, 3)) { - ignore_access("byte write", addr); - return; + ignore_access("byte write", addr); + return; } error_access("byte write", addr); @@ -321,94 +329,96 @@ static void sh7750_mem_writeb(void *opaque, hwaddr addr, } static void sh7750_mem_writew(void *opaque, hwaddr addr, - uint32_t mem_value) + uint32_t mem_value) { SH7750State *s = opaque; uint16_t temp; switch (addr) { - /* SDRAM controller */ + /* SDRAM controller */ case SH7750_BCR2_A7: s->bcr2 = mem_value; return; case SH7750_BCR3_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("word write", addr); - s->bcr3 = mem_value; - return; + if (!has_bcr3_and_bcr4(s)) { + error_access("word write", addr); + } + s->bcr3 = mem_value; + return; case SH7750_PCR_A7: - s->pcr = mem_value; - return; + s->pcr = mem_value; + return; case SH7750_RTCNT_A7: case SH7750_RTCOR_A7: case SH7750_RTCSR_A7: - ignore_access("word write", addr); - return; - /* IO ports */ + ignore_access("word write", addr); + return; + /* IO ports */ case SH7750_PDTRA_A7: - temp = porta_lines(s); - s->pdtra = mem_value; - porta_changed(s, temp); - return; + temp = porta_lines(s); + s->pdtra = mem_value; + porta_changed(s, temp); + return; case SH7750_PDTRB_A7: - temp = portb_lines(s); - s->pdtrb = mem_value; - portb_changed(s, temp); - return; + temp = portb_lines(s); + s->pdtrb = mem_value; + portb_changed(s, temp); + return; case SH7750_RFCR_A7: - fprintf(stderr, "Write access to refresh count register\n"); - s->rfcr = mem_value; - return; + fprintf(stderr, "Write access to refresh count register\n"); + s->rfcr = mem_value; + return; case SH7750_GPIOIC_A7: - s->gpioic = mem_value; - if (mem_value != 0) { - fprintf(stderr, "I/O interrupts not implemented\n"); + s->gpioic = mem_value; + if (mem_value != 0) { + fprintf(stderr, "I/O interrupts not implemented\n"); abort(); - } - return; + } + return; default: - error_access("word write", addr); + error_access("word write", addr); abort(); } } static void sh7750_mem_writel(void *opaque, hwaddr addr, - uint32_t mem_value) + uint32_t mem_value) { SH7750State *s = opaque; uint16_t temp; switch (addr) { - /* SDRAM controller */ + /* SDRAM controller */ case SH7750_BCR1_A7: s->bcr1 = mem_value; 
return; case SH7750_BCR4_A7: - if(!has_bcr3_and_bcr4(s)) - error_access("long write", addr); - s->bcr4 = mem_value; - return; + if (!has_bcr3_and_bcr4(s)) { + error_access("long write", addr); + } + s->bcr4 = mem_value; + return; case SH7750_WCR1_A7: case SH7750_WCR2_A7: case SH7750_WCR3_A7: case SH7750_MCR_A7: - ignore_access("long write", addr); - return; - /* IO ports */ + ignore_access("long write", addr); + return; + /* IO ports */ case SH7750_PCTRA_A7: - temp = porta_lines(s); - s->pctra = mem_value; - s->portdira = portdir(mem_value); - s->portpullupa = portpullup(mem_value); - porta_changed(s, temp); - return; + temp = porta_lines(s); + s->pctra = mem_value; + s->portdira = portdir(mem_value); + s->portpullupa = portpullup(mem_value); + porta_changed(s, temp); + return; case SH7750_PCTRB_A7: - temp = portb_lines(s); - s->pctrb = mem_value; - s->portdirb = portdir(mem_value); - s->portpullupb = portpullup(mem_value); - portb_changed(s, temp); - return; + temp = portb_lines(s); + s->pctrb = mem_value; + s->portdirb = portdir(mem_value); + s->portpullupb = portpullup(mem_value); + portb_changed(s, temp); + return; case SH7750_MMUCR_A7: if (mem_value & MMUCR_TI) { cpu_sh4_invalidate_tlb(&s->cpu->env); @@ -444,10 +454,10 @@ static void sh7750_mem_writel(void *opaque, hwaddr addr, s->cpu->env.intevt = mem_value & 0x000007ff; return; case SH7750_CCR_A7: - s->ccr = mem_value; - return; + s->ccr = mem_value; + return; default: - error_access("long write", addr); + error_access("long write", addr); abort(); } } @@ -492,161 +502,161 @@ static const MemoryRegionOps sh7750_mem_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -/* sh775x interrupt controller tables for sh_intc.c +/* + * sh775x interrupt controller tables for sh_intc.c * stolen from linux/arch/sh/kernel/cpu/sh4/setup-sh7750.c */ enum { - UNUSED = 0, - - /* interrupt sources */ - IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, IRL_7, - IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E, - IRL0, IRL1, IRL2, IRL3, - HUDI, GPIOI, - DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, DMAC_DMTE3, - DMAC_DMTE4, DMAC_DMTE5, DMAC_DMTE6, DMAC_DMTE7, - DMAC_DMAE, - PCIC0_PCISERR, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, - PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3, - TMU3, TMU4, TMU0, TMU1, TMU2_TUNI, TMU2_TICPI, - RTC_ATI, RTC_PRI, RTC_CUI, - SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI, - SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI, - WDT, - REF_RCMI, REF_ROVI, - - /* interrupt groups */ - DMAC, PCIC1, TMU2, RTC, SCI1, SCIF, REF, - /* irl bundle */ - IRL, - - NR_SOURCES, + UNUSED = 0, + + /* interrupt sources */ + IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, IRL_7, + IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E, + IRL0, IRL1, IRL2, IRL3, + HUDI, GPIOI, + DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, DMAC_DMTE3, + DMAC_DMTE4, DMAC_DMTE5, DMAC_DMTE6, DMAC_DMTE7, + DMAC_DMAE, + PCIC0_PCISERR, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, + PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3, + TMU3, TMU4, TMU0, TMU1, TMU2_TUNI, TMU2_TICPI, + RTC_ATI, RTC_PRI, RTC_CUI, + SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI, + SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI, + WDT, + REF_RCMI, REF_ROVI, + + /* interrupt groups */ + DMAC, PCIC1, TMU2, RTC, SCI1, SCIF, REF, + /* irl bundle */ + IRL, + + NR_SOURCES, }; static struct intc_vect vectors[] = { - INTC_VECT(HUDI, 0x600), INTC_VECT(GPIOI, 0x620), - INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420), - INTC_VECT(TMU2_TUNI, 0x440), INTC_VECT(TMU2_TICPI, 0x460), - INTC_VECT(RTC_ATI, 0x480), INTC_VECT(RTC_PRI, 0x4a0), - 
INTC_VECT(RTC_CUI, 0x4c0), - INTC_VECT(SCI1_ERI, 0x4e0), INTC_VECT(SCI1_RXI, 0x500), - INTC_VECT(SCI1_TXI, 0x520), INTC_VECT(SCI1_TEI, 0x540), - INTC_VECT(SCIF_ERI, 0x700), INTC_VECT(SCIF_RXI, 0x720), - INTC_VECT(SCIF_BRI, 0x740), INTC_VECT(SCIF_TXI, 0x760), - INTC_VECT(WDT, 0x560), - INTC_VECT(REF_RCMI, 0x580), INTC_VECT(REF_ROVI, 0x5a0), + INTC_VECT(HUDI, 0x600), INTC_VECT(GPIOI, 0x620), + INTC_VECT(TMU0, 0x400), INTC_VECT(TMU1, 0x420), + INTC_VECT(TMU2_TUNI, 0x440), INTC_VECT(TMU2_TICPI, 0x460), + INTC_VECT(RTC_ATI, 0x480), INTC_VECT(RTC_PRI, 0x4a0), + INTC_VECT(RTC_CUI, 0x4c0), + INTC_VECT(SCI1_ERI, 0x4e0), INTC_VECT(SCI1_RXI, 0x500), + INTC_VECT(SCI1_TXI, 0x520), INTC_VECT(SCI1_TEI, 0x540), + INTC_VECT(SCIF_ERI, 0x700), INTC_VECT(SCIF_RXI, 0x720), + INTC_VECT(SCIF_BRI, 0x740), INTC_VECT(SCIF_TXI, 0x760), + INTC_VECT(WDT, 0x560), + INTC_VECT(REF_RCMI, 0x580), INTC_VECT(REF_ROVI, 0x5a0), }; static struct intc_group groups[] = { - INTC_GROUP(TMU2, TMU2_TUNI, TMU2_TICPI), - INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI), - INTC_GROUP(SCI1, SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI), - INTC_GROUP(SCIF, SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI), - INTC_GROUP(REF, REF_RCMI, REF_ROVI), + INTC_GROUP(TMU2, TMU2_TUNI, TMU2_TICPI), + INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI), + INTC_GROUP(SCI1, SCI1_ERI, SCI1_RXI, SCI1_TXI, SCI1_TEI), + INTC_GROUP(SCIF, SCIF_ERI, SCIF_RXI, SCIF_BRI, SCIF_TXI), + INTC_GROUP(REF, REF_RCMI, REF_ROVI), }; static struct intc_prio_reg prio_registers[] = { - { 0xffd00004, 0, 16, 4, /* IPRA */ { TMU0, TMU1, TMU2, RTC } }, - { 0xffd00008, 0, 16, 4, /* IPRB */ { WDT, REF, SCI1, 0 } }, - { 0xffd0000c, 0, 16, 4, /* IPRC */ { GPIOI, DMAC, SCIF, HUDI } }, - { 0xffd00010, 0, 16, 4, /* IPRD */ { IRL0, IRL1, IRL2, IRL3 } }, - { 0xfe080000, 0, 32, 4, /* INTPRI00 */ { 0, 0, 0, 0, - TMU4, TMU3, - PCIC1, PCIC0_PCISERR } }, + { 0xffd00004, 0, 16, 4, /* IPRA */ { TMU0, TMU1, TMU2, RTC } }, + { 0xffd00008, 0, 16, 4, /* IPRB */ { WDT, REF, SCI1, 0 } }, + { 0xffd0000c, 0, 16, 4, /* IPRC */ { GPIOI, DMAC, SCIF, HUDI } }, + { 0xffd00010, 0, 16, 4, /* IPRD */ { IRL0, IRL1, IRL2, IRL3 } }, + { 0xfe080000, 0, 32, 4, /* INTPRI00 */ { 0, 0, 0, 0, TMU4, TMU3, + PCIC1, PCIC0_PCISERR } }, }; /* SH7750, SH7750S, SH7751 and SH7091 all have 4-channel DMA controllers */ static struct intc_vect vectors_dma4[] = { - INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), - INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), - INTC_VECT(DMAC_DMAE, 0x6c0), + INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), + INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), + INTC_VECT(DMAC_DMAE, 0x6c0), }; static struct intc_group groups_dma4[] = { - INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, - DMAC_DMTE3, DMAC_DMAE), + INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, + DMAC_DMTE3, DMAC_DMAE), }; /* SH7750R and SH7751R both have 8-channel DMA controllers */ static struct intc_vect vectors_dma8[] = { - INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), - INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), - INTC_VECT(DMAC_DMTE4, 0x780), INTC_VECT(DMAC_DMTE5, 0x7a0), - INTC_VECT(DMAC_DMTE6, 0x7c0), INTC_VECT(DMAC_DMTE7, 0x7e0), - INTC_VECT(DMAC_DMAE, 0x6c0), + INTC_VECT(DMAC_DMTE0, 0x640), INTC_VECT(DMAC_DMTE1, 0x660), + INTC_VECT(DMAC_DMTE2, 0x680), INTC_VECT(DMAC_DMTE3, 0x6a0), + INTC_VECT(DMAC_DMTE4, 0x780), INTC_VECT(DMAC_DMTE5, 0x7a0), + INTC_VECT(DMAC_DMTE6, 0x7c0), INTC_VECT(DMAC_DMTE7, 0x7e0), + INTC_VECT(DMAC_DMAE, 0x6c0), }; static struct intc_group 
groups_dma8[] = { - INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, - DMAC_DMTE3, DMAC_DMTE4, DMAC_DMTE5, - DMAC_DMTE6, DMAC_DMTE7, DMAC_DMAE), + INTC_GROUP(DMAC, DMAC_DMTE0, DMAC_DMTE1, DMAC_DMTE2, + DMAC_DMTE3, DMAC_DMTE4, DMAC_DMTE5, + DMAC_DMTE6, DMAC_DMTE7, DMAC_DMAE), }; /* SH7750R, SH7751 and SH7751R all have two extra timer channels */ static struct intc_vect vectors_tmu34[] = { - INTC_VECT(TMU3, 0xb00), INTC_VECT(TMU4, 0xb80), + INTC_VECT(TMU3, 0xb00), INTC_VECT(TMU4, 0xb80), }; static struct intc_mask_reg mask_registers[] = { - { 0xfe080040, 0xfe080060, 32, /* INTMSK00 / INTMSKCLR00 */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, TMU4, TMU3, - PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, - PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, - PCIC1_PCIDMA3, PCIC0_PCISERR } }, + { 0xfe080040, 0xfe080060, 32, /* INTMSK00 / INTMSKCLR00 */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, TMU4, TMU3, + PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, + PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, + PCIC1_PCIDMA3, PCIC0_PCISERR } }, }; /* SH7750S, SH7750R, SH7751 and SH7751R all have IRLM priority registers */ static struct intc_vect vectors_irlm[] = { - INTC_VECT(IRL0, 0x240), INTC_VECT(IRL1, 0x2a0), - INTC_VECT(IRL2, 0x300), INTC_VECT(IRL3, 0x360), + INTC_VECT(IRL0, 0x240), INTC_VECT(IRL1, 0x2a0), + INTC_VECT(IRL2, 0x300), INTC_VECT(IRL3, 0x360), }; /* SH7751 and SH7751R both have PCI */ static struct intc_vect vectors_pci[] = { - INTC_VECT(PCIC0_PCISERR, 0xa00), INTC_VECT(PCIC1_PCIERR, 0xae0), - INTC_VECT(PCIC1_PCIPWDWN, 0xac0), INTC_VECT(PCIC1_PCIPWON, 0xaa0), - INTC_VECT(PCIC1_PCIDMA0, 0xa80), INTC_VECT(PCIC1_PCIDMA1, 0xa60), - INTC_VECT(PCIC1_PCIDMA2, 0xa40), INTC_VECT(PCIC1_PCIDMA3, 0xa20), + INTC_VECT(PCIC0_PCISERR, 0xa00), INTC_VECT(PCIC1_PCIERR, 0xae0), + INTC_VECT(PCIC1_PCIPWDWN, 0xac0), INTC_VECT(PCIC1_PCIPWON, 0xaa0), + INTC_VECT(PCIC1_PCIDMA0, 0xa80), INTC_VECT(PCIC1_PCIDMA1, 0xa60), + INTC_VECT(PCIC1_PCIDMA2, 0xa40), INTC_VECT(PCIC1_PCIDMA3, 0xa20), }; static struct intc_group groups_pci[] = { - INTC_GROUP(PCIC1, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, - PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3), + INTC_GROUP(PCIC1, PCIC1_PCIERR, PCIC1_PCIPWDWN, PCIC1_PCIPWON, + PCIC1_PCIDMA0, PCIC1_PCIDMA1, PCIC1_PCIDMA2, PCIC1_PCIDMA3), }; static struct intc_vect vectors_irl[] = { - INTC_VECT(IRL_0, 0x200), - INTC_VECT(IRL_1, 0x220), - INTC_VECT(IRL_2, 0x240), - INTC_VECT(IRL_3, 0x260), - INTC_VECT(IRL_4, 0x280), - INTC_VECT(IRL_5, 0x2a0), - INTC_VECT(IRL_6, 0x2c0), - INTC_VECT(IRL_7, 0x2e0), - INTC_VECT(IRL_8, 0x300), - INTC_VECT(IRL_9, 0x320), - INTC_VECT(IRL_A, 0x340), - INTC_VECT(IRL_B, 0x360), - INTC_VECT(IRL_C, 0x380), - INTC_VECT(IRL_D, 0x3a0), - INTC_VECT(IRL_E, 0x3c0), + INTC_VECT(IRL_0, 0x200), + INTC_VECT(IRL_1, 0x220), + INTC_VECT(IRL_2, 0x240), + INTC_VECT(IRL_3, 0x260), + INTC_VECT(IRL_4, 0x280), + INTC_VECT(IRL_5, 0x2a0), + INTC_VECT(IRL_6, 0x2c0), + INTC_VECT(IRL_7, 0x2e0), + INTC_VECT(IRL_8, 0x300), + INTC_VECT(IRL_9, 0x320), + INTC_VECT(IRL_A, 0x340), + INTC_VECT(IRL_B, 0x360), + INTC_VECT(IRL_C, 0x380), + INTC_VECT(IRL_D, 0x3a0), + INTC_VECT(IRL_E, 0x3c0), }; static struct intc_group groups_irl[] = { - INTC_GROUP(IRL, IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, - IRL_7, IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E), + INTC_GROUP(IRL, IRL_0, IRL_1, IRL_2, IRL_3, IRL_4, IRL_5, IRL_6, + IRL_7, IRL_8, IRL_9, IRL_A, IRL_B, IRL_C, IRL_D, IRL_E), }; 
-/********************************************************************** - Memory mapped cache and TLB -**********************************************************************/ +/* + * Memory mapped cache and TLB + */ #define MM_REGION_MASK 0x07000000 #define MM_ICACHE_ADDR (0) @@ -680,7 +690,7 @@ static uint64_t sh7750_mmct_read(void *opaque, hwaddr addr, case MM_ICACHE_ADDR: case MM_ICACHE_DATA: /* do nothing */ - break; + break; case MM_ITLB_ADDR: ret = cpu_sh4_read_mmaped_itlb_addr(&s->cpu->env, addr); break; @@ -690,7 +700,7 @@ static uint64_t sh7750_mmct_read(void *opaque, hwaddr addr, case MM_OCACHE_ADDR: case MM_OCACHE_DATA: /* do nothing */ - break; + break; case MM_UTLB_ADDR: ret = cpu_sh4_read_mmaped_utlb_addr(&s->cpu->env, addr); break; @@ -723,27 +733,27 @@ static void sh7750_mmct_write(void *opaque, hwaddr addr, case MM_ICACHE_ADDR: case MM_ICACHE_DATA: /* do nothing */ - break; + break; case MM_ITLB_ADDR: cpu_sh4_write_mmaped_itlb_addr(&s->cpu->env, addr, mem_value); break; case MM_ITLB_DATA: cpu_sh4_write_mmaped_itlb_data(&s->cpu->env, addr, mem_value); abort(); - break; + break; case MM_OCACHE_ADDR: case MM_OCACHE_DATA: /* do nothing */ - break; + break; case MM_UTLB_ADDR: cpu_sh4_write_mmaped_utlb_addr(&s->cpu->env, addr, mem_value); - break; + break; case MM_UTLB_DATA: cpu_sh4_write_mmaped_utlb_data(&s->cpu->env, addr, mem_value); - break; + break; default: abort(); - break; + break; } } @@ -756,10 +766,13 @@ static const MemoryRegionOps sh7750_mmct_ops = { SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem) { SH7750State *s; + DeviceState *dev; + SysBusDevice *sb; + MemoryRegion *mr, *alias; s = g_malloc0(sizeof(SH7750State)); s->cpu = cpu; - s->periph_freq = 60000000; /* 60MHz */ + s->periph_freq = 60000000; /* 60MHz */ memory_region_init_io(&s->iomem, NULL, &sh7750_mem_ops, s, "memory", 0x1fc01000); @@ -792,81 +805,100 @@ SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem) memory_region_add_subregion(sysmem, 0xf0000000, &s->mmct_iomem); sh_intc_init(sysmem, &s->intc, NR_SOURCES, - _INTC_ARRAY(mask_registers), - _INTC_ARRAY(prio_registers)); + _INTC_ARRAY(mask_registers), + _INTC_ARRAY(prio_registers)); sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors), - _INTC_ARRAY(groups)); + _INTC_ARRAY(vectors), + _INTC_ARRAY(groups)); cpu->env.intc_handle = &s->intc; - sh_serial_init(sysmem, 0x1fe00000, - 0, s->periph_freq, serial_hd(0), - s->intc.irqs[SCI1_ERI], - s->intc.irqs[SCI1_RXI], - s->intc.irqs[SCI1_TXI], - s->intc.irqs[SCI1_TEI], - NULL); - sh_serial_init(sysmem, 0x1fe80000, - SH_SERIAL_FEAT_SCIF, - s->periph_freq, serial_hd(1), - s->intc.irqs[SCIF_ERI], - s->intc.irqs[SCIF_RXI], - s->intc.irqs[SCIF_TXI], - NULL, - s->intc.irqs[SCIF_BRI]); + /* SCI */ + dev = qdev_new(TYPE_SH_SERIAL); + dev->id = g_strdup("sci"); + qdev_prop_set_chr(dev, "chardev", serial_hd(0)); + sb = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sb, &error_fatal); + sysbus_mmio_map(sb, 0, 0xffe00000); + alias = g_malloc(sizeof(*alias)); + mr = sysbus_mmio_get_region(sb, 0); + memory_region_init_alias(alias, OBJECT(dev), "sci-a7", mr, + 0, memory_region_size(mr)); + memory_region_add_subregion(sysmem, A7ADDR(0xffe00000), alias); + qdev_connect_gpio_out_named(dev, "eri", 0, s->intc.irqs[SCI1_ERI]); + qdev_connect_gpio_out_named(dev, "rxi", 0, s->intc.irqs[SCI1_RXI]); + qdev_connect_gpio_out_named(dev, "txi", 0, s->intc.irqs[SCI1_TXI]); + qdev_connect_gpio_out_named(dev, "tei", 0, s->intc.irqs[SCI1_TEI]); + + /* SCIF */ + dev = qdev_new(TYPE_SH_SERIAL); + dev->id = 
g_strdup("scif"); + qdev_prop_set_chr(dev, "chardev", serial_hd(1)); + qdev_prop_set_uint8(dev, "features", SH_SERIAL_FEAT_SCIF); + sb = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sb, &error_fatal); + sysbus_mmio_map(sb, 0, 0xffe80000); + alias = g_malloc(sizeof(*alias)); + mr = sysbus_mmio_get_region(sb, 0); + memory_region_init_alias(alias, OBJECT(dev), "scif-a7", mr, + 0, memory_region_size(mr)); + memory_region_add_subregion(sysmem, A7ADDR(0xffe80000), alias); + qdev_connect_gpio_out_named(dev, "eri", 0, s->intc.irqs[SCIF_ERI]); + qdev_connect_gpio_out_named(dev, "rxi", 0, s->intc.irqs[SCIF_RXI]); + qdev_connect_gpio_out_named(dev, "txi", 0, s->intc.irqs[SCIF_TXI]); + qdev_connect_gpio_out_named(dev, "bri", 0, s->intc.irqs[SCIF_BRI]); tmu012_init(sysmem, 0x1fd80000, - TMU012_FEAT_TOCR | TMU012_FEAT_3CHAN | TMU012_FEAT_EXTCLK, - s->periph_freq, - s->intc.irqs[TMU0], - s->intc.irqs[TMU1], - s->intc.irqs[TMU2_TUNI], - s->intc.irqs[TMU2_TICPI]); + TMU012_FEAT_TOCR | TMU012_FEAT_3CHAN | TMU012_FEAT_EXTCLK, + s->periph_freq, + s->intc.irqs[TMU0], + s->intc.irqs[TMU1], + s->intc.irqs[TMU2_TUNI], + s->intc.irqs[TMU2_TICPI]); if (cpu->env.id & (SH_CPU_SH7750 | SH_CPU_SH7750S | SH_CPU_SH7751)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_dma4), - _INTC_ARRAY(groups_dma4)); + _INTC_ARRAY(vectors_dma4), + _INTC_ARRAY(groups_dma4)); } if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751R)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_dma8), - _INTC_ARRAY(groups_dma8)); + _INTC_ARRAY(vectors_dma8), + _INTC_ARRAY(groups_dma8)); } if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751 | SH_CPU_SH7751R)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_tmu34), - NULL, 0); + _INTC_ARRAY(vectors_tmu34), + NULL, 0); tmu012_init(sysmem, 0x1e100000, 0, s->periph_freq, - s->intc.irqs[TMU3], - s->intc.irqs[TMU4], - NULL, NULL); + s->intc.irqs[TMU3], + s->intc.irqs[TMU4], + NULL, NULL); } if (cpu->env.id & (SH_CPU_SH7751_ALL)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_pci), - _INTC_ARRAY(groups_pci)); + _INTC_ARRAY(vectors_pci), + _INTC_ARRAY(groups_pci)); } if (cpu->env.id & (SH_CPU_SH7750S | SH_CPU_SH7750R | SH_CPU_SH7751_ALL)) { sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_irlm), - NULL, 0); + _INTC_ARRAY(vectors_irlm), + NULL, 0); } sh_intc_register_sources(&s->intc, - _INTC_ARRAY(vectors_irl), - _INTC_ARRAY(groups_irl)); + _INTC_ARRAY(vectors_irl), + _INTC_ARRAY(groups_irl)); return s; } qemu_irq sh7750_irl(SH7750State *s) { - sh_intc_toggle_source(sh_intc_source(&s->intc, IRL), 1, 0); /* enable */ - return qemu_allocate_irq(sh_intc_set_irl, sh_intc_source(&s->intc, IRL), 0); + sh_intc_toggle_source(&s->intc.sources[IRL], 1, 0); /* enable */ + return qemu_allocate_irq(sh_intc_set_irl, &s->intc.sources[IRL], 0); } diff --git a/hw/sh4/sh7750_regnames.c b/hw/sh4/sh7750_regnames.c index 0630fe3cf4a..e531d46a8ed 100644 --- a/hw/sh4/sh7750_regnames.c +++ b/hw/sh4/sh7750_regnames.c @@ -12,85 +12,87 @@ typedef struct { static regname_t regnames[] = { REGNAME(SH7750_PTEH_A7) - REGNAME(SH7750_PTEL_A7) - REGNAME(SH7750_PTEA_A7) - REGNAME(SH7750_TTB_A7) - REGNAME(SH7750_TEA_A7) - REGNAME(SH7750_MMUCR_A7) - REGNAME(SH7750_CCR_A7) - REGNAME(SH7750_QACR0_A7) - REGNAME(SH7750_QACR1_A7) - REGNAME(SH7750_TRA_A7) - REGNAME(SH7750_EXPEVT_A7) - REGNAME(SH7750_INTEVT_A7) - REGNAME(SH7750_STBCR_A7) - REGNAME(SH7750_STBCR2_A7) - REGNAME(SH7750_FRQCR_A7) - REGNAME(SH7750_WTCNT_A7) - REGNAME(SH7750_WTCSR_A7) - REGNAME(SH7750_R64CNT_A7) - REGNAME(SH7750_RSECCNT_A7) - 
REGNAME(SH7750_RMINCNT_A7) - REGNAME(SH7750_RHRCNT_A7) - REGNAME(SH7750_RWKCNT_A7) - REGNAME(SH7750_RDAYCNT_A7) - REGNAME(SH7750_RMONCNT_A7) - REGNAME(SH7750_RYRCNT_A7) - REGNAME(SH7750_RSECAR_A7) - REGNAME(SH7750_RMINAR_A7) - REGNAME(SH7750_RHRAR_A7) - REGNAME(SH7750_RWKAR_A7) - REGNAME(SH7750_RDAYAR_A7) - REGNAME(SH7750_RMONAR_A7) - REGNAME(SH7750_RCR1_A7) - REGNAME(SH7750_RCR2_A7) - REGNAME(SH7750_BCR1_A7) - REGNAME(SH7750_BCR2_A7) - REGNAME(SH7750_WCR1_A7) - REGNAME(SH7750_WCR2_A7) - REGNAME(SH7750_WCR3_A7) - REGNAME(SH7750_MCR_A7) - REGNAME(SH7750_PCR_A7) - REGNAME(SH7750_RTCSR_A7) - REGNAME(SH7750_RTCNT_A7) - REGNAME(SH7750_RTCOR_A7) - REGNAME(SH7750_RFCR_A7) - REGNAME(SH7750_SAR0_A7) - REGNAME(SH7750_SAR1_A7) - REGNAME(SH7750_SAR2_A7) - REGNAME(SH7750_SAR3_A7) - REGNAME(SH7750_DAR0_A7) - REGNAME(SH7750_DAR1_A7) - REGNAME(SH7750_DAR2_A7) - REGNAME(SH7750_DAR3_A7) - REGNAME(SH7750_DMATCR0_A7) - REGNAME(SH7750_DMATCR1_A7) - REGNAME(SH7750_DMATCR2_A7) - REGNAME(SH7750_DMATCR3_A7) - REGNAME(SH7750_CHCR0_A7) - REGNAME(SH7750_CHCR1_A7) - REGNAME(SH7750_CHCR2_A7) - REGNAME(SH7750_CHCR3_A7) - REGNAME(SH7750_DMAOR_A7) - REGNAME(SH7750_PCTRA_A7) - REGNAME(SH7750_PDTRA_A7) - REGNAME(SH7750_PCTRB_A7) - REGNAME(SH7750_PDTRB_A7) - REGNAME(SH7750_GPIOIC_A7) - REGNAME(SH7750_ICR_A7) - REGNAME(SH7750_BCR3_A7) - REGNAME(SH7750_BCR4_A7) - REGNAME(SH7750_SDMR2_A7) - REGNAME(SH7750_SDMR3_A7) {(uint32_t) - 1, NULL} + REGNAME(SH7750_PTEL_A7) + REGNAME(SH7750_PTEA_A7) + REGNAME(SH7750_TTB_A7) + REGNAME(SH7750_TEA_A7) + REGNAME(SH7750_MMUCR_A7) + REGNAME(SH7750_CCR_A7) + REGNAME(SH7750_QACR0_A7) + REGNAME(SH7750_QACR1_A7) + REGNAME(SH7750_TRA_A7) + REGNAME(SH7750_EXPEVT_A7) + REGNAME(SH7750_INTEVT_A7) + REGNAME(SH7750_STBCR_A7) + REGNAME(SH7750_STBCR2_A7) + REGNAME(SH7750_FRQCR_A7) + REGNAME(SH7750_WTCNT_A7) + REGNAME(SH7750_WTCSR_A7) + REGNAME(SH7750_R64CNT_A7) + REGNAME(SH7750_RSECCNT_A7) + REGNAME(SH7750_RMINCNT_A7) + REGNAME(SH7750_RHRCNT_A7) + REGNAME(SH7750_RWKCNT_A7) + REGNAME(SH7750_RDAYCNT_A7) + REGNAME(SH7750_RMONCNT_A7) + REGNAME(SH7750_RYRCNT_A7) + REGNAME(SH7750_RSECAR_A7) + REGNAME(SH7750_RMINAR_A7) + REGNAME(SH7750_RHRAR_A7) + REGNAME(SH7750_RWKAR_A7) + REGNAME(SH7750_RDAYAR_A7) + REGNAME(SH7750_RMONAR_A7) + REGNAME(SH7750_RCR1_A7) + REGNAME(SH7750_RCR2_A7) + REGNAME(SH7750_BCR1_A7) + REGNAME(SH7750_BCR2_A7) + REGNAME(SH7750_WCR1_A7) + REGNAME(SH7750_WCR2_A7) + REGNAME(SH7750_WCR3_A7) + REGNAME(SH7750_MCR_A7) + REGNAME(SH7750_PCR_A7) + REGNAME(SH7750_RTCSR_A7) + REGNAME(SH7750_RTCNT_A7) + REGNAME(SH7750_RTCOR_A7) + REGNAME(SH7750_RFCR_A7) + REGNAME(SH7750_SAR0_A7) + REGNAME(SH7750_SAR1_A7) + REGNAME(SH7750_SAR2_A7) + REGNAME(SH7750_SAR3_A7) + REGNAME(SH7750_DAR0_A7) + REGNAME(SH7750_DAR1_A7) + REGNAME(SH7750_DAR2_A7) + REGNAME(SH7750_DAR3_A7) + REGNAME(SH7750_DMATCR0_A7) + REGNAME(SH7750_DMATCR1_A7) + REGNAME(SH7750_DMATCR2_A7) + REGNAME(SH7750_DMATCR3_A7) + REGNAME(SH7750_CHCR0_A7) + REGNAME(SH7750_CHCR1_A7) + REGNAME(SH7750_CHCR2_A7) + REGNAME(SH7750_CHCR3_A7) + REGNAME(SH7750_DMAOR_A7) + REGNAME(SH7750_PCTRA_A7) + REGNAME(SH7750_PDTRA_A7) + REGNAME(SH7750_PCTRB_A7) + REGNAME(SH7750_PDTRB_A7) + REGNAME(SH7750_GPIOIC_A7) + REGNAME(SH7750_ICR_A7) + REGNAME(SH7750_BCR3_A7) + REGNAME(SH7750_BCR4_A7) + REGNAME(SH7750_SDMR2_A7) + REGNAME(SH7750_SDMR3_A7) + { (uint32_t)-1, NULL } }; const char *regname(uint32_t addr) { unsigned int i; - for (i = 0; regnames[i].regaddr != (uint32_t) - 1; i++) { - if (regnames[i].regaddr == addr) - return regnames[i].regname; + for (i = 0; regnames[i].regaddr != 
(uint32_t)-1; i++) { + if (regnames[i].regaddr == addr) { + return regnames[i].regname; + } } return ""; diff --git a/hw/sh4/sh7750_regs.h b/hw/sh4/sh7750_regs.h index ab073dadc74..beb571d5e9b 100644 --- a/hw/sh4/sh7750_regs.h +++ b/hw/sh4/sh7750_regs.h @@ -43,9 +43,8 @@ * All register has 2 addresses: in 0xff000000 - 0xffffffff (P4 address) and * in 0x1f000000 - 0x1fffffff (area 7 address) */ -#define SH7750_P4_BASE 0xff000000 /* Accessible only in - privileged mode */ -#define SH7750_A7_BASE 0x1f000000 /* Accessible only using TLB */ +#define SH7750_P4_BASE 0xff000000 /* Accessible only in privileged mode */ +#define SH7750_A7_BASE 0x1f000000 /* Accessible only using TLB */ #define SH7750_P4_REG32(ofs) (SH7750_P4_BASE + (ofs)) #define SH7750_A7_REG32(ofs) (SH7750_A7_BASE + (ofs)) @@ -55,84 +54,84 @@ */ /* Page Table Entry High register - PTEH */ -#define SH7750_PTEH_REGOFS 0x000000 /* offset */ +#define SH7750_PTEH_REGOFS 0x000000 /* offset */ #define SH7750_PTEH SH7750_P4_REG32(SH7750_PTEH_REGOFS) #define SH7750_PTEH_A7 SH7750_A7_REG32(SH7750_PTEH_REGOFS) -#define SH7750_PTEH_VPN 0xfffffd00 /* Virtual page number */ +#define SH7750_PTEH_VPN 0xfffffd00 /* Virtual page number */ #define SH7750_PTEH_VPN_S 10 -#define SH7750_PTEH_ASID 0x000000ff /* Address space identifier */ +#define SH7750_PTEH_ASID 0x000000ff /* Address space identifier */ #define SH7750_PTEH_ASID_S 0 /* Page Table Entry Low register - PTEL */ -#define SH7750_PTEL_REGOFS 0x000004 /* offset */ +#define SH7750_PTEL_REGOFS 0x000004 /* offset */ #define SH7750_PTEL SH7750_P4_REG32(SH7750_PTEL_REGOFS) #define SH7750_PTEL_A7 SH7750_A7_REG32(SH7750_PTEL_REGOFS) -#define SH7750_PTEL_PPN 0x1ffffc00 /* Physical page number */ +#define SH7750_PTEL_PPN 0x1ffffc00 /* Physical page number */ #define SH7750_PTEL_PPN_S 10 -#define SH7750_PTEL_V 0x00000100 /* Validity (0-entry is invalid) */ -#define SH7750_PTEL_SZ1 0x00000080 /* Page size bit 1 */ -#define SH7750_PTEL_SZ0 0x00000010 /* Page size bit 0 */ -#define SH7750_PTEL_SZ_1KB 0x00000000 /* 1-kbyte page */ -#define SH7750_PTEL_SZ_4KB 0x00000010 /* 4-kbyte page */ -#define SH7750_PTEL_SZ_64KB 0x00000080 /* 64-kbyte page */ -#define SH7750_PTEL_SZ_1MB 0x00000090 /* 1-Mbyte page */ -#define SH7750_PTEL_PR 0x00000060 /* Protection Key Data */ -#define SH7750_PTEL_PR_ROPO 0x00000000 /* read-only in priv mode */ -#define SH7750_PTEL_PR_RWPO 0x00000020 /* read-write in priv mode */ -#define SH7750_PTEL_PR_ROPU 0x00000040 /* read-only in priv or user mode */ -#define SH7750_PTEL_PR_RWPU 0x00000060 /* read-write in priv or user mode */ -#define SH7750_PTEL_C 0x00000008 /* Cacheability - (0 - page not cacheable) */ -#define SH7750_PTEL_D 0x00000004 /* Dirty bit (1 - write has been - performed to a page) */ -#define SH7750_PTEL_SH 0x00000002 /* Share Status bit (1 - page are - shared by processes) */ -#define SH7750_PTEL_WT 0x00000001 /* Write-through bit, specifies the - cache write mode: - 0 - Copy-back mode - 1 - Write-through mode */ +#define SH7750_PTEL_V 0x00000100 /* Validity (0-entry is invalid) */ +#define SH7750_PTEL_SZ1 0x00000080 /* Page size bit 1 */ +#define SH7750_PTEL_SZ0 0x00000010 /* Page size bit 0 */ +#define SH7750_PTEL_SZ_1KB 0x00000000 /* 1-kbyte page */ +#define SH7750_PTEL_SZ_4KB 0x00000010 /* 4-kbyte page */ +#define SH7750_PTEL_SZ_64KB 0x00000080 /* 64-kbyte page */ +#define SH7750_PTEL_SZ_1MB 0x00000090 /* 1-Mbyte page */ +#define SH7750_PTEL_PR 0x00000060 /* Protection Key Data */ +#define SH7750_PTEL_PR_ROPO 0x00000000 /* read-only in priv mode */ +#define 
SH7750_PTEL_PR_RWPO 0x00000020 /* read-write in priv mode */ +#define SH7750_PTEL_PR_ROPU 0x00000040 /* read-only in priv or user mode */ +#define SH7750_PTEL_PR_RWPU 0x00000060 /* read-write in priv or user mode */ +#define SH7750_PTEL_C 0x00000008 /* Cacheability */ + /* (0 - page not cacheable) */ +#define SH7750_PTEL_D 0x00000004 /* Dirty bit (1 - write has been */ + /* performed to a page) */ +#define SH7750_PTEL_SH 0x00000002 /* Share Status bit (1 - page are */ + /* shared by processes) */ +#define SH7750_PTEL_WT 0x00000001 /* Write-through bit, specifies the */ + /* cache write mode: */ + /* 0 - Copy-back mode */ + /* 1 - Write-through mode */ /* Page Table Entry Assistance register - PTEA */ -#define SH7750_PTEA_REGOFS 0x000034 /* offset */ +#define SH7750_PTEA_REGOFS 0x000034 /* offset */ #define SH7750_PTEA SH7750_P4_REG32(SH7750_PTEA_REGOFS) #define SH7750_PTEA_A7 SH7750_A7_REG32(SH7750_PTEA_REGOFS) -#define SH7750_PTEA_TC 0x00000008 /* Timing Control bit - 0 - use area 5 wait states - 1 - use area 6 wait states */ -#define SH7750_PTEA_SA 0x00000007 /* Space Attribute bits: */ -#define SH7750_PTEA_SA_UNDEF 0x00000000 /* 0 - undefined */ -#define SH7750_PTEA_SA_IOVAR 0x00000001 /* 1 - variable-size I/O space */ -#define SH7750_PTEA_SA_IO8 0x00000002 /* 2 - 8-bit I/O space */ -#define SH7750_PTEA_SA_IO16 0x00000003 /* 3 - 16-bit I/O space */ -#define SH7750_PTEA_SA_CMEM8 0x00000004 /* 4 - 8-bit common memory space */ -#define SH7750_PTEA_SA_CMEM16 0x00000005 /* 5 - 16-bit common memory space */ -#define SH7750_PTEA_SA_AMEM8 0x00000006 /* 6 - 8-bit attr memory space */ -#define SH7750_PTEA_SA_AMEM16 0x00000007 /* 7 - 16-bit attr memory space */ +#define SH7750_PTEA_TC 0x00000008 /* Timing Control bit */ + /* 0 - use area 5 wait states */ + /* 1 - use area 6 wait states */ +#define SH7750_PTEA_SA 0x00000007 /* Space Attribute bits: */ +#define SH7750_PTEA_SA_UNDEF 0x00000000 /* 0 - undefined */ +#define SH7750_PTEA_SA_IOVAR 0x00000001 /* 1 - variable-size I/O space */ +#define SH7750_PTEA_SA_IO8 0x00000002 /* 2 - 8-bit I/O space */ +#define SH7750_PTEA_SA_IO16 0x00000003 /* 3 - 16-bit I/O space */ +#define SH7750_PTEA_SA_CMEM8 0x00000004 /* 4 - 8-bit common memory space */ +#define SH7750_PTEA_SA_CMEM16 0x00000005 /* 5 - 16-bit common memory space */ +#define SH7750_PTEA_SA_AMEM8 0x00000006 /* 6 - 8-bit attr memory space */ +#define SH7750_PTEA_SA_AMEM16 0x00000007 /* 7 - 16-bit attr memory space */ /* Translation table base register */ -#define SH7750_TTB_REGOFS 0x000008 /* offset */ +#define SH7750_TTB_REGOFS 0x000008 /* offset */ #define SH7750_TTB SH7750_P4_REG32(SH7750_TTB_REGOFS) #define SH7750_TTB_A7 SH7750_A7_REG32(SH7750_TTB_REGOFS) /* TLB exeption address register - TEA */ -#define SH7750_TEA_REGOFS 0x00000c /* offset */ +#define SH7750_TEA_REGOFS 0x00000c /* offset */ #define SH7750_TEA SH7750_P4_REG32(SH7750_TEA_REGOFS) #define SH7750_TEA_A7 SH7750_A7_REG32(SH7750_TEA_REGOFS) /* MMU control register - MMUCR */ -#define SH7750_MMUCR_REGOFS 0x000010 /* offset */ +#define SH7750_MMUCR_REGOFS 0x000010 /* offset */ #define SH7750_MMUCR SH7750_P4_REG32(SH7750_MMUCR_REGOFS) #define SH7750_MMUCR_A7 SH7750_A7_REG32(SH7750_MMUCR_REGOFS) -#define SH7750_MMUCR_AT 0x00000001 /* Address translation bit */ -#define SH7750_MMUCR_TI 0x00000004 /* TLB invalidate */ -#define SH7750_MMUCR_SV 0x00000100 /* Single Virtual Mode bit */ -#define SH7750_MMUCR_SQMD 0x00000200 /* Store Queue Mode bit */ -#define SH7750_MMUCR_URC 0x0000FC00 /* UTLB Replace Counter */ +#define SH7750_MMUCR_AT 
0x00000001 /* Address translation bit */ +#define SH7750_MMUCR_TI 0x00000004 /* TLB invalidate */ +#define SH7750_MMUCR_SV 0x00000100 /* Single Virtual Mode bit */ +#define SH7750_MMUCR_SQMD 0x00000200 /* Store Queue Mode bit */ +#define SH7750_MMUCR_URC 0x0000FC00 /* UTLB Replace Counter */ #define SH7750_MMUCR_URC_S 10 -#define SH7750_MMUCR_URB 0x00FC0000 /* UTLB Replace Boundary */ +#define SH7750_MMUCR_URB 0x00FC0000 /* UTLB Replace Boundary */ #define SH7750_MMUCR_URB_S 18 -#define SH7750_MMUCR_LRUI 0xFC000000 /* Least Recently Used ITLB */ +#define SH7750_MMUCR_LRUI 0xFC000000 /* Least Recently Used ITLB */ #define SH7750_MMUCR_LRUI_S 26 @@ -145,30 +144,30 @@ */ /* Cache Control Register - CCR */ -#define SH7750_CCR_REGOFS 0x00001c /* offset */ +#define SH7750_CCR_REGOFS 0x00001c /* offset */ #define SH7750_CCR SH7750_P4_REG32(SH7750_CCR_REGOFS) #define SH7750_CCR_A7 SH7750_A7_REG32(SH7750_CCR_REGOFS) -#define SH7750_CCR_IIX 0x00008000 /* IC index enable bit */ -#define SH7750_CCR_ICI 0x00000800 /* IC invalidation bit: - set it to clear IC */ -#define SH7750_CCR_ICE 0x00000100 /* IC enable bit */ -#define SH7750_CCR_OIX 0x00000080 /* OC index enable bit */ -#define SH7750_CCR_ORA 0x00000020 /* OC RAM enable bit - if you set OCE = 0, - you should set ORA = 0 */ -#define SH7750_CCR_OCI 0x00000008 /* OC invalidation bit */ -#define SH7750_CCR_CB 0x00000004 /* Copy-back bit for P1 area */ -#define SH7750_CCR_WT 0x00000002 /* Write-through bit for P0,U0,P3 area */ -#define SH7750_CCR_OCE 0x00000001 /* OC enable bit */ +#define SH7750_CCR_IIX 0x00008000 /* IC index enable bit */ +#define SH7750_CCR_ICI 0x00000800 /* IC invalidation bit: */ + /* set it to clear IC */ +#define SH7750_CCR_ICE 0x00000100 /* IC enable bit */ +#define SH7750_CCR_OIX 0x00000080 /* OC index enable bit */ +#define SH7750_CCR_ORA 0x00000020 /* OC RAM enable bit */ + /* if you set OCE = 0, */ + /* you should set ORA = 0 */ +#define SH7750_CCR_OCI 0x00000008 /* OC invalidation bit */ +#define SH7750_CCR_CB 0x00000004 /* Copy-back bit for P1 area */ +#define SH7750_CCR_WT 0x00000002 /* Write-through bit for P0,U0,P3 area */ +#define SH7750_CCR_OCE 0x00000001 /* OC enable bit */ /* Queue address control register 0 - QACR0 */ -#define SH7750_QACR0_REGOFS 0x000038 /* offset */ +#define SH7750_QACR0_REGOFS 0x000038 /* offset */ #define SH7750_QACR0 SH7750_P4_REG32(SH7750_QACR0_REGOFS) #define SH7750_QACR0_A7 SH7750_A7_REG32(SH7750_QACR0_REGOFS) /* Queue address control register 1 - QACR1 */ -#define SH7750_QACR1_REGOFS 0x00003c /* offset */ +#define SH7750_QACR1_REGOFS 0x00003c /* offset */ #define SH7750_QACR1 SH7750_P4_REG32(SH7750_QACR1_REGOFS) #define SH7750_QACR1_A7 SH7750_A7_REG32(SH7750_QACR1_REGOFS) @@ -178,11 +177,11 @@ */ /* Immediate data for TRAPA instruction - TRA */ -#define SH7750_TRA_REGOFS 0x000020 /* offset */ +#define SH7750_TRA_REGOFS 0x000020 /* offset */ #define SH7750_TRA SH7750_P4_REG32(SH7750_TRA_REGOFS) #define SH7750_TRA_A7 SH7750_A7_REG32(SH7750_TRA_REGOFS) -#define SH7750_TRA_IMM 0x000003fd /* Immediate data operand */ +#define SH7750_TRA_IMM 0x000003fd /* Immediate data operand */ #define SH7750_TRA_IMM_S 2 /* Exeption event register - EXPEVT */ @@ -190,14 +189,14 @@ #define SH7750_EXPEVT SH7750_P4_REG32(SH7750_EXPEVT_REGOFS) #define SH7750_EXPEVT_A7 SH7750_A7_REG32(SH7750_EXPEVT_REGOFS) -#define SH7750_EXPEVT_EX 0x00000fff /* Exeption code */ +#define SH7750_EXPEVT_EX 0x00000fff /* Exeption code */ #define SH7750_EXPEVT_EX_S 0 /* Interrupt event register */ #define SH7750_INTEVT_REGOFS 
0x000028 #define SH7750_INTEVT SH7750_P4_REG32(SH7750_INTEVT_REGOFS) #define SH7750_INTEVT_A7 SH7750_A7_REG32(SH7750_INTEVT_REGOFS) -#define SH7750_INTEVT_EX 0x00000fff /* Exeption code */ +#define SH7750_INTEVT_EX 0x00000fff /* Exeption code */ #define SH7750_INTEVT_EX_S 0 /* @@ -206,683 +205,684 @@ #define SH7750_EVT_TO_NUM(evt) ((evt) >> 5) /* Reset exception category */ -#define SH7750_EVT_POWER_ON_RST 0x000 /* Power-on reset */ -#define SH7750_EVT_MANUAL_RST 0x020 /* Manual reset */ -#define SH7750_EVT_TLB_MULT_HIT 0x140 /* TLB multiple-hit exception */ +#define SH7750_EVT_POWER_ON_RST 0x000 /* Power-on reset */ +#define SH7750_EVT_MANUAL_RST 0x020 /* Manual reset */ +#define SH7750_EVT_TLB_MULT_HIT 0x140 /* TLB multiple-hit exception */ /* General exception category */ -#define SH7750_EVT_USER_BREAK 0x1E0 /* User break */ -#define SH7750_EVT_IADDR_ERR 0x0E0 /* Instruction address error */ -#define SH7750_EVT_TLB_READ_MISS 0x040 /* ITLB miss exception / - DTLB miss exception (read) */ -#define SH7750_EVT_TLB_READ_PROTV 0x0A0 /* ITLB protection violation / - DTLB protection violation (read) */ -#define SH7750_EVT_ILLEGAL_INSTR 0x180 /* General Illegal Instruction - exception */ -#define SH7750_EVT_SLOT_ILLEGAL_INSTR 0x1A0 /* Slot Illegal Instruction - exception */ -#define SH7750_EVT_FPU_DISABLE 0x800 /* General FPU disable exception */ -#define SH7750_EVT_SLOT_FPU_DISABLE 0x820 /* Slot FPU disable exception */ -#define SH7750_EVT_DATA_READ_ERR 0x0E0 /* Data address error (read) */ -#define SH7750_EVT_DATA_WRITE_ERR 0x100 /* Data address error (write) */ -#define SH7750_EVT_DTLB_WRITE_MISS 0x060 /* DTLB miss exception (write) */ -#define SH7750_EVT_DTLB_WRITE_PROTV 0x0C0 /* DTLB protection violation - exception (write) */ -#define SH7750_EVT_FPU_EXCEPTION 0x120 /* FPU exception */ -#define SH7750_EVT_INITIAL_PGWRITE 0x080 /* Initial Page Write exception */ -#define SH7750_EVT_TRAPA 0x160 /* Unconditional trap (TRAPA) */ +#define SH7750_EVT_USER_BREAK 0x1E0 /* User break */ +#define SH7750_EVT_IADDR_ERR 0x0E0 /* Instruction address error */ +#define SH7750_EVT_TLB_READ_MISS 0x040 /* ITLB miss exception / */ + /* DTLB miss exception (read) */ +#define SH7750_EVT_TLB_READ_PROTV 0x0A0 /* ITLB protection violation, */ + /* DTLB protection violation */ + /* (read) */ +#define SH7750_EVT_ILLEGAL_INSTR 0x180 /* General Illegal Instruction */ + /* exception */ +#define SH7750_EVT_SLOT_ILLEGAL_INSTR 0x1A0 /* Slot Illegal Instruction */ + /* exception */ +#define SH7750_EVT_FPU_DISABLE 0x800 /* General FPU disable exception */ +#define SH7750_EVT_SLOT_FPU_DISABLE 0x820 /* Slot FPU disable exception */ +#define SH7750_EVT_DATA_READ_ERR 0x0E0 /* Data address error (read) */ +#define SH7750_EVT_DATA_WRITE_ERR 0x100 /* Data address error (write) */ +#define SH7750_EVT_DTLB_WRITE_MISS 0x060 /* DTLB miss exception (write) */ +#define SH7750_EVT_DTLB_WRITE_PROTV 0x0C0 /* DTLB protection violation */ + /* exception (write) */ +#define SH7750_EVT_FPU_EXCEPTION 0x120 /* FPU exception */ +#define SH7750_EVT_INITIAL_PGWRITE 0x080 /* Initial Page Write exception */ +#define SH7750_EVT_TRAPA 0x160 /* Unconditional trap (TRAPA) */ /* Interrupt exception category */ -#define SH7750_EVT_NMI 0x1C0 /* Non-maskable interrupt */ -#define SH7750_EVT_IRQ0 0x200 /* External Interrupt 0 */ -#define SH7750_EVT_IRQ1 0x220 /* External Interrupt 1 */ -#define SH7750_EVT_IRQ2 0x240 /* External Interrupt 2 */ -#define SH7750_EVT_IRQ3 0x260 /* External Interrupt 3 */ -#define SH7750_EVT_IRQ4 0x280 /* External Interrupt 4 */ 
-#define SH7750_EVT_IRQ5 0x2A0 /* External Interrupt 5 */ -#define SH7750_EVT_IRQ6 0x2C0 /* External Interrupt 6 */ -#define SH7750_EVT_IRQ7 0x2E0 /* External Interrupt 7 */ -#define SH7750_EVT_IRQ8 0x300 /* External Interrupt 8 */ -#define SH7750_EVT_IRQ9 0x320 /* External Interrupt 9 */ -#define SH7750_EVT_IRQA 0x340 /* External Interrupt A */ -#define SH7750_EVT_IRQB 0x360 /* External Interrupt B */ -#define SH7750_EVT_IRQC 0x380 /* External Interrupt C */ -#define SH7750_EVT_IRQD 0x3A0 /* External Interrupt D */ -#define SH7750_EVT_IRQE 0x3C0 /* External Interrupt E */ +#define SH7750_EVT_NMI 0x1C0 /* Non-maskable interrupt */ +#define SH7750_EVT_IRQ0 0x200 /* External Interrupt 0 */ +#define SH7750_EVT_IRQ1 0x220 /* External Interrupt 1 */ +#define SH7750_EVT_IRQ2 0x240 /* External Interrupt 2 */ +#define SH7750_EVT_IRQ3 0x260 /* External Interrupt 3 */ +#define SH7750_EVT_IRQ4 0x280 /* External Interrupt 4 */ +#define SH7750_EVT_IRQ5 0x2A0 /* External Interrupt 5 */ +#define SH7750_EVT_IRQ6 0x2C0 /* External Interrupt 6 */ +#define SH7750_EVT_IRQ7 0x2E0 /* External Interrupt 7 */ +#define SH7750_EVT_IRQ8 0x300 /* External Interrupt 8 */ +#define SH7750_EVT_IRQ9 0x320 /* External Interrupt 9 */ +#define SH7750_EVT_IRQA 0x340 /* External Interrupt A */ +#define SH7750_EVT_IRQB 0x360 /* External Interrupt B */ +#define SH7750_EVT_IRQC 0x380 /* External Interrupt C */ +#define SH7750_EVT_IRQD 0x3A0 /* External Interrupt D */ +#define SH7750_EVT_IRQE 0x3C0 /* External Interrupt E */ /* Peripheral Module Interrupts - Timer Unit (TMU) */ -#define SH7750_EVT_TUNI0 0x400 /* TMU Underflow Interrupt 0 */ -#define SH7750_EVT_TUNI1 0x420 /* TMU Underflow Interrupt 1 */ -#define SH7750_EVT_TUNI2 0x440 /* TMU Underflow Interrupt 2 */ -#define SH7750_EVT_TICPI2 0x460 /* TMU Input Capture Interrupt 2 */ +#define SH7750_EVT_TUNI0 0x400 /* TMU Underflow Interrupt 0 */ +#define SH7750_EVT_TUNI1 0x420 /* TMU Underflow Interrupt 1 */ +#define SH7750_EVT_TUNI2 0x440 /* TMU Underflow Interrupt 2 */ +#define SH7750_EVT_TICPI2 0x460 /* TMU Input Capture Interrupt 2 */ /* Peripheral Module Interrupts - Real-Time Clock (RTC) */ -#define SH7750_EVT_RTC_ATI 0x480 /* Alarm Interrupt Request */ -#define SH7750_EVT_RTC_PRI 0x4A0 /* Periodic Interrupt Request */ -#define SH7750_EVT_RTC_CUI 0x4C0 /* Carry Interrupt Request */ +#define SH7750_EVT_RTC_ATI 0x480 /* Alarm Interrupt Request */ +#define SH7750_EVT_RTC_PRI 0x4A0 /* Periodic Interrupt Request */ +#define SH7750_EVT_RTC_CUI 0x4C0 /* Carry Interrupt Request */ /* Peripheral Module Interrupts - Serial Communication Interface (SCI) */ -#define SH7750_EVT_SCI_ERI 0x4E0 /* Receive Error */ -#define SH7750_EVT_SCI_RXI 0x500 /* Receive Data Register Full */ -#define SH7750_EVT_SCI_TXI 0x520 /* Transmit Data Register Empty */ -#define SH7750_EVT_SCI_TEI 0x540 /* Transmit End */ +#define SH7750_EVT_SCI_ERI 0x4E0 /* Receive Error */ +#define SH7750_EVT_SCI_RXI 0x500 /* Receive Data Register Full */ +#define SH7750_EVT_SCI_TXI 0x520 /* Transmit Data Register Empty */ +#define SH7750_EVT_SCI_TEI 0x540 /* Transmit End */ /* Peripheral Module Interrupts - Watchdog Timer (WDT) */ -#define SH7750_EVT_WDT_ITI 0x560 /* Interval Timer Interrupt - (used when WDT operates in - interval timer mode) */ +#define SH7750_EVT_WDT_ITI 0x560 /* Interval Timer Interrupt */ + /* (used when WDT operates in */ + /* interval timer mode) */ /* Peripheral Module Interrupts - Memory Refresh Unit (REF) */ -#define SH7750_EVT_REF_RCMI 0x580 /* Compare-match Interrupt */ -#define SH7750_EVT_REF_ROVI 
0x5A0 /* Refresh Counter Overflow - interrupt */ +#define SH7750_EVT_REF_RCMI 0x580 /* Compare-match Interrupt */ +#define SH7750_EVT_REF_ROVI 0x5A0 /* Refresh Counter Overflow */ + /* interrupt */ /* Peripheral Module Interrupts - Hitachi User Debug Interface (H-UDI) */ -#define SH7750_EVT_HUDI 0x600 /* UDI interrupt */ +#define SH7750_EVT_HUDI 0x600 /* UDI interrupt */ /* Peripheral Module Interrupts - General-Purpose I/O (GPIO) */ -#define SH7750_EVT_GPIO 0x620 /* GPIO Interrupt */ +#define SH7750_EVT_GPIO 0x620 /* GPIO Interrupt */ /* Peripheral Module Interrupts - DMA Controller (DMAC) */ -#define SH7750_EVT_DMAC_DMTE0 0x640 /* DMAC 0 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMTE1 0x660 /* DMAC 1 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMTE2 0x680 /* DMAC 2 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMTE3 0x6A0 /* DMAC 3 Transfer End Interrupt */ -#define SH7750_EVT_DMAC_DMAE 0x6C0 /* DMAC Address Error Interrupt */ - -/* Peripheral Module Interrupts - Serial Communication Interface with FIFO */ -/* (SCIF) */ -#define SH7750_EVT_SCIF_ERI 0x700 /* Receive Error */ -#define SH7750_EVT_SCIF_RXI 0x720 /* Receive FIFO Data Full or - Receive Data ready interrupt */ -#define SH7750_EVT_SCIF_BRI 0x740 /* Break or overrun error */ -#define SH7750_EVT_SCIF_TXI 0x760 /* Transmit FIFO Data Empty */ +#define SH7750_EVT_DMAC_DMTE0 0x640 /* DMAC 0 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMTE1 0x660 /* DMAC 1 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMTE2 0x680 /* DMAC 2 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMTE3 0x6A0 /* DMAC 3 Transfer End Interrupt */ +#define SH7750_EVT_DMAC_DMAE 0x6C0 /* DMAC Address Error Interrupt */ + +/* Peripheral Module Interrupts Serial Communication Interface w/ FIFO (SCIF) */ +#define SH7750_EVT_SCIF_ERI 0x700 /* Receive Error */ +#define SH7750_EVT_SCIF_RXI 0x720 /* Receive FIFO Data Full or */ + /* Receive Data ready interrupt */ +#define SH7750_EVT_SCIF_BRI 0x740 /* Break or overrun error */ +#define SH7750_EVT_SCIF_TXI 0x760 /* Transmit FIFO Data Empty */ /* * Power Management */ -#define SH7750_STBCR_REGOFS 0xC00004 /* offset */ +#define SH7750_STBCR_REGOFS 0xC00004 /* offset */ #define SH7750_STBCR SH7750_P4_REG32(SH7750_STBCR_REGOFS) #define SH7750_STBCR_A7 SH7750_A7_REG32(SH7750_STBCR_REGOFS) -#define SH7750_STBCR_STBY 0x80 /* Specifies a transition to standby mode: - 0 - Transition to SLEEP mode on SLEEP - 1 - Transition to STANDBY mode on SLEEP */ -#define SH7750_STBCR_PHZ 0x40 /* State of peripheral module pins in - standby mode: - 0 - normal state - 1 - high-impendance state */ +#define SH7750_STBCR_STBY 0x80 /* Specifies a transition to standby mode: */ + /* 0 Transition to SLEEP mode on SLEEP */ + /* 1 Transition to STANDBY mode on SLEEP */ +#define SH7750_STBCR_PHZ 0x40 /* State of peripheral module pins in */ + /* standby mode: */ + /* 0 normal state */ + /* 1 high-impendance state */ -#define SH7750_STBCR_PPU 0x20 /* Peripheral module pins pull-up controls */ -#define SH7750_STBCR_MSTP4 0x10 /* Stopping the clock supply to DMAC */ +#define SH7750_STBCR_PPU 0x20 /* Peripheral module pins pull-up controls */ +#define SH7750_STBCR_MSTP4 0x10 /* Stopping the clock supply to DMAC */ #define SH7750_STBCR_DMAC_STP SH7750_STBCR_MSTP4 -#define SH7750_STBCR_MSTP3 0x08 /* Stopping the clock supply to SCIF */ +#define SH7750_STBCR_MSTP3 0x08 /* Stopping the clock supply to SCIF */ #define SH7750_STBCR_SCIF_STP SH7750_STBCR_MSTP3 -#define SH7750_STBCR_MSTP2 0x04 /* Stopping the clock supply to TMU */ 
+#define SH7750_STBCR_MSTP2 0x04 /* Stopping the clock supply to TMU */ #define SH7750_STBCR_TMU_STP SH7750_STBCR_MSTP2 -#define SH7750_STBCR_MSTP1 0x02 /* Stopping the clock supply to RTC */ +#define SH7750_STBCR_MSTP1 0x02 /* Stopping the clock supply to RTC */ #define SH7750_STBCR_RTC_STP SH7750_STBCR_MSTP1 -#define SH7750_STBCR_MSPT0 0x01 /* Stopping the clock supply to SCI */ +#define SH7750_STBCR_MSPT0 0x01 /* Stopping the clock supply to SCI */ #define SH7750_STBCR_SCI_STP SH7750_STBCR_MSTP0 #define SH7750_STBCR_STBY 0x80 -#define SH7750_STBCR2_REGOFS 0xC00010 /* offset */ +#define SH7750_STBCR2_REGOFS 0xC00010 /* offset */ #define SH7750_STBCR2 SH7750_P4_REG32(SH7750_STBCR2_REGOFS) #define SH7750_STBCR2_A7 SH7750_A7_REG32(SH7750_STBCR2_REGOFS) -#define SH7750_STBCR2_DSLP 0x80 /* Specifies transition to deep sleep mode: - 0 - transition to sleep or standby mode - as it is specified in STBY bit - 1 - transition to deep sleep mode on - execution of SLEEP instruction */ -#define SH7750_STBCR2_MSTP6 0x02 /* Stopping the clock supply to Store Queue - in the cache controller */ +#define SH7750_STBCR2_DSLP 0x80 /* Specifies transition to deep sleep mode */ + /* 0 transition to sleep or standby mode */ + /* as it is specified in STBY bit */ + /* 1 transition to deep sleep mode on */ + /* execution of SLEEP instruction */ +#define SH7750_STBCR2_MSTP6 0x02 /* Stopping the clock supply to the */ + /* Store Queue in the cache controller */ #define SH7750_STBCR2_SQ_STP SH7750_STBCR2_MSTP6 -#define SH7750_STBCR2_MSTP5 0x01 /* Stopping the clock supply to the User - Break Controller (UBC) */ +#define SH7750_STBCR2_MSTP5 0x01 /* Stopping the clock supply to the */ + /* User Break Controller (UBC) */ #define SH7750_STBCR2_UBC_STP SH7750_STBCR2_MSTP5 /* * Clock Pulse Generator (CPG) */ -#define SH7750_FRQCR_REGOFS 0xC00000 /* offset */ +#define SH7750_FRQCR_REGOFS 0xC00000 /* offset */ #define SH7750_FRQCR SH7750_P4_REG32(SH7750_FRQCR_REGOFS) #define SH7750_FRQCR_A7 SH7750_A7_REG32(SH7750_FRQCR_REGOFS) -#define SH7750_FRQCR_CKOEN 0x0800 /* Clock Output Enable - 0 - CKIO pin goes to HiZ/pullup - 1 - Clock is output from CKIO */ -#define SH7750_FRQCR_PLL1EN 0x0400 /* PLL circuit 1 enable */ -#define SH7750_FRQCR_PLL2EN 0x0200 /* PLL circuit 2 enable */ - -#define SH7750_FRQCR_IFC 0x01C0 /* CPU clock frequency division ratio: */ -#define SH7750_FRQCR_IFCDIV1 0x0000 /* 0 - * 1 */ -#define SH7750_FRQCR_IFCDIV2 0x0040 /* 1 - * 1/2 */ -#define SH7750_FRQCR_IFCDIV3 0x0080 /* 2 - * 1/3 */ -#define SH7750_FRQCR_IFCDIV4 0x00C0 /* 3 - * 1/4 */ -#define SH7750_FRQCR_IFCDIV6 0x0100 /* 4 - * 1/6 */ -#define SH7750_FRQCR_IFCDIV8 0x0140 /* 5 - * 1/8 */ - -#define SH7750_FRQCR_BFC 0x0038 /* Bus clock frequency division ratio: */ -#define SH7750_FRQCR_BFCDIV1 0x0000 /* 0 - * 1 */ -#define SH7750_FRQCR_BFCDIV2 0x0008 /* 1 - * 1/2 */ -#define SH7750_FRQCR_BFCDIV3 0x0010 /* 2 - * 1/3 */ -#define SH7750_FRQCR_BFCDIV4 0x0018 /* 3 - * 1/4 */ -#define SH7750_FRQCR_BFCDIV6 0x0020 /* 4 - * 1/6 */ -#define SH7750_FRQCR_BFCDIV8 0x0028 /* 5 - * 1/8 */ - -#define SH7750_FRQCR_PFC 0x0007 /* Peripheral module clock frequency - division ratio: */ -#define SH7750_FRQCR_PFCDIV2 0x0000 /* 0 - * 1/2 */ -#define SH7750_FRQCR_PFCDIV3 0x0001 /* 1 - * 1/3 */ -#define SH7750_FRQCR_PFCDIV4 0x0002 /* 2 - * 1/4 */ -#define SH7750_FRQCR_PFCDIV6 0x0003 /* 3 - * 1/6 */ -#define SH7750_FRQCR_PFCDIV8 0x0004 /* 4 - * 1/8 */ +#define SH7750_FRQCR_CKOEN 0x0800 /* Clock Output Enable */ + /* 0 - CKIO pin goes to HiZ/pullup */ + /* 1 - Clock is output from 
CKIO */ +#define SH7750_FRQCR_PLL1EN 0x0400 /* PLL circuit 1 enable */ +#define SH7750_FRQCR_PLL2EN 0x0200 /* PLL circuit 2 enable */ + +#define SH7750_FRQCR_IFC 0x01C0 /* CPU clock frequency division ratio: */ +#define SH7750_FRQCR_IFCDIV1 0x0000 /* 0 - * 1 */ +#define SH7750_FRQCR_IFCDIV2 0x0040 /* 1 - * 1/2 */ +#define SH7750_FRQCR_IFCDIV3 0x0080 /* 2 - * 1/3 */ +#define SH7750_FRQCR_IFCDIV4 0x00C0 /* 3 - * 1/4 */ +#define SH7750_FRQCR_IFCDIV6 0x0100 /* 4 - * 1/6 */ +#define SH7750_FRQCR_IFCDIV8 0x0140 /* 5 - * 1/8 */ + +#define SH7750_FRQCR_BFC 0x0038 /* Bus clock frequency division ratio: */ +#define SH7750_FRQCR_BFCDIV1 0x0000 /* 0 - * 1 */ +#define SH7750_FRQCR_BFCDIV2 0x0008 /* 1 - * 1/2 */ +#define SH7750_FRQCR_BFCDIV3 0x0010 /* 2 - * 1/3 */ +#define SH7750_FRQCR_BFCDIV4 0x0018 /* 3 - * 1/4 */ +#define SH7750_FRQCR_BFCDIV6 0x0020 /* 4 - * 1/6 */ +#define SH7750_FRQCR_BFCDIV8 0x0028 /* 5 - * 1/8 */ + +#define SH7750_FRQCR_PFC 0x0007 /* Peripheral module clock frequency */ + /* division ratio: */ +#define SH7750_FRQCR_PFCDIV2 0x0000 /* 0 - * 1/2 */ +#define SH7750_FRQCR_PFCDIV3 0x0001 /* 1 - * 1/3 */ +#define SH7750_FRQCR_PFCDIV4 0x0002 /* 2 - * 1/4 */ +#define SH7750_FRQCR_PFCDIV6 0x0003 /* 3 - * 1/6 */ +#define SH7750_FRQCR_PFCDIV8 0x0004 /* 4 - * 1/8 */ /* * Watchdog Timer (WDT) */ /* Watchdog Timer Counter register - WTCNT */ -#define SH7750_WTCNT_REGOFS 0xC00008 /* offset */ +#define SH7750_WTCNT_REGOFS 0xC00008 /* offset */ #define SH7750_WTCNT SH7750_P4_REG32(SH7750_WTCNT_REGOFS) #define SH7750_WTCNT_A7 SH7750_A7_REG32(SH7750_WTCNT_REGOFS) -#define SH7750_WTCNT_KEY 0x5A00 /* When WTCNT byte register written, - you have to set the upper byte to - 0x5A */ +#define SH7750_WTCNT_KEY 0x5A00 /* When WTCNT byte register written, you */ + /* have to set the upper byte to 0x5A */ /* Watchdog Timer Control/Status register - WTCSR */ -#define SH7750_WTCSR_REGOFS 0xC0000C /* offset */ +#define SH7750_WTCSR_REGOFS 0xC0000C /* offset */ #define SH7750_WTCSR SH7750_P4_REG32(SH7750_WTCSR_REGOFS) #define SH7750_WTCSR_A7 SH7750_A7_REG32(SH7750_WTCSR_REGOFS) -#define SH7750_WTCSR_KEY 0xA500 /* When WTCSR byte register written, - you have to set the upper byte to - 0xA5 */ -#define SH7750_WTCSR_TME 0x80 /* Timer enable (1-upcount start) */ -#define SH7750_WTCSR_MODE 0x40 /* Timer Mode Select: */ -#define SH7750_WTCSR_MODE_WT 0x40 /* Watchdog Timer Mode */ -#define SH7750_WTCSR_MODE_IT 0x00 /* Interval Timer Mode */ -#define SH7750_WTCSR_RSTS 0x20 /* Reset Select: */ -#define SH7750_WTCSR_RST_MAN 0x20 /* Manual Reset */ -#define SH7750_WTCSR_RST_PWR 0x00 /* Power-on Reset */ -#define SH7750_WTCSR_WOVF 0x10 /* Watchdog Timer Overflow Flag */ -#define SH7750_WTCSR_IOVF 0x08 /* Interval Timer Overflow Flag */ -#define SH7750_WTCSR_CKS 0x07 /* Clock Select: */ -#define SH7750_WTCSR_CKS_DIV32 0x00 /* 1/32 of frequency divider 2 input */ -#define SH7750_WTCSR_CKS_DIV64 0x01 /* 1/64 */ -#define SH7750_WTCSR_CKS_DIV128 0x02 /* 1/128 */ -#define SH7750_WTCSR_CKS_DIV256 0x03 /* 1/256 */ -#define SH7750_WTCSR_CKS_DIV512 0x04 /* 1/512 */ -#define SH7750_WTCSR_CKS_DIV1024 0x05 /* 1/1024 */ -#define SH7750_WTCSR_CKS_DIV2048 0x06 /* 1/2048 */ -#define SH7750_WTCSR_CKS_DIV4096 0x07 /* 1/4096 */ +#define SH7750_WTCSR_KEY 0xA500 /* When WTCSR byte register written, you */ + /* have to set the upper byte to 0xA5 */ +#define SH7750_WTCSR_TME 0x80 /* Timer enable (1-upcount start) */ +#define SH7750_WTCSR_MODE 0x40 /* Timer Mode Select: */ +#define SH7750_WTCSR_MODE_WT 0x40 /* Watchdog Timer Mode */ +#define 
SH7750_WTCSR_MODE_IT 0x00 /* Interval Timer Mode */ +#define SH7750_WTCSR_RSTS 0x20 /* Reset Select: */ +#define SH7750_WTCSR_RST_MAN 0x20 /* Manual Reset */ +#define SH7750_WTCSR_RST_PWR 0x00 /* Power-on Reset */ +#define SH7750_WTCSR_WOVF 0x10 /* Watchdog Timer Overflow Flag */ +#define SH7750_WTCSR_IOVF 0x08 /* Interval Timer Overflow Flag */ +#define SH7750_WTCSR_CKS 0x07 /* Clock Select: */ +#define SH7750_WTCSR_CKS_DIV32 0x00 /* 1/32 of frequency divider 2 input */ +#define SH7750_WTCSR_CKS_DIV64 0x01 /* 1/64 */ +#define SH7750_WTCSR_CKS_DIV128 0x02 /* 1/128 */ +#define SH7750_WTCSR_CKS_DIV256 0x03 /* 1/256 */ +#define SH7750_WTCSR_CKS_DIV512 0x04 /* 1/512 */ +#define SH7750_WTCSR_CKS_DIV1024 0x05 /* 1/1024 */ +#define SH7750_WTCSR_CKS_DIV2048 0x06 /* 1/2048 */ +#define SH7750_WTCSR_CKS_DIV4096 0x07 /* 1/4096 */ /* * Real-Time Clock (RTC) */ /* 64-Hz Counter Register (byte, read-only) - R64CNT */ -#define SH7750_R64CNT_REGOFS 0xC80000 /* offset */ +#define SH7750_R64CNT_REGOFS 0xC80000 /* offset */ #define SH7750_R64CNT SH7750_P4_REG32(SH7750_R64CNT_REGOFS) #define SH7750_R64CNT_A7 SH7750_A7_REG32(SH7750_R64CNT_REGOFS) /* Second Counter Register (byte, BCD-coded) - RSECCNT */ -#define SH7750_RSECCNT_REGOFS 0xC80004 /* offset */ +#define SH7750_RSECCNT_REGOFS 0xC80004 /* offset */ #define SH7750_RSECCNT SH7750_P4_REG32(SH7750_RSECCNT_REGOFS) #define SH7750_RSECCNT_A7 SH7750_A7_REG32(SH7750_RSECCNT_REGOFS) /* Minute Counter Register (byte, BCD-coded) - RMINCNT */ -#define SH7750_RMINCNT_REGOFS 0xC80008 /* offset */ +#define SH7750_RMINCNT_REGOFS 0xC80008 /* offset */ #define SH7750_RMINCNT SH7750_P4_REG32(SH7750_RMINCNT_REGOFS) #define SH7750_RMINCNT_A7 SH7750_A7_REG32(SH7750_RMINCNT_REGOFS) /* Hour Counter Register (byte, BCD-coded) - RHRCNT */ -#define SH7750_RHRCNT_REGOFS 0xC8000C /* offset */ +#define SH7750_RHRCNT_REGOFS 0xC8000C /* offset */ #define SH7750_RHRCNT SH7750_P4_REG32(SH7750_RHRCNT_REGOFS) #define SH7750_RHRCNT_A7 SH7750_A7_REG32(SH7750_RHRCNT_REGOFS) /* Day-of-Week Counter Register (byte) - RWKCNT */ -#define SH7750_RWKCNT_REGOFS 0xC80010 /* offset */ +#define SH7750_RWKCNT_REGOFS 0xC80010 /* offset */ #define SH7750_RWKCNT SH7750_P4_REG32(SH7750_RWKCNT_REGOFS) #define SH7750_RWKCNT_A7 SH7750_A7_REG32(SH7750_RWKCNT_REGOFS) -#define SH7750_RWKCNT_SUN 0 /* Sunday */ -#define SH7750_RWKCNT_MON 1 /* Monday */ -#define SH7750_RWKCNT_TUE 2 /* Tuesday */ -#define SH7750_RWKCNT_WED 3 /* Wednesday */ -#define SH7750_RWKCNT_THU 4 /* Thursday */ -#define SH7750_RWKCNT_FRI 5 /* Friday */ -#define SH7750_RWKCNT_SAT 6 /* Saturday */ +#define SH7750_RWKCNT_SUN 0 /* Sunday */ +#define SH7750_RWKCNT_MON 1 /* Monday */ +#define SH7750_RWKCNT_TUE 2 /* Tuesday */ +#define SH7750_RWKCNT_WED 3 /* Wednesday */ +#define SH7750_RWKCNT_THU 4 /* Thursday */ +#define SH7750_RWKCNT_FRI 5 /* Friday */ +#define SH7750_RWKCNT_SAT 6 /* Saturday */ /* Day Counter Register (byte, BCD-coded) - RDAYCNT */ -#define SH7750_RDAYCNT_REGOFS 0xC80014 /* offset */ +#define SH7750_RDAYCNT_REGOFS 0xC80014 /* offset */ #define SH7750_RDAYCNT SH7750_P4_REG32(SH7750_RDAYCNT_REGOFS) #define SH7750_RDAYCNT_A7 SH7750_A7_REG32(SH7750_RDAYCNT_REGOFS) /* Month Counter Register (byte, BCD-coded) - RMONCNT */ -#define SH7750_RMONCNT_REGOFS 0xC80018 /* offset */ +#define SH7750_RMONCNT_REGOFS 0xC80018 /* offset */ #define SH7750_RMONCNT SH7750_P4_REG32(SH7750_RMONCNT_REGOFS) #define SH7750_RMONCNT_A7 SH7750_A7_REG32(SH7750_RMONCNT_REGOFS) /* Year Counter Register (half, BCD-coded) - RYRCNT */ -#define SH7750_RYRCNT_REGOFS 
0xC8001C /* offset */ +#define SH7750_RYRCNT_REGOFS 0xC8001C /* offset */ #define SH7750_RYRCNT SH7750_P4_REG32(SH7750_RYRCNT_REGOFS) #define SH7750_RYRCNT_A7 SH7750_A7_REG32(SH7750_RYRCNT_REGOFS) /* Second Alarm Register (byte, BCD-coded) - RSECAR */ -#define SH7750_RSECAR_REGOFS 0xC80020 /* offset */ +#define SH7750_RSECAR_REGOFS 0xC80020 /* offset */ #define SH7750_RSECAR SH7750_P4_REG32(SH7750_RSECAR_REGOFS) #define SH7750_RSECAR_A7 SH7750_A7_REG32(SH7750_RSECAR_REGOFS) -#define SH7750_RSECAR_ENB 0x80 /* Second Alarm Enable */ +#define SH7750_RSECAR_ENB 0x80 /* Second Alarm Enable */ /* Minute Alarm Register (byte, BCD-coded) - RMINAR */ -#define SH7750_RMINAR_REGOFS 0xC80024 /* offset */ +#define SH7750_RMINAR_REGOFS 0xC80024 /* offset */ #define SH7750_RMINAR SH7750_P4_REG32(SH7750_RMINAR_REGOFS) #define SH7750_RMINAR_A7 SH7750_A7_REG32(SH7750_RMINAR_REGOFS) -#define SH7750_RMINAR_ENB 0x80 /* Minute Alarm Enable */ +#define SH7750_RMINAR_ENB 0x80 /* Minute Alarm Enable */ /* Hour Alarm Register (byte, BCD-coded) - RHRAR */ -#define SH7750_RHRAR_REGOFS 0xC80028 /* offset */ +#define SH7750_RHRAR_REGOFS 0xC80028 /* offset */ #define SH7750_RHRAR SH7750_P4_REG32(SH7750_RHRAR_REGOFS) #define SH7750_RHRAR_A7 SH7750_A7_REG32(SH7750_RHRAR_REGOFS) -#define SH7750_RHRAR_ENB 0x80 /* Hour Alarm Enable */ +#define SH7750_RHRAR_ENB 0x80 /* Hour Alarm Enable */ /* Day-of-Week Alarm Register (byte) - RWKAR */ -#define SH7750_RWKAR_REGOFS 0xC8002C /* offset */ +#define SH7750_RWKAR_REGOFS 0xC8002C /* offset */ #define SH7750_RWKAR SH7750_P4_REG32(SH7750_RWKAR_REGOFS) #define SH7750_RWKAR_A7 SH7750_A7_REG32(SH7750_RWKAR_REGOFS) -#define SH7750_RWKAR_ENB 0x80 /* Day-of-week Alarm Enable */ +#define SH7750_RWKAR_ENB 0x80 /* Day-of-week Alarm Enable */ -#define SH7750_RWKAR_SUN 0 /* Sunday */ -#define SH7750_RWKAR_MON 1 /* Monday */ -#define SH7750_RWKAR_TUE 2 /* Tuesday */ -#define SH7750_RWKAR_WED 3 /* Wednesday */ -#define SH7750_RWKAR_THU 4 /* Thursday */ -#define SH7750_RWKAR_FRI 5 /* Friday */ -#define SH7750_RWKAR_SAT 6 /* Saturday */ +#define SH7750_RWKAR_SUN 0 /* Sunday */ +#define SH7750_RWKAR_MON 1 /* Monday */ +#define SH7750_RWKAR_TUE 2 /* Tuesday */ +#define SH7750_RWKAR_WED 3 /* Wednesday */ +#define SH7750_RWKAR_THU 4 /* Thursday */ +#define SH7750_RWKAR_FRI 5 /* Friday */ +#define SH7750_RWKAR_SAT 6 /* Saturday */ /* Day Alarm Register (byte, BCD-coded) - RDAYAR */ -#define SH7750_RDAYAR_REGOFS 0xC80030 /* offset */ +#define SH7750_RDAYAR_REGOFS 0xC80030 /* offset */ #define SH7750_RDAYAR SH7750_P4_REG32(SH7750_RDAYAR_REGOFS) #define SH7750_RDAYAR_A7 SH7750_A7_REG32(SH7750_RDAYAR_REGOFS) -#define SH7750_RDAYAR_ENB 0x80 /* Day Alarm Enable */ +#define SH7750_RDAYAR_ENB 0x80 /* Day Alarm Enable */ /* Month Counter Register (byte, BCD-coded) - RMONAR */ -#define SH7750_RMONAR_REGOFS 0xC80034 /* offset */ +#define SH7750_RMONAR_REGOFS 0xC80034 /* offset */ #define SH7750_RMONAR SH7750_P4_REG32(SH7750_RMONAR_REGOFS) #define SH7750_RMONAR_A7 SH7750_A7_REG32(SH7750_RMONAR_REGOFS) -#define SH7750_RMONAR_ENB 0x80 /* Month Alarm Enable */ +#define SH7750_RMONAR_ENB 0x80 /* Month Alarm Enable */ /* RTC Control Register 1 (byte) - RCR1 */ -#define SH7750_RCR1_REGOFS 0xC80038 /* offset */ +#define SH7750_RCR1_REGOFS 0xC80038 /* offset */ #define SH7750_RCR1 SH7750_P4_REG32(SH7750_RCR1_REGOFS) #define SH7750_RCR1_A7 SH7750_A7_REG32(SH7750_RCR1_REGOFS) -#define SH7750_RCR1_CF 0x80 /* Carry Flag */ -#define SH7750_RCR1_CIE 0x10 /* Carry Interrupt Enable */ -#define SH7750_RCR1_AIE 0x08 /* Alarm 
Interrupt Enable */ -#define SH7750_RCR1_AF 0x01 /* Alarm Flag */ +#define SH7750_RCR1_CF 0x80 /* Carry Flag */ +#define SH7750_RCR1_CIE 0x10 /* Carry Interrupt Enable */ +#define SH7750_RCR1_AIE 0x08 /* Alarm Interrupt Enable */ +#define SH7750_RCR1_AF 0x01 /* Alarm Flag */ /* RTC Control Register 2 (byte) - RCR2 */ -#define SH7750_RCR2_REGOFS 0xC8003C /* offset */ +#define SH7750_RCR2_REGOFS 0xC8003C /* offset */ #define SH7750_RCR2 SH7750_P4_REG32(SH7750_RCR2_REGOFS) #define SH7750_RCR2_A7 SH7750_A7_REG32(SH7750_RCR2_REGOFS) -#define SH7750_RCR2_PEF 0x80 /* Periodic Interrupt Flag */ -#define SH7750_RCR2_PES 0x70 /* Periodic Interrupt Enable: */ -#define SH7750_RCR2_PES_DIS 0x00 /* Periodic Interrupt Disabled */ -#define SH7750_RCR2_PES_DIV256 0x10 /* Generated at 1/256 sec interval */ -#define SH7750_RCR2_PES_DIV64 0x20 /* Generated at 1/64 sec interval */ -#define SH7750_RCR2_PES_DIV16 0x30 /* Generated at 1/16 sec interval */ -#define SH7750_RCR2_PES_DIV4 0x40 /* Generated at 1/4 sec interval */ -#define SH7750_RCR2_PES_DIV2 0x50 /* Generated at 1/2 sec interval */ -#define SH7750_RCR2_PES_x1 0x60 /* Generated at 1 sec interval */ -#define SH7750_RCR2_PES_x2 0x70 /* Generated at 2 sec interval */ -#define SH7750_RCR2_RTCEN 0x08 /* RTC Crystal Oscillator is Operated */ -#define SH7750_RCR2_ADJ 0x04 /* 30-Second Adjastment */ -#define SH7750_RCR2_RESET 0x02 /* Frequency divider circuits are reset */ -#define SH7750_RCR2_START 0x01 /* 0 - sec, min, hr, day-of-week, month, - year counters are stopped - 1 - sec, min, hr, day-of-week, month, - year counters operate normally */ +#define SH7750_RCR2_PEF 0x80 /* Periodic Interrupt Flag */ +#define SH7750_RCR2_PES 0x70 /* Periodic Interrupt Enable: */ +#define SH7750_RCR2_PES_DIS 0x00 /* Periodic Interrupt Disabled */ +#define SH7750_RCR2_PES_DIV256 0x10 /* Generated at 1/256 sec interval */ +#define SH7750_RCR2_PES_DIV64 0x20 /* Generated at 1/64 sec interval */ +#define SH7750_RCR2_PES_DIV16 0x30 /* Generated at 1/16 sec interval */ +#define SH7750_RCR2_PES_DIV4 0x40 /* Generated at 1/4 sec interval */ +#define SH7750_RCR2_PES_DIV2 0x50 /* Generated at 1/2 sec interval */ +#define SH7750_RCR2_PES_x1 0x60 /* Generated at 1 sec interval */ +#define SH7750_RCR2_PES_x2 0x70 /* Generated at 2 sec interval */ +#define SH7750_RCR2_RTCEN 0x08 /* RTC Crystal Oscillator is Operated */ +#define SH7750_RCR2_ADJ 0x04 /* 30-Second Adjastment */ +#define SH7750_RCR2_RESET 0x02 /* Frequency divider circuits are reset */ +#define SH7750_RCR2_START 0x01 /* 0 - sec, min, hr, day-of-week, month, */ + /* year counters are stopped */ + /* 1 - sec, min, hr, day-of-week, month, */ + /* year counters operate normally */ /* * Bus State Controller - BSC */ /* Bus Control Register 1 - BCR1 */ -#define SH7750_BCR1_REGOFS 0x800000 /* offset */ +#define SH7750_BCR1_REGOFS 0x800000 /* offset */ #define SH7750_BCR1 SH7750_P4_REG32(SH7750_BCR1_REGOFS) #define SH7750_BCR1_A7 SH7750_A7_REG32(SH7750_BCR1_REGOFS) -#define SH7750_BCR1_ENDIAN 0x80000000 /* Endianness (1 - little endian) */ -#define SH7750_BCR1_MASTER 0x40000000 /* Master/Slave mode (1-master) */ -#define SH7750_BCR1_A0MPX 0x20000000 /* Area 0 Memory Type (0-SRAM,1-MPX) */ -#define SH7750_BCR1_IPUP 0x02000000 /* Input Pin Pull-up Control: - 0 - pull-up resistor is on for - control input pins - 1 - pull-up resistor is off */ -#define SH7750_BCR1_OPUP 0x01000000 /* Output Pin Pull-up Control: - 0 - pull-up resistor is on for - control output pins - 1 - pull-up resistor is off */ -#define SH7750_BCR1_A1MBC 0x00200000 
/* Area 1 SRAM Byte Control Mode: - 0 - Area 1 SRAM is set to - normal mode - 1 - Area 1 SRAM is set to byte - control mode */ -#define SH7750_BCR1_A4MBC 0x00100000 /* Area 4 SRAM Byte Control Mode: - 0 - Area 4 SRAM is set to - normal mode - 1 - Area 4 SRAM is set to byte - control mode */ -#define SH7750_BCR1_BREQEN 0x00080000 /* BREQ Enable: - 0 - External requests are not - accepted - 1 - External requests are - accepted */ -#define SH7750_BCR1_PSHR 0x00040000 /* Partial Sharing Bit: - 0 - Master Mode - 1 - Partial-sharing Mode */ -#define SH7750_BCR1_MEMMPX 0x00020000 /* Area 1 to 6 MPX Interface: - 0 - SRAM/burst ROM interface - 1 - MPX interface */ -#define SH7750_BCR1_HIZMEM 0x00008000 /* High Impendance Control. Specifies - the state of A[25:0], BS\, CSn\, - RD/WR\, CE2A\, CE2B\ in standby - mode and when bus is released: - 0 - signals go to High-Z mode - 1 - signals driven */ -#define SH7750_BCR1_HIZCNT 0x00004000 /* High Impendance Control. Specifies - the state of the RAS\, RAS2\, WEn\, - CASn\, DQMn, RD\, CASS\, FRAME\, - RD2\ signals in standby mode and - when bus is released: - 0 - signals go to High-Z mode - 1 - signals driven */ -#define SH7750_BCR1_A0BST 0x00003800 /* Area 0 Burst ROM Control */ -#define SH7750_BCR1_A0BST_SRAM 0x0000 /* Area 0 accessed as SRAM i/f */ -#define SH7750_BCR1_A0BST_ROM4 0x0800 /* Area 0 accessed as burst ROM - interface, 4 cosequtive access */ -#define SH7750_BCR1_A0BST_ROM8 0x1000 /* Area 0 accessed as burst ROM - interface, 8 cosequtive access */ -#define SH7750_BCR1_A0BST_ROM16 0x1800 /* Area 0 accessed as burst ROM - interface, 16 cosequtive access */ -#define SH7750_BCR1_A0BST_ROM32 0x2000 /* Area 0 accessed as burst ROM - interface, 32 cosequtive access */ - -#define SH7750_BCR1_A5BST 0x00000700 /* Area 5 Burst ROM Control */ -#define SH7750_BCR1_A5BST_SRAM 0x0000 /* Area 5 accessed as SRAM i/f */ -#define SH7750_BCR1_A5BST_ROM4 0x0100 /* Area 5 accessed as burst ROM - interface, 4 cosequtive access */ -#define SH7750_BCR1_A5BST_ROM8 0x0200 /* Area 5 accessed as burst ROM - interface, 8 cosequtive access */ -#define SH7750_BCR1_A5BST_ROM16 0x0300 /* Area 5 accessed as burst ROM - interface, 16 cosequtive access */ -#define SH7750_BCR1_A5BST_ROM32 0x0400 /* Area 5 accessed as burst ROM - interface, 32 cosequtive access */ - -#define SH7750_BCR1_A6BST 0x000000E0 /* Area 6 Burst ROM Control */ -#define SH7750_BCR1_A6BST_SRAM 0x0000 /* Area 6 accessed as SRAM i/f */ -#define SH7750_BCR1_A6BST_ROM4 0x0020 /* Area 6 accessed as burst ROM - interface, 4 cosequtive access */ -#define SH7750_BCR1_A6BST_ROM8 0x0040 /* Area 6 accessed as burst ROM - interface, 8 cosequtive access */ -#define SH7750_BCR1_A6BST_ROM16 0x0060 /* Area 6 accessed as burst ROM - interface, 16 cosequtive access */ -#define SH7750_BCR1_A6BST_ROM32 0x0080 /* Area 6 accessed as burst ROM - interface, 32 cosequtive access */ - -#define SH7750_BCR1_DRAMTP 0x001C /* Area 2 and 3 Memory Type */ -#define SH7750_BCR1_DRAMTP_2SRAM_3SRAM 0x0000 /* Area 2 and 3 are SRAM or MPX - interface. 
*/ -#define SH7750_BCR1_DRAMTP_2SRAM_3SDRAM 0x0008 /* Area 2 - SRAM/MPX, Area 3 - - synchronous DRAM */ -#define SH7750_BCR1_DRAMTP_2SDRAM_3SDRAM 0x000C /* Area 2 and 3 are synchronous - DRAM interface */ -#define SH7750_BCR1_DRAMTP_2SRAM_3DRAM 0x0010 /* Area 2 - SRAM/MPX, Area 3 - - DRAM interface */ -#define SH7750_BCR1_DRAMTP_2DRAM_3DRAM 0x0014 /* Area 2 and 3 are DRAM - interface */ - -#define SH7750_BCR1_A56PCM 0x00000001 /* Area 5 and 6 Bus Type: - 0 - SRAM interface - 1 - PCMCIA interface */ +#define SH7750_BCR1_ENDIAN 0x80000000 /* Endianness (1 - little endian) */ +#define SH7750_BCR1_MASTER 0x40000000 /* Master/Slave mode (1-master) */ +#define SH7750_BCR1_A0MPX 0x20000000 /* Area 0 Memory Type (0-SRAM,1-MPX) */ +#define SH7750_BCR1_IPUP 0x02000000 /* Input Pin Pull-up Control: */ + /* 0 - pull-up resistor is on for */ + /* control input pins */ + /* 1 - pull-up resistor is off */ +#define SH7750_BCR1_OPUP 0x01000000 /* Output Pin Pull-up Control: */ + /* 0 - pull-up resistor is on for */ + /* control output pins */ + /* 1 - pull-up resistor is off */ +#define SH7750_BCR1_A1MBC 0x00200000 /* Area 1 SRAM Byte Control Mode: */ + /* 0 - Area 1 SRAM is set to */ + /* normal mode */ + /* 1 - Area 1 SRAM is set to byte */ + /* control mode */ +#define SH7750_BCR1_A4MBC 0x00100000 /* Area 4 SRAM Byte Control Mode: */ + /* 0 - Area 4 SRAM is set to */ + /* normal mode */ + /* 1 - Area 4 SRAM is set to byte */ + /* control mode */ +#define SH7750_BCR1_BREQEN 0x00080000 /* BREQ Enable: */ + /* 0 - External requests are not */ + /* accepted */ + /* 1 - External requests are */ + /* accepted */ +#define SH7750_BCR1_PSHR 0x00040000 /* Partial Sharing Bit: */ + /* 0 - Master Mode */ + /* 1 - Partial-sharing Mode */ +#define SH7750_BCR1_MEMMPX 0x00020000 /* Area 1 to 6 MPX Interface: */ + /* 0 - SRAM/burst ROM interface */ + /* 1 - MPX interface */ +#define SH7750_BCR1_HIZMEM 0x00008000 /* High Impendance Control. */ + /* Specifies the state of A[25:0], */ + /* BS\, CSn\, RD/WR\, CE2A\, CE2B\ */ + /* in standby mode and when bus is */ + /* released: */ + /* 0 - signals go to High-Z mode */ + /* 1 - signals driven */ +#define SH7750_BCR1_HIZCNT 0x00004000 /* High Impendance Control. 
*/ + /* Specifies the state of the */ + /* RAS\, RAS2\, WEn\, CASn\, DQMn, */ + /* RD\, CASS\, FRAME\, RD2\ */ + /* signals in standby mode and */ + /* when bus is released: */ + /* 0 - signals go to High-Z mode */ + /* 1 - signals driven */ +#define SH7750_BCR1_A0BST 0x00003800 /* Area 0 Burst ROM Control */ +#define SH7750_BCR1_A0BST_SRAM 0x0000 /* Area 0 accessed as SRAM i/f */ +#define SH7750_BCR1_A0BST_ROM4 0x0800 /* Area 0 accessed as burst ROM */ + /* interface, 4 cosequtive access */ +#define SH7750_BCR1_A0BST_ROM8 0x1000 /* Area 0 accessed as burst ROM */ + /* interface, 8 cosequtive access */ +#define SH7750_BCR1_A0BST_ROM16 0x1800 /* Area 0 accessed as burst ROM */ + /* interface, 16 cosequtive access */ +#define SH7750_BCR1_A0BST_ROM32 0x2000 /* Area 0 accessed as burst ROM */ + /* interface, 32 cosequtive access */ + +#define SH7750_BCR1_A5BST 0x00000700 /* Area 5 Burst ROM Control */ +#define SH7750_BCR1_A5BST_SRAM 0x0000 /* Area 5 accessed as SRAM i/f */ +#define SH7750_BCR1_A5BST_ROM4 0x0100 /* Area 5 accessed as burst ROM */ + /* interface, 4 cosequtive access */ +#define SH7750_BCR1_A5BST_ROM8 0x0200 /* Area 5 accessed as burst ROM */ + /* interface, 8 cosequtive access */ +#define SH7750_BCR1_A5BST_ROM16 0x0300 /* Area 5 accessed as burst ROM */ + /* interface, 16 cosequtive access */ +#define SH7750_BCR1_A5BST_ROM32 0x0400 /* Area 5 accessed as burst ROM */ + /* interface, 32 cosequtive access */ + +#define SH7750_BCR1_A6BST 0x000000E0 /* Area 6 Burst ROM Control */ +#define SH7750_BCR1_A6BST_SRAM 0x0000 /* Area 6 accessed as SRAM i/f */ +#define SH7750_BCR1_A6BST_ROM4 0x0020 /* Area 6 accessed as burst ROM */ + /* interface, 4 cosequtive access */ +#define SH7750_BCR1_A6BST_ROM8 0x0040 /* Area 6 accessed as burst ROM */ + /* interface, 8 cosequtive access */ +#define SH7750_BCR1_A6BST_ROM16 0x0060 /* Area 6 accessed as burst ROM */ + /* interface, 16 cosequtive access */ +#define SH7750_BCR1_A6BST_ROM32 0x0080 /* Area 6 accessed as burst ROM */ + /* interface, 32 cosequtive access */ + +#define SH7750_BCR1_DRAMTP 0x001C /* Area 2 and 3 Memory Type */ +#define SH7750_BCR1_DRAMTP_2SRAM_3SRAM 0x0000 /* Area 2 and 3 are SRAM or */ + /* MPX interface. 
*/ +#define SH7750_BCR1_DRAMTP_2SRAM_3SDRAM 0x0008 /* Area 2 - SRAM/MPX, Area 3 */ + /* synchronous DRAM */ +#define SH7750_BCR1_DRAMTP_2SDRAM_3SDRAM 0x000C /* Area 2 and 3 are */ + /* synchronous DRAM interface */ +#define SH7750_BCR1_DRAMTP_2SRAM_3DRAM 0x0010 /* Area 2 - SRAM/MPX, Area 3 */ + /* DRAM interface */ +#define SH7750_BCR1_DRAMTP_2DRAM_3DRAM 0x0014 /* Area 2 and 3 are DRAM */ + /* interface */ + +#define SH7750_BCR1_A56PCM 0x00000001 /* Area 5 and 6 Bus Type: */ + /* 0 - SRAM interface */ + /* 1 - PCMCIA interface */ /* Bus Control Register 2 (half) - BCR2 */ -#define SH7750_BCR2_REGOFS 0x800004 /* offset */ +#define SH7750_BCR2_REGOFS 0x800004 /* offset */ #define SH7750_BCR2 SH7750_P4_REG32(SH7750_BCR2_REGOFS) #define SH7750_BCR2_A7 SH7750_A7_REG32(SH7750_BCR2_REGOFS) -#define SH7750_BCR2_A0SZ 0xC000 /* Area 0 Bus Width */ +#define SH7750_BCR2_A0SZ 0xC000 /* Area 0 Bus Width */ #define SH7750_BCR2_A0SZ_S 14 -#define SH7750_BCR2_A6SZ 0x3000 /* Area 6 Bus Width */ +#define SH7750_BCR2_A6SZ 0x3000 /* Area 6 Bus Width */ #define SH7750_BCR2_A6SZ_S 12 -#define SH7750_BCR2_A5SZ 0x0C00 /* Area 5 Bus Width */ +#define SH7750_BCR2_A5SZ 0x0C00 /* Area 5 Bus Width */ #define SH7750_BCR2_A5SZ_S 10 -#define SH7750_BCR2_A4SZ 0x0300 /* Area 4 Bus Width */ +#define SH7750_BCR2_A4SZ 0x0300 /* Area 4 Bus Width */ #define SH7750_BCR2_A4SZ_S 8 -#define SH7750_BCR2_A3SZ 0x00C0 /* Area 3 Bus Width */ +#define SH7750_BCR2_A3SZ 0x00C0 /* Area 3 Bus Width */ #define SH7750_BCR2_A3SZ_S 6 -#define SH7750_BCR2_A2SZ 0x0030 /* Area 2 Bus Width */ +#define SH7750_BCR2_A2SZ 0x0030 /* Area 2 Bus Width */ #define SH7750_BCR2_A2SZ_S 4 -#define SH7750_BCR2_A1SZ 0x000C /* Area 1 Bus Width */ +#define SH7750_BCR2_A1SZ 0x000C /* Area 1 Bus Width */ #define SH7750_BCR2_A1SZ_S 2 -#define SH7750_BCR2_SZ_64 0 /* 64 bits */ -#define SH7750_BCR2_SZ_8 1 /* 8 bits */ -#define SH7750_BCR2_SZ_16 2 /* 16 bits */ -#define SH7750_BCR2_SZ_32 3 /* 32 bits */ -#define SH7750_BCR2_PORTEN 0x0001 /* Port Function Enable : - 0 - D51-D32 are not used as a port - 1 - D51-D32 are used as a port */ +#define SH7750_BCR2_SZ_64 0 /* 64 bits */ +#define SH7750_BCR2_SZ_8 1 /* 8 bits */ +#define SH7750_BCR2_SZ_16 2 /* 16 bits */ +#define SH7750_BCR2_SZ_32 3 /* 32 bits */ +#define SH7750_BCR2_PORTEN 0x0001 /* Port Function Enable */ + /* 0 - D51-D32 are not used as a port */ + /* 1 - D51-D32 are used as a port */ /* Wait Control Register 1 - WCR1 */ -#define SH7750_WCR1_REGOFS 0x800008 /* offset */ +#define SH7750_WCR1_REGOFS 0x800008 /* offset */ #define SH7750_WCR1 SH7750_P4_REG32(SH7750_WCR1_REGOFS) #define SH7750_WCR1_A7 SH7750_A7_REG32(SH7750_WCR1_REGOFS) -#define SH7750_WCR1_DMAIW 0x70000000 /* DACK Device Inter-Cycle Idle - specification */ +#define SH7750_WCR1_DMAIW 0x70000000 /* DACK Device Inter-Cycle Idle */ + /* specification */ #define SH7750_WCR1_DMAIW_S 28 -#define SH7750_WCR1_A6IW 0x07000000 /* Area 6 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A6IW 0x07000000 /* Area 6 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A6IW_S 24 -#define SH7750_WCR1_A5IW 0x00700000 /* Area 5 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A5IW 0x00700000 /* Area 5 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A5IW_S 20 -#define SH7750_WCR1_A4IW 0x00070000 /* Area 4 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A4IW 0x00070000 /* Area 4 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A4IW_S 16 -#define SH7750_WCR1_A3IW 0x00007000 /* Area 3 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A3IW 0x00007000 /* Area 3 Inter-Cycle Idle spec. 
*/ #define SH7750_WCR1_A3IW_S 12 -#define SH7750_WCR1_A2IW 0x00000700 /* Area 2 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A2IW 0x00000700 /* Area 2 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A2IW_S 8 -#define SH7750_WCR1_A1IW 0x00000070 /* Area 1 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A1IW 0x00000070 /* Area 1 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A1IW_S 4 -#define SH7750_WCR1_A0IW 0x00000007 /* Area 0 Inter-Cycle Idle spec. */ +#define SH7750_WCR1_A0IW 0x00000007 /* Area 0 Inter-Cycle Idle spec. */ #define SH7750_WCR1_A0IW_S 0 /* Wait Control Register 2 - WCR2 */ -#define SH7750_WCR2_REGOFS 0x80000C /* offset */ +#define SH7750_WCR2_REGOFS 0x80000C /* offset */ #define SH7750_WCR2 SH7750_P4_REG32(SH7750_WCR2_REGOFS) #define SH7750_WCR2_A7 SH7750_A7_REG32(SH7750_WCR2_REGOFS) -#define SH7750_WCR2_A6W 0xE0000000 /* Area 6 Wait Control */ +#define SH7750_WCR2_A6W 0xE0000000 /* Area 6 Wait Control */ #define SH7750_WCR2_A6W_S 29 -#define SH7750_WCR2_A6B 0x1C000000 /* Area 6 Burst Pitch */ +#define SH7750_WCR2_A6B 0x1C000000 /* Area 6 Burst Pitch */ #define SH7750_WCR2_A6B_S 26 -#define SH7750_WCR2_A5W 0x03800000 /* Area 5 Wait Control */ +#define SH7750_WCR2_A5W 0x03800000 /* Area 5 Wait Control */ #define SH7750_WCR2_A5W_S 23 -#define SH7750_WCR2_A5B 0x00700000 /* Area 5 Burst Pitch */ +#define SH7750_WCR2_A5B 0x00700000 /* Area 5 Burst Pitch */ #define SH7750_WCR2_A5B_S 20 -#define SH7750_WCR2_A4W 0x000E0000 /* Area 4 Wait Control */ +#define SH7750_WCR2_A4W 0x000E0000 /* Area 4 Wait Control */ #define SH7750_WCR2_A4W_S 17 -#define SH7750_WCR2_A3W 0x0000E000 /* Area 3 Wait Control */ +#define SH7750_WCR2_A3W 0x0000E000 /* Area 3 Wait Control */ #define SH7750_WCR2_A3W_S 13 -#define SH7750_WCR2_A2W 0x00000E00 /* Area 2 Wait Control */ +#define SH7750_WCR2_A2W 0x00000E00 /* Area 2 Wait Control */ #define SH7750_WCR2_A2W_S 9 -#define SH7750_WCR2_A1W 0x000001C0 /* Area 1 Wait Control */ +#define SH7750_WCR2_A1W 0x000001C0 /* Area 1 Wait Control */ #define SH7750_WCR2_A1W_S 6 -#define SH7750_WCR2_A0W 0x00000038 /* Area 0 Wait Control */ +#define SH7750_WCR2_A0W 0x00000038 /* Area 0 Wait Control */ #define SH7750_WCR2_A0W_S 3 -#define SH7750_WCR2_A0B 0x00000007 /* Area 0 Burst Pitch */ +#define SH7750_WCR2_A0B 0x00000007 /* Area 0 Burst Pitch */ #define SH7750_WCR2_A0B_S 0 -#define SH7750_WCR2_WS0 0 /* 0 wait states inserted */ -#define SH7750_WCR2_WS1 1 /* 1 wait states inserted */ -#define SH7750_WCR2_WS2 2 /* 2 wait states inserted */ -#define SH7750_WCR2_WS3 3 /* 3 wait states inserted */ -#define SH7750_WCR2_WS6 4 /* 6 wait states inserted */ -#define SH7750_WCR2_WS9 5 /* 9 wait states inserted */ -#define SH7750_WCR2_WS12 6 /* 12 wait states inserted */ -#define SH7750_WCR2_WS15 7 /* 15 wait states inserted */ - -#define SH7750_WCR2_BPWS0 0 /* 0 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS1 1 /* 1 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS2 2 /* 2 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS3 3 /* 3 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS4 4 /* 4 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS5 5 /* 5 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS6 6 /* 6 wait states inserted from 2nd access */ -#define SH7750_WCR2_BPWS7 7 /* 7 wait states inserted from 2nd access */ +#define SH7750_WCR2_WS0 0 /* 0 wait states inserted */ +#define SH7750_WCR2_WS1 1 /* 1 wait states inserted */ +#define SH7750_WCR2_WS2 2 /* 2 wait states inserted */ +#define 
SH7750_WCR2_WS3 3 /* 3 wait states inserted */ +#define SH7750_WCR2_WS6 4 /* 6 wait states inserted */ +#define SH7750_WCR2_WS9 5 /* 9 wait states inserted */ +#define SH7750_WCR2_WS12 6 /* 12 wait states inserted */ +#define SH7750_WCR2_WS15 7 /* 15 wait states inserted */ + +#define SH7750_WCR2_BPWS0 0 /* 0 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS1 1 /* 1 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS2 2 /* 2 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS3 3 /* 3 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS4 4 /* 4 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS5 5 /* 5 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS6 6 /* 6 wait states inserted from 2nd access */ +#define SH7750_WCR2_BPWS7 7 /* 7 wait states inserted from 2nd access */ /* DRAM CAS\ Assertion Delay (area 3,2) */ -#define SH7750_WCR2_DRAM_CAS_ASW1 0 /* 1 cycle */ -#define SH7750_WCR2_DRAM_CAS_ASW2 1 /* 2 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW3 2 /* 3 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW4 3 /* 4 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW7 4 /* 7 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW10 5 /* 10 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW13 6 /* 13 cycles */ -#define SH7750_WCR2_DRAM_CAS_ASW16 7 /* 16 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW1 0 /* 1 cycle */ +#define SH7750_WCR2_DRAM_CAS_ASW2 1 /* 2 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW3 2 /* 3 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW4 3 /* 4 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW7 4 /* 7 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW10 5 /* 10 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW13 6 /* 13 cycles */ +#define SH7750_WCR2_DRAM_CAS_ASW16 7 /* 16 cycles */ /* SDRAM CAS\ Latency Cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT1 1 /* 1 cycle */ -#define SH7750_WCR2_SDRAM_CAS_LAT2 2 /* 2 cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT3 3 /* 3 cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT4 4 /* 4 cycles */ -#define SH7750_WCR2_SDRAM_CAS_LAT5 5 /* 5 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT1 1 /* 1 cycle */ +#define SH7750_WCR2_SDRAM_CAS_LAT2 2 /* 2 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT3 3 /* 3 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT4 4 /* 4 cycles */ +#define SH7750_WCR2_SDRAM_CAS_LAT5 5 /* 5 cycles */ /* Wait Control Register 3 - WCR3 */ -#define SH7750_WCR3_REGOFS 0x800010 /* offset */ +#define SH7750_WCR3_REGOFS 0x800010 /* offset */ #define SH7750_WCR3 SH7750_P4_REG32(SH7750_WCR3_REGOFS) #define SH7750_WCR3_A7 SH7750_A7_REG32(SH7750_WCR3_REGOFS) -#define SH7750_WCR3_A6S 0x04000000 /* Area 6 Write Strobe Setup time */ -#define SH7750_WCR3_A6H 0x03000000 /* Area 6 Data Hold Time */ +#define SH7750_WCR3_A6S 0x04000000 /* Area 6 Write Strobe Setup time */ +#define SH7750_WCR3_A6H 0x03000000 /* Area 6 Data Hold Time */ #define SH7750_WCR3_A6H_S 24 -#define SH7750_WCR3_A5S 0x00400000 /* Area 5 Write Strobe Setup time */ -#define SH7750_WCR3_A5H 0x00300000 /* Area 5 Data Hold Time */ +#define SH7750_WCR3_A5S 0x00400000 /* Area 5 Write Strobe Setup time */ +#define SH7750_WCR3_A5H 0x00300000 /* Area 5 Data Hold Time */ #define SH7750_WCR3_A5H_S 20 -#define SH7750_WCR3_A4S 0x00040000 /* Area 4 Write Strobe Setup time */ -#define SH7750_WCR3_A4H 0x00030000 /* Area 4 Data Hold Time */ +#define SH7750_WCR3_A4S 0x00040000 /* Area 4 Write Strobe Setup time */ +#define SH7750_WCR3_A4H 0x00030000 /* Area 4 Data Hold Time */ #define SH7750_WCR3_A4H_S 16 -#define SH7750_WCR3_A3S 0x00004000 /* Area 3 Write Strobe Setup time */ 
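As a usage sketch (not part of the patch itself), the WCR2 field masks, shift constants and wait-state codes above are typically combined as shown below. It assumes the SH7750_P4_REG32()-based register macros defined earlier in this header expand to plain addresses, and the wait-state choices are illustrative only.

#include <stdint.h>

static inline void sh7750_wcr2_example(void)
{
    /* Illustrative only: 6 waits for area 6, 3 waits and burst pitch 1 for area 0. */
    uint32_t wcr2 = ((uint32_t)SH7750_WCR2_WS6   << SH7750_WCR2_A6W_S) |
                    ((uint32_t)SH7750_WCR2_WS3   << SH7750_WCR2_A0W_S) |
                    ((uint32_t)SH7750_WCR2_BPWS1 << SH7750_WCR2_A0B_S);

    /* Assumes SH7750_WCR2 is a bare P4 address, as the macros above suggest. */
    *(volatile uint32_t *)SH7750_WCR2 = wcr2;
}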
-#define SH7750_WCR3_A3H 0x00003000 /* Area 3 Data Hold Time */ +#define SH7750_WCR3_A3S 0x00004000 /* Area 3 Write Strobe Setup time */ +#define SH7750_WCR3_A3H 0x00003000 /* Area 3 Data Hold Time */ #define SH7750_WCR3_A3H_S 12 -#define SH7750_WCR3_A2S 0x00000400 /* Area 2 Write Strobe Setup time */ -#define SH7750_WCR3_A2H 0x00000300 /* Area 2 Data Hold Time */ +#define SH7750_WCR3_A2S 0x00000400 /* Area 2 Write Strobe Setup time */ +#define SH7750_WCR3_A2H 0x00000300 /* Area 2 Data Hold Time */ #define SH7750_WCR3_A2H_S 8 -#define SH7750_WCR3_A1S 0x00000040 /* Area 1 Write Strobe Setup time */ -#define SH7750_WCR3_A1H 0x00000030 /* Area 1 Data Hold Time */ +#define SH7750_WCR3_A1S 0x00000040 /* Area 1 Write Strobe Setup time */ +#define SH7750_WCR3_A1H 0x00000030 /* Area 1 Data Hold Time */ #define SH7750_WCR3_A1H_S 4 -#define SH7750_WCR3_A0S 0x00000004 /* Area 0 Write Strobe Setup time */ -#define SH7750_WCR3_A0H 0x00000003 /* Area 0 Data Hold Time */ +#define SH7750_WCR3_A0S 0x00000004 /* Area 0 Write Strobe Setup time */ +#define SH7750_WCR3_A0H 0x00000003 /* Area 0 Data Hold Time */ #define SH7750_WCR3_A0H_S 0 -#define SH7750_WCR3_DHWS_0 0 /* 0 wait states data hold time */ -#define SH7750_WCR3_DHWS_1 1 /* 1 wait states data hold time */ -#define SH7750_WCR3_DHWS_2 2 /* 2 wait states data hold time */ -#define SH7750_WCR3_DHWS_3 3 /* 3 wait states data hold time */ +#define SH7750_WCR3_DHWS_0 0 /* 0 wait states data hold time */ +#define SH7750_WCR3_DHWS_1 1 /* 1 wait states data hold time */ +#define SH7750_WCR3_DHWS_2 2 /* 2 wait states data hold time */ +#define SH7750_WCR3_DHWS_3 3 /* 3 wait states data hold time */ -#define SH7750_MCR_REGOFS 0x800014 /* offset */ +#define SH7750_MCR_REGOFS 0x800014 /* offset */ #define SH7750_MCR SH7750_P4_REG32(SH7750_MCR_REGOFS) #define SH7750_MCR_A7 SH7750_A7_REG32(SH7750_MCR_REGOFS) -#define SH7750_MCR_RASD 0x80000000 /* RAS Down mode */ -#define SH7750_MCR_MRSET 0x40000000 /* SDRAM Mode Register Set */ -#define SH7750_MCR_PALL 0x00000000 /* SDRAM Precharge All cmd. Mode */ -#define SH7750_MCR_TRC 0x38000000 /* RAS Precharge Time at End of - Refresh: */ -#define SH7750_MCR_TRC_0 0x00000000 /* 0 */ -#define SH7750_MCR_TRC_3 0x08000000 /* 3 */ -#define SH7750_MCR_TRC_6 0x10000000 /* 6 */ -#define SH7750_MCR_TRC_9 0x18000000 /* 9 */ -#define SH7750_MCR_TRC_12 0x20000000 /* 12 */ -#define SH7750_MCR_TRC_15 0x28000000 /* 15 */ -#define SH7750_MCR_TRC_18 0x30000000 /* 18 */ -#define SH7750_MCR_TRC_21 0x38000000 /* 21 */ - -#define SH7750_MCR_TCAS 0x00800000 /* CAS Negation Period */ -#define SH7750_MCR_TCAS_1 0x00000000 /* 1 */ -#define SH7750_MCR_TCAS_2 0x00800000 /* 2 */ - -#define SH7750_MCR_TPC 0x00380000 /* DRAM: RAS Precharge Period - SDRAM: minimum number of cycles - until the next bank active cmd - is output after precharging */ +#define SH7750_MCR_RASD 0x80000000 /* RAS Down mode */ +#define SH7750_MCR_MRSET 0x40000000 /* SDRAM Mode Register Set */ +#define SH7750_MCR_PALL 0x00000000 /* SDRAM Precharge All cmd. 
Mode */ +#define SH7750_MCR_TRC 0x38000000 /* RAS Precharge Time at End of */ + /* Refresh: */ +#define SH7750_MCR_TRC_0 0x00000000 /* 0 */ +#define SH7750_MCR_TRC_3 0x08000000 /* 3 */ +#define SH7750_MCR_TRC_6 0x10000000 /* 6 */ +#define SH7750_MCR_TRC_9 0x18000000 /* 9 */ +#define SH7750_MCR_TRC_12 0x20000000 /* 12 */ +#define SH7750_MCR_TRC_15 0x28000000 /* 15 */ +#define SH7750_MCR_TRC_18 0x30000000 /* 18 */ +#define SH7750_MCR_TRC_21 0x38000000 /* 21 */ + +#define SH7750_MCR_TCAS 0x00800000 /* CAS Negation Period */ +#define SH7750_MCR_TCAS_1 0x00000000 /* 1 */ +#define SH7750_MCR_TCAS_2 0x00800000 /* 2 */ + +#define SH7750_MCR_TPC 0x00380000 /* DRAM: RAS Precharge Period */ + /* SDRAM: minimum number of cycles */ + /* until the next bank active cmd */ + /* is output after precharging */ #define SH7750_MCR_TPC_S 19 -#define SH7750_MCR_TPC_SDRAM_1 0x00000000 /* 1 cycle */ -#define SH7750_MCR_TPC_SDRAM_2 0x00080000 /* 2 cycles */ -#define SH7750_MCR_TPC_SDRAM_3 0x00100000 /* 3 cycles */ -#define SH7750_MCR_TPC_SDRAM_4 0x00180000 /* 4 cycles */ -#define SH7750_MCR_TPC_SDRAM_5 0x00200000 /* 5 cycles */ -#define SH7750_MCR_TPC_SDRAM_6 0x00280000 /* 6 cycles */ -#define SH7750_MCR_TPC_SDRAM_7 0x00300000 /* 7 cycles */ -#define SH7750_MCR_TPC_SDRAM_8 0x00380000 /* 8 cycles */ - -#define SH7750_MCR_RCD 0x00030000 /* DRAM: RAS-CAS Assertion Delay time - SDRAM: bank active-read/write cmd - delay time */ -#define SH7750_MCR_RCD_DRAM_2 0x00000000 /* DRAM delay 2 clocks */ -#define SH7750_MCR_RCD_DRAM_3 0x00010000 /* DRAM delay 3 clocks */ -#define SH7750_MCR_RCD_DRAM_4 0x00020000 /* DRAM delay 4 clocks */ -#define SH7750_MCR_RCD_DRAM_5 0x00030000 /* DRAM delay 5 clocks */ -#define SH7750_MCR_RCD_SDRAM_2 0x00010000 /* DRAM delay 2 clocks */ -#define SH7750_MCR_RCD_SDRAM_3 0x00020000 /* DRAM delay 3 clocks */ -#define SH7750_MCR_RCD_SDRAM_4 0x00030000 /* DRAM delay 4 clocks */ - -#define SH7750_MCR_TRWL 0x0000E000 /* SDRAM Write Precharge Delay */ -#define SH7750_MCR_TRWL_1 0x00000000 /* 1 */ -#define SH7750_MCR_TRWL_2 0x00002000 /* 2 */ -#define SH7750_MCR_TRWL_3 0x00004000 /* 3 */ -#define SH7750_MCR_TRWL_4 0x00006000 /* 4 */ -#define SH7750_MCR_TRWL_5 0x00008000 /* 5 */ - -#define SH7750_MCR_TRAS 0x00001C00 /* DRAM: CAS-Before-RAS Refresh RAS - asserting period - SDRAM: Command interval after - synchronous DRAM refresh */ -#define SH7750_MCR_TRAS_DRAM_2 0x00000000 /* 2 */ -#define SH7750_MCR_TRAS_DRAM_3 0x00000400 /* 3 */ -#define SH7750_MCR_TRAS_DRAM_4 0x00000800 /* 4 */ -#define SH7750_MCR_TRAS_DRAM_5 0x00000C00 /* 5 */ -#define SH7750_MCR_TRAS_DRAM_6 0x00001000 /* 6 */ -#define SH7750_MCR_TRAS_DRAM_7 0x00001400 /* 7 */ -#define SH7750_MCR_TRAS_DRAM_8 0x00001800 /* 8 */ -#define SH7750_MCR_TRAS_DRAM_9 0x00001C00 /* 9 */ - -#define SH7750_MCR_TRAS_SDRAM_TRC_4 0x00000000 /* 4 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_5 0x00000400 /* 5 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_6 0x00000800 /* 6 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_7 0x00000C00 /* 7 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_8 0x00001000 /* 8 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_9 0x00001400 /* 9 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_10 0x00001800 /* 10 + TRC */ -#define SH7750_MCR_TRAS_SDRAM_TRC_11 0x00001C00 /* 11 + TRC */ - -#define SH7750_MCR_BE 0x00000200 /* Burst Enable */ -#define SH7750_MCR_SZ 0x00000180 /* Memory Data Size */ -#define SH7750_MCR_SZ_64 0x00000000 /* 64 bits */ -#define SH7750_MCR_SZ_16 0x00000100 /* 16 bits */ -#define SH7750_MCR_SZ_32 0x00000180 /* 32 bits */ - -#define 
SH7750_MCR_AMX 0x00000078 /* Address Multiplexing */ +#define SH7750_MCR_TPC_SDRAM_1 0x00000000 /* 1 cycle */ +#define SH7750_MCR_TPC_SDRAM_2 0x00080000 /* 2 cycles */ +#define SH7750_MCR_TPC_SDRAM_3 0x00100000 /* 3 cycles */ +#define SH7750_MCR_TPC_SDRAM_4 0x00180000 /* 4 cycles */ +#define SH7750_MCR_TPC_SDRAM_5 0x00200000 /* 5 cycles */ +#define SH7750_MCR_TPC_SDRAM_6 0x00280000 /* 6 cycles */ +#define SH7750_MCR_TPC_SDRAM_7 0x00300000 /* 7 cycles */ +#define SH7750_MCR_TPC_SDRAM_8 0x00380000 /* 8 cycles */ + +#define SH7750_MCR_RCD 0x00030000 /* DRAM: RAS-CAS Assertion Delay */ + /* time */ + /* SDRAM: bank active-read/write */ + /* command delay time */ +#define SH7750_MCR_RCD_DRAM_2 0x00000000 /* DRAM delay 2 clocks */ +#define SH7750_MCR_RCD_DRAM_3 0x00010000 /* DRAM delay 3 clocks */ +#define SH7750_MCR_RCD_DRAM_4 0x00020000 /* DRAM delay 4 clocks */ +#define SH7750_MCR_RCD_DRAM_5 0x00030000 /* DRAM delay 5 clocks */ +#define SH7750_MCR_RCD_SDRAM_2 0x00010000 /* DRAM delay 2 clocks */ +#define SH7750_MCR_RCD_SDRAM_3 0x00020000 /* DRAM delay 3 clocks */ +#define SH7750_MCR_RCD_SDRAM_4 0x00030000 /* DRAM delay 4 clocks */ + +#define SH7750_MCR_TRWL 0x0000E000 /* SDRAM Write Precharge Delay */ +#define SH7750_MCR_TRWL_1 0x00000000 /* 1 */ +#define SH7750_MCR_TRWL_2 0x00002000 /* 2 */ +#define SH7750_MCR_TRWL_3 0x00004000 /* 3 */ +#define SH7750_MCR_TRWL_4 0x00006000 /* 4 */ +#define SH7750_MCR_TRWL_5 0x00008000 /* 5 */ + +#define SH7750_MCR_TRAS 0x00001C00 /* DRAM: CAS-Before-RAS Refresh RAS */ + /* asserting period */ + /* SDRAM: Command interval after */ + /* synchronous DRAM refresh */ +#define SH7750_MCR_TRAS_DRAM_2 0x00000000 /* 2 */ +#define SH7750_MCR_TRAS_DRAM_3 0x00000400 /* 3 */ +#define SH7750_MCR_TRAS_DRAM_4 0x00000800 /* 4 */ +#define SH7750_MCR_TRAS_DRAM_5 0x00000C00 /* 5 */ +#define SH7750_MCR_TRAS_DRAM_6 0x00001000 /* 6 */ +#define SH7750_MCR_TRAS_DRAM_7 0x00001400 /* 7 */ +#define SH7750_MCR_TRAS_DRAM_8 0x00001800 /* 8 */ +#define SH7750_MCR_TRAS_DRAM_9 0x00001C00 /* 9 */ + +#define SH7750_MCR_TRAS_SDRAM_TRC_4 0x00000000 /* 4 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_5 0x00000400 /* 5 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_6 0x00000800 /* 6 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_7 0x00000C00 /* 7 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_8 0x00001000 /* 8 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_9 0x00001400 /* 9 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_10 0x00001800 /* 10 + TRC */ +#define SH7750_MCR_TRAS_SDRAM_TRC_11 0x00001C00 /* 11 + TRC */ + +#define SH7750_MCR_BE 0x00000200 /* Burst Enable */ +#define SH7750_MCR_SZ 0x00000180 /* Memory Data Size */ +#define SH7750_MCR_SZ_64 0x00000000 /* 64 bits */ +#define SH7750_MCR_SZ_16 0x00000100 /* 16 bits */ +#define SH7750_MCR_SZ_32 0x00000180 /* 32 bits */ + +#define SH7750_MCR_AMX 0x00000078 /* Address Multiplexing */ #define SH7750_MCR_AMX_S 3 -#define SH7750_MCR_AMX_DRAM_8BIT_COL 0x00000000 /* 8-bit column addr */ -#define SH7750_MCR_AMX_DRAM_9BIT_COL 0x00000008 /* 9-bit column addr */ -#define SH7750_MCR_AMX_DRAM_10BIT_COL 0x00000010 /* 10-bit column addr */ -#define SH7750_MCR_AMX_DRAM_11BIT_COL 0x00000018 /* 11-bit column addr */ -#define SH7750_MCR_AMX_DRAM_12BIT_COL 0x00000020 /* 12-bit column addr */ +#define SH7750_MCR_AMX_DRAM_8BIT_COL 0x00000000 /* 8-bit column addr */ +#define SH7750_MCR_AMX_DRAM_9BIT_COL 0x00000008 /* 9-bit column addr */ +#define SH7750_MCR_AMX_DRAM_10BIT_COL 0x00000010 /* 10-bit column addr */ +#define SH7750_MCR_AMX_DRAM_11BIT_COL 0x00000018 /* 11-bit column addr */ 
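Similarly, a minimal sketch (again not part of the patch) of how the MCR field constants above might be combined for an SDRAM configuration; the timing values are placeholders rather than recommendations, since real settings depend on the memory devices and bus clock, and the bare-address assumption from the previous sketch applies here as well.

#include <stdint.h>

static inline void sh7750_mcr_sdram_example(void)
{
    uint32_t mcr = SH7750_MCR_SZ_32             /* 32-bit memory data size */
                 | SH7750_MCR_BE                /* burst enable */
                 | SH7750_MCR_TRWL_2            /* SDRAM write precharge delay: 2 */
                 | SH7750_MCR_RCD_SDRAM_2       /* bank active to read/write cmd: 2 cycles */
                 | SH7750_MCR_TPC_SDRAM_3       /* precharge to next bank active: 3 cycles */
                 | SH7750_MCR_TRAS_SDRAM_TRC_4; /* command interval after refresh: 4 + TRC */

    /* Assumes SH7750_MCR is a bare P4 address, as the macros above suggest. */
    *(volatile uint32_t *)SH7750_MCR = mcr;
}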
+#define SH7750_MCR_AMX_DRAM_12BIT_COL 0x00000020 /* 12-bit column addr */ /* See SH7750 Hardware Manual for SDRAM address multiplexor selection */ -#define SH7750_MCR_RFSH 0x00000004 /* Refresh Control */ -#define SH7750_MCR_RMODE 0x00000002 /* Refresh Mode: */ -#define SH7750_MCR_RMODE_NORMAL 0x00000000 /* Normal Refresh Mode */ -#define SH7750_MCR_RMODE_SELF 0x00000002 /* Self-Refresh Mode */ -#define SH7750_MCR_RMODE_EDO 0x00000001 /* EDO Mode */ +#define SH7750_MCR_RFSH 0x00000004 /* Refresh Control */ +#define SH7750_MCR_RMODE 0x00000002 /* Refresh Mode: */ +#define SH7750_MCR_RMODE_NORMAL 0x00000000 /* Normal Refresh Mode */ +#define SH7750_MCR_RMODE_SELF 0x00000002 /* Self-Refresh Mode */ +#define SH7750_MCR_RMODE_EDO 0x00000001 /* EDO Mode */ /* SDRAM Mode Set address */ #define SH7750_SDRAM_MODE_A2_BASE 0xFF900000 @@ -894,119 +894,119 @@ /* PCMCIA Control Register (half) - PCR */ -#define SH7750_PCR_REGOFS 0x800018 /* offset */ +#define SH7750_PCR_REGOFS 0x800018 /* offset */ #define SH7750_PCR SH7750_P4_REG32(SH7750_PCR_REGOFS) #define SH7750_PCR_A7 SH7750_A7_REG32(SH7750_PCR_REGOFS) -#define SH7750_PCR_A5PCW 0xC000 /* Area 5 PCMCIA Wait - Number of wait - states to be added to the number of - waits specified by WCR2 in a low-speed - PCMCIA wait cycle */ -#define SH7750_PCR_A5PCW_0 0x0000 /* 0 waits inserted */ -#define SH7750_PCR_A5PCW_15 0x4000 /* 15 waits inserted */ -#define SH7750_PCR_A5PCW_30 0x8000 /* 30 waits inserted */ -#define SH7750_PCR_A5PCW_50 0xC000 /* 50 waits inserted */ - -#define SH7750_PCR_A6PCW 0x3000 /* Area 6 PCMCIA Wait - Number of wait - states to be added to the number of - waits specified by WCR2 in a low-speed - PCMCIA wait cycle */ -#define SH7750_PCR_A6PCW_0 0x0000 /* 0 waits inserted */ -#define SH7750_PCR_A6PCW_15 0x1000 /* 15 waits inserted */ -#define SH7750_PCR_A6PCW_30 0x2000 /* 30 waits inserted */ -#define SH7750_PCR_A6PCW_50 0x3000 /* 50 waits inserted */ - -#define SH7750_PCR_A5TED 0x0E00 /* Area 5 Address-OE\/WE\ Assertion Delay, - delay time from address output to - OE\/WE\ assertion on the connected - PCMCIA interface */ +#define SH7750_PCR_A5PCW 0xC000 /* Area 5 PCMCIA Wait - Number of wait */ + /* states to be added to the number of */ + /* waits specified by WCR2 in a */ + /* low-speed PCMCIA wait cycle */ +#define SH7750_PCR_A5PCW_0 0x0000 /* 0 waits inserted */ +#define SH7750_PCR_A5PCW_15 0x4000 /* 15 waits inserted */ +#define SH7750_PCR_A5PCW_30 0x8000 /* 30 waits inserted */ +#define SH7750_PCR_A5PCW_50 0xC000 /* 50 waits inserted */ + +#define SH7750_PCR_A6PCW 0x3000 /* Area 6 PCMCIA Wait - Number of wait */ + /* states to be added to the number of */ + /* waits specified by WCR2 in a */ + /* low-speed PCMCIA wait cycle */ +#define SH7750_PCR_A6PCW_0 0x0000 /* 0 waits inserted */ +#define SH7750_PCR_A6PCW_15 0x1000 /* 15 waits inserted */ +#define SH7750_PCR_A6PCW_30 0x2000 /* 30 waits inserted */ +#define SH7750_PCR_A6PCW_50 0x3000 /* 50 waits inserted */ + +#define SH7750_PCR_A5TED 0x0E00 /* Area 5 Addr-OE\/WE\ Assertion Delay */ + /* delay time from address output to */ + /* OE\/WE\ assertion on the connected */ + /* PCMCIA interface */ #define SH7750_PCR_A5TED_S 9 -#define SH7750_PCR_A6TED 0x01C0 /* Area 6 Address-OE\/WE\ Assertion Delay */ +#define SH7750_PCR_A6TED 0x01C0 /* Area 6 Addr-OE\/WE\ Assertion Delay */ #define SH7750_PCR_A6TED_S 6 -#define SH7750_PCR_TED_0WS 0 /* 0 Waits inserted */ -#define SH7750_PCR_TED_1WS 1 /* 1 Waits inserted */ -#define SH7750_PCR_TED_2WS 2 /* 2 Waits inserted */ -#define 
SH7750_PCR_TED_3WS 3 /* 3 Waits inserted */ -#define SH7750_PCR_TED_6WS 4 /* 6 Waits inserted */ -#define SH7750_PCR_TED_9WS 5 /* 9 Waits inserted */ -#define SH7750_PCR_TED_12WS 6 /* 12 Waits inserted */ -#define SH7750_PCR_TED_15WS 7 /* 15 Waits inserted */ - -#define SH7750_PCR_A5TEH 0x0038 /* Area 5 OE\/WE\ Negation Address delay, - address hold delay time from OE\/WE\ - negation in a write on the connected - PCMCIA interface */ +#define SH7750_PCR_TED_0WS 0 /* 0 Waits inserted */ +#define SH7750_PCR_TED_1WS 1 /* 1 Waits inserted */ +#define SH7750_PCR_TED_2WS 2 /* 2 Waits inserted */ +#define SH7750_PCR_TED_3WS 3 /* 3 Waits inserted */ +#define SH7750_PCR_TED_6WS 4 /* 6 Waits inserted */ +#define SH7750_PCR_TED_9WS 5 /* 9 Waits inserted */ +#define SH7750_PCR_TED_12WS 6 /* 12 Waits inserted */ +#define SH7750_PCR_TED_15WS 7 /* 15 Waits inserted */ + +#define SH7750_PCR_A5TEH 0x0038 /* Area 5 OE\/WE\ Negation Addr delay, */ + /* address hold delay time from OE\/WE\ */ + /* negation in a write on the connected */ + /* PCMCIA interface */ #define SH7750_PCR_A5TEH_S 3 -#define SH7750_PCR_A6TEH 0x0007 /* Area 6 OE\/WE\ Negation Address delay */ +#define SH7750_PCR_A6TEH 0x0007 /* Area 6 OE\/WE\ Negation Address delay */ #define SH7750_PCR_A6TEH_S 0 -#define SH7750_PCR_TEH_0WS 0 /* 0 Waits inserted */ -#define SH7750_PCR_TEH_1WS 1 /* 1 Waits inserted */ -#define SH7750_PCR_TEH_2WS 2 /* 2 Waits inserted */ -#define SH7750_PCR_TEH_3WS 3 /* 3 Waits inserted */ -#define SH7750_PCR_TEH_6WS 4 /* 6 Waits inserted */ -#define SH7750_PCR_TEH_9WS 5 /* 9 Waits inserted */ -#define SH7750_PCR_TEH_12WS 6 /* 12 Waits inserted */ -#define SH7750_PCR_TEH_15WS 7 /* 15 Waits inserted */ +#define SH7750_PCR_TEH_0WS 0 /* 0 Waits inserted */ +#define SH7750_PCR_TEH_1WS 1 /* 1 Waits inserted */ +#define SH7750_PCR_TEH_2WS 2 /* 2 Waits inserted */ +#define SH7750_PCR_TEH_3WS 3 /* 3 Waits inserted */ +#define SH7750_PCR_TEH_6WS 4 /* 6 Waits inserted */ +#define SH7750_PCR_TEH_9WS 5 /* 9 Waits inserted */ +#define SH7750_PCR_TEH_12WS 6 /* 12 Waits inserted */ +#define SH7750_PCR_TEH_15WS 7 /* 15 Waits inserted */ /* Refresh Timer Control/Status Register (half) - RTSCR */ -#define SH7750_RTCSR_REGOFS 0x80001C /* offset */ +#define SH7750_RTCSR_REGOFS 0x80001C /* offset */ #define SH7750_RTCSR SH7750_P4_REG32(SH7750_RTCSR_REGOFS) #define SH7750_RTCSR_A7 SH7750_A7_REG32(SH7750_RTCSR_REGOFS) -#define SH7750_RTCSR_KEY 0xA500 /* RTCSR write key */ -#define SH7750_RTCSR_CMF 0x0080 /* Compare-Match Flag (indicates a - match between the refresh timer - counter and refresh time constant) */ -#define SH7750_RTCSR_CMIE 0x0040 /* Compare-Match Interrupt Enable */ -#define SH7750_RTCSR_CKS 0x0038 /* Refresh Counter Clock Selects */ -#define SH7750_RTCSR_CKS_DIS 0x0000 /* Clock Input Disabled */ -#define SH7750_RTCSR_CKS_CKIO_DIV4 0x0008 /* Bus Clock / 4 */ -#define SH7750_RTCSR_CKS_CKIO_DIV16 0x0010 /* Bus Clock / 16 */ -#define SH7750_RTCSR_CKS_CKIO_DIV64 0x0018 /* Bus Clock / 64 */ -#define SH7750_RTCSR_CKS_CKIO_DIV256 0x0020 /* Bus Clock / 256 */ -#define SH7750_RTCSR_CKS_CKIO_DIV1024 0x0028 /* Bus Clock / 1024 */ -#define SH7750_RTCSR_CKS_CKIO_DIV2048 0x0030 /* Bus Clock / 2048 */ -#define SH7750_RTCSR_CKS_CKIO_DIV4096 0x0038 /* Bus Clock / 4096 */ - -#define SH7750_RTCSR_OVF 0x0004 /* Refresh Count Overflow Flag */ -#define SH7750_RTCSR_OVIE 0x0002 /* Refresh Count Overflow Interrupt - Enable */ -#define SH7750_RTCSR_LMTS 0x0001 /* Refresh Count Overflow Limit Select */ -#define SH7750_RTCSR_LMTS_1024 0x0000 /* Count Limit 
is 1024 */ -#define SH7750_RTCSR_LMTS_512 0x0001 /* Count Limit is 512 */ +#define SH7750_RTCSR_KEY 0xA500 /* RTCSR write key */ +#define SH7750_RTCSR_CMF 0x0080 /* Compare-Match Flag (indicates a */ + /* match between the refresh timer */ + /* counter and refresh time constant) */ +#define SH7750_RTCSR_CMIE 0x0040 /* Compare-Match Interrupt Enable */ +#define SH7750_RTCSR_CKS 0x0038 /* Refresh Counter Clock Selects */ +#define SH7750_RTCSR_CKS_DIS 0x0000 /* Clock Input Disabled */ +#define SH7750_RTCSR_CKS_CKIO_DIV4 0x0008 /* Bus Clock / 4 */ +#define SH7750_RTCSR_CKS_CKIO_DIV16 0x0010 /* Bus Clock / 16 */ +#define SH7750_RTCSR_CKS_CKIO_DIV64 0x0018 /* Bus Clock / 64 */ +#define SH7750_RTCSR_CKS_CKIO_DIV256 0x0020 /* Bus Clock / 256 */ +#define SH7750_RTCSR_CKS_CKIO_DIV1024 0x0028 /* Bus Clock / 1024 */ +#define SH7750_RTCSR_CKS_CKIO_DIV2048 0x0030 /* Bus Clock / 2048 */ +#define SH7750_RTCSR_CKS_CKIO_DIV4096 0x0038 /* Bus Clock / 4096 */ + +#define SH7750_RTCSR_OVF 0x0004 /* Refresh Count Overflow Flag */ +#define SH7750_RTCSR_OVIE 0x0002 /* Refresh Count Overflow Interrupt */ + /* Enable */ +#define SH7750_RTCSR_LMTS 0x0001 /* Refresh Count Overflow Limit Select */ +#define SH7750_RTCSR_LMTS_1024 0x0000 /* Count Limit is 1024 */ +#define SH7750_RTCSR_LMTS_512 0x0001 /* Count Limit is 512 */ /* Refresh Timer Counter (half) - RTCNT */ -#define SH7750_RTCNT_REGOFS 0x800020 /* offset */ +#define SH7750_RTCNT_REGOFS 0x800020 /* offset */ #define SH7750_RTCNT SH7750_P4_REG32(SH7750_RTCNT_REGOFS) #define SH7750_RTCNT_A7 SH7750_A7_REG32(SH7750_RTCNT_REGOFS) -#define SH7750_RTCNT_KEY 0xA500 /* RTCNT write key */ +#define SH7750_RTCNT_KEY 0xA500 /* RTCNT write key */ /* Refresh Time Constant Register (half) - RTCOR */ -#define SH7750_RTCOR_REGOFS 0x800024 /* offset */ +#define SH7750_RTCOR_REGOFS 0x800024 /* offset */ #define SH7750_RTCOR SH7750_P4_REG32(SH7750_RTCOR_REGOFS) #define SH7750_RTCOR_A7 SH7750_A7_REG32(SH7750_RTCOR_REGOFS) -#define SH7750_RTCOR_KEY 0xA500 /* RTCOR write key */ +#define SH7750_RTCOR_KEY 0xA500 /* RTCOR write key */ /* Refresh Count Register (half) - RFCR */ -#define SH7750_RFCR_REGOFS 0x800028 /* offset */ +#define SH7750_RFCR_REGOFS 0x800028 /* offset */ #define SH7750_RFCR SH7750_P4_REG32(SH7750_RFCR_REGOFS) #define SH7750_RFCR_A7 SH7750_A7_REG32(SH7750_RFCR_REGOFS) -#define SH7750_RFCR_KEY 0xA400 /* RFCR write key */ +#define SH7750_RFCR_KEY 0xA400 /* RFCR write key */ /* Synchronous DRAM mode registers - SDMR */ -#define SH7750_SDMR2_REGOFS 0x900000 /* base offset */ -#define SH7750_SDMR2_REGNB 0x0FFC /* nb of register */ +#define SH7750_SDMR2_REGOFS 0x900000 /* base offset */ +#define SH7750_SDMR2_REGNB 0x0FFC /* nb of register */ #define SH7750_SDMR2 SH7750_P4_REG32(SH7750_SDMR2_REGOFS) #define SH7750_SDMR2_A7 SH7750_A7_REG32(SH7750_SDMR2_REGOFS) -#define SH7750_SDMR3_REGOFS 0x940000 /* offset */ -#define SH7750_SDMR3_REGNB 0x0FFC /* nb of register */ +#define SH7750_SDMR3_REGOFS 0x940000 /* offset */ +#define SH7750_SDMR3_REGNB 0x0FFC /* nb of register */ #define SH7750_SDMR3 SH7750_P4_REG32(SH7750_SDMR3_REGOFS) #define SH7750_SDMR3_A7 SH7750_A7_REG32(SH7750_SDMR3_REGOFS) @@ -1015,7 +1015,7 @@ */ /* DMA Source Address Register - SAR0, SAR1, SAR2, SAR3 */ -#define SH7750_SAR_REGOFS(n) (0xA00000 + ((n)*16)) /* offset */ +#define SH7750_SAR_REGOFS(n) (0xA00000 + ((n) * 16)) /* offset */ #define SH7750_SAR(n) SH7750_P4_REG32(SH7750_SAR_REGOFS(n)) #define SH7750_SAR_A7(n) SH7750_A7_REG32(SH7750_SAR_REGOFS(n)) #define SH7750_SAR0 SH7750_SAR(0) @@ -1028,7 +1028,7 @@ 
#define SH7750_SAR3_A7 SH7750_SAR_A7(3) /* DMA Destination Address Register - DAR0, DAR1, DAR2, DAR3 */ -#define SH7750_DAR_REGOFS(n) (0xA00004 + ((n)*16)) /* offset */ +#define SH7750_DAR_REGOFS(n) (0xA00004 + ((n) * 16)) /* offset */ #define SH7750_DAR(n) SH7750_P4_REG32(SH7750_DAR_REGOFS(n)) #define SH7750_DAR_A7(n) SH7750_A7_REG32(SH7750_DAR_REGOFS(n)) #define SH7750_DAR0 SH7750_DAR(0) @@ -1041,7 +1041,7 @@ #define SH7750_DAR3_A7 SH7750_DAR_A7(3) /* DMA Transfer Count Register - DMATCR0, DMATCR1, DMATCR2, DMATCR3 */ -#define SH7750_DMATCR_REGOFS(n) (0xA00008 + ((n)*16)) /* offset */ +#define SH7750_DMATCR_REGOFS(n) (0xA00008 + ((n) * 16)) /* offset */ #define SH7750_DMATCR(n) SH7750_P4_REG32(SH7750_DMATCR_REGOFS(n)) #define SH7750_DMATCR_A7(n) SH7750_A7_REG32(SH7750_DMATCR_REGOFS(n)) #define SH7750_DMATCR0_P4 SH7750_DMATCR(0) @@ -1054,7 +1054,7 @@ #define SH7750_DMATCR3_A7 SH7750_DMATCR_A7(3) /* DMA Channel Control Register - CHCR0, CHCR1, CHCR2, CHCR3 */ -#define SH7750_CHCR_REGOFS(n) (0xA0000C + ((n)*16)) /* offset */ +#define SH7750_CHCR_REGOFS(n) (0xA0000C + ((n) * 16)) /* offset */ #define SH7750_CHCR(n) SH7750_P4_REG32(SH7750_CHCR_REGOFS(n)) #define SH7750_CHCR_A7(n) SH7750_A7_REG32(SH7750_CHCR_REGOFS(n)) #define SH7750_CHCR0 SH7750_CHCR(0) @@ -1066,227 +1066,227 @@ #define SH7750_CHCR2_A7 SH7750_CHCR_A7(2) #define SH7750_CHCR3_A7 SH7750_CHCR_A7(3) -#define SH7750_CHCR_SSA 0xE0000000 /* Source Address Space Attribute */ -#define SH7750_CHCR_SSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ -#define SH7750_CHCR_SSA_DYNBSZ 0x20000000 /* Dynamic Bus Sizing I/O space */ -#define SH7750_CHCR_SSA_IO8 0x40000000 /* 8-bit I/O space */ -#define SH7750_CHCR_SSA_IO16 0x60000000 /* 16-bit I/O space */ -#define SH7750_CHCR_SSA_CMEM8 0x80000000 /* 8-bit common memory space */ -#define SH7750_CHCR_SSA_CMEM16 0xA0000000 /* 16-bit common memory space */ -#define SH7750_CHCR_SSA_AMEM8 0xC0000000 /* 8-bit attribute memory space */ -#define SH7750_CHCR_SSA_AMEM16 0xE0000000 /* 16-bit attribute memory space */ - -#define SH7750_CHCR_STC 0x10000000 /* Source Address Wait Control Select, - specifies CS5 or CS6 space wait - control for PCMCIA access */ - -#define SH7750_CHCR_DSA 0x0E000000 /* Source Address Space Attribute */ -#define SH7750_CHCR_DSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ -#define SH7750_CHCR_DSA_DYNBSZ 0x02000000 /* Dynamic Bus Sizing I/O space */ -#define SH7750_CHCR_DSA_IO8 0x04000000 /* 8-bit I/O space */ -#define SH7750_CHCR_DSA_IO16 0x06000000 /* 16-bit I/O space */ -#define SH7750_CHCR_DSA_CMEM8 0x08000000 /* 8-bit common memory space */ -#define SH7750_CHCR_DSA_CMEM16 0x0A000000 /* 16-bit common memory space */ -#define SH7750_CHCR_DSA_AMEM8 0x0C000000 /* 8-bit attribute memory space */ -#define SH7750_CHCR_DSA_AMEM16 0x0E000000 /* 16-bit attribute memory space */ - -#define SH7750_CHCR_DTC 0x01000000 /* Destination Address Wait Control - Select, specifies CS5 or CS6 - space wait control for PCMCIA - access */ - -#define SH7750_CHCR_DS 0x00080000 /* DREQ\ Select : */ -#define SH7750_CHCR_DS_LOWLVL 0x00000000 /* Low Level Detection */ -#define SH7750_CHCR_DS_FALL 0x00080000 /* Falling Edge Detection */ - -#define SH7750_CHCR_RL 0x00040000 /* Request Check Level: */ -#define SH7750_CHCR_RL_ACTH 0x00000000 /* DRAK is an active high out */ -#define SH7750_CHCR_RL_ACTL 0x00040000 /* DRAK is an active low out */ - -#define SH7750_CHCR_AM 0x00020000 /* Acknowledge Mode: */ -#define SH7750_CHCR_AM_RD 0x00000000 /* DACK is output in read cycle */ -#define SH7750_CHCR_AM_WR 
0x00020000 /* DACK is output in write cycle */ - -#define SH7750_CHCR_AL 0x00010000 /* Acknowledge Level: */ -#define SH7750_CHCR_AL_ACTH 0x00000000 /* DACK is an active high out */ -#define SH7750_CHCR_AL_ACTL 0x00010000 /* DACK is an active low out */ - -#define SH7750_CHCR_DM 0x0000C000 /* Destination Address Mode: */ -#define SH7750_CHCR_DM_FIX 0x00000000 /* Destination Addr Fixed */ -#define SH7750_CHCR_DM_INC 0x00004000 /* Destination Addr Incremented */ -#define SH7750_CHCR_DM_DEC 0x00008000 /* Destination Addr Decremented */ - -#define SH7750_CHCR_SM 0x00003000 /* Source Address Mode: */ -#define SH7750_CHCR_SM_FIX 0x00000000 /* Source Addr Fixed */ -#define SH7750_CHCR_SM_INC 0x00001000 /* Source Addr Incremented */ -#define SH7750_CHCR_SM_DEC 0x00002000 /* Source Addr Decremented */ - -#define SH7750_CHCR_RS 0x00000F00 /* Request Source Select: */ -#define SH7750_CHCR_RS_ER_DA_EA_TO_EA 0x000 /* External Request, Dual Address - Mode (External Addr Space-> - External Addr Space) */ -#define SH7750_CHCR_RS_ER_SA_EA_TO_ED 0x200 /* External Request, Single - Address Mode (External Addr - Space -> External Device) */ -#define SH7750_CHCR_RS_ER_SA_ED_TO_EA 0x300 /* External Request, Single - Address Mode, (External - Device -> External Addr - Space) */ -#define SH7750_CHCR_RS_AR_EA_TO_EA 0x400 /* Auto-Request (External Addr - Space -> External Addr Space) */ - -#define SH7750_CHCR_RS_AR_EA_TO_OCP 0x500 /* Auto-Request (External Addr - Space -> On-chip Peripheral - Module) */ -#define SH7750_CHCR_RS_AR_OCP_TO_EA 0x600 /* Auto-Request (On-chip - Peripheral Module -> - External Addr Space */ -#define SH7750_CHCR_RS_SCITX_EA_TO_SC 0x800 /* SCI Transmit-Data-Empty intr - transfer request (external - address space -> SCTDR1) */ -#define SH7750_CHCR_RS_SCIRX_SC_TO_EA 0x900 /* SCI Receive-Data-Full intr - transfer request (SCRDR1 -> - External Addr Space) */ -#define SH7750_CHCR_RS_SCIFTX_EA_TO_SC 0xA00 /* SCIF Transmit-Data-Empty intr - transfer request (external - address space -> SCFTDR1) */ -#define SH7750_CHCR_RS_SCIFRX_SC_TO_EA 0xB00 /* SCIF Receive-Data-Full intr - transfer request (SCFRDR2 -> - External Addr Space) */ -#define SH7750_CHCR_RS_TMU2_EA_TO_EA 0xC00 /* TMU Channel 2 (input capture - interrupt), (external address - space -> external address - space) */ -#define SH7750_CHCR_RS_TMU2_EA_TO_OCP 0xD00 /* TMU Channel 2 (input capture - interrupt), (external address - space -> on-chip peripheral - module) */ -#define SH7750_CHCR_RS_TMU2_OCP_TO_EA 0xE00 /* TMU Channel 2 (input capture - interrupt), (on-chip - peripheral module -> external - address space) */ - -#define SH7750_CHCR_TM 0x00000080 /* Transmit mode: */ -#define SH7750_CHCR_TM_CSTEAL 0x00000000 /* Cycle Steal Mode */ -#define SH7750_CHCR_TM_BURST 0x00000080 /* Burst Mode */ - -#define SH7750_CHCR_TS 0x00000070 /* Transmit Size: */ -#define SH7750_CHCR_TS_QUAD 0x00000000 /* Quadword Size (64 bits) */ -#define SH7750_CHCR_TS_BYTE 0x00000010 /* Byte Size (8 bit) */ -#define SH7750_CHCR_TS_WORD 0x00000020 /* Word Size (16 bit) */ -#define SH7750_CHCR_TS_LONG 0x00000030 /* Longword Size (32 bit) */ -#define SH7750_CHCR_TS_BLOCK 0x00000040 /* 32-byte block transfer */ - -#define SH7750_CHCR_IE 0x00000004 /* Interrupt Enable */ -#define SH7750_CHCR_TE 0x00000002 /* Transfer End */ -#define SH7750_CHCR_DE 0x00000001 /* DMAC Enable */ +#define SH7750_CHCR_SSA 0xE0000000 /* Source Address Space Attribute */ +#define SH7750_CHCR_SSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ +#define SH7750_CHCR_SSA_DYNBSZ 0x20000000 /* 
Dynamic Bus Sizing I/O space */ +#define SH7750_CHCR_SSA_IO8 0x40000000 /* 8-bit I/O space */ +#define SH7750_CHCR_SSA_IO16 0x60000000 /* 16-bit I/O space */ +#define SH7750_CHCR_SSA_CMEM8 0x80000000 /* 8-bit common memory space */ +#define SH7750_CHCR_SSA_CMEM16 0xA0000000 /* 16-bit common memory space */ +#define SH7750_CHCR_SSA_AMEM8 0xC0000000 /* 8-bit attribute memory space */ +#define SH7750_CHCR_SSA_AMEM16 0xE0000000 /* 16-bit attribute memory space */ + +#define SH7750_CHCR_STC 0x10000000 /* Source Addr Wait Control Select */ + /* specifies CS5 or CS6 space wait */ + /* control for PCMCIA access */ + +#define SH7750_CHCR_DSA 0x0E000000 /* Source Address Space Attribute */ +#define SH7750_CHCR_DSA_PCMCIA 0x00000000 /* Reserved in PCMCIA access */ +#define SH7750_CHCR_DSA_DYNBSZ 0x02000000 /* Dynamic Bus Sizing I/O space */ +#define SH7750_CHCR_DSA_IO8 0x04000000 /* 8-bit I/O space */ +#define SH7750_CHCR_DSA_IO16 0x06000000 /* 16-bit I/O space */ +#define SH7750_CHCR_DSA_CMEM8 0x08000000 /* 8-bit common memory space */ +#define SH7750_CHCR_DSA_CMEM16 0x0A000000 /* 16-bit common memory space */ +#define SH7750_CHCR_DSA_AMEM8 0x0C000000 /* 8-bit attribute memory space */ +#define SH7750_CHCR_DSA_AMEM16 0x0E000000 /* 16-bit attribute memory space */ + +#define SH7750_CHCR_DTC 0x01000000 /* Destination Address Wait Control */ + /* Select, specifies CS5 or CS6 */ + /* space wait control for PCMCIA */ + /* access */ + +#define SH7750_CHCR_DS 0x00080000 /* DREQ\ Select : */ +#define SH7750_CHCR_DS_LOWLVL 0x00000000 /* Low Level Detection */ +#define SH7750_CHCR_DS_FALL 0x00080000 /* Falling Edge Detection */ + +#define SH7750_CHCR_RL 0x00040000 /* Request Check Level: */ +#define SH7750_CHCR_RL_ACTH 0x00000000 /* DRAK is an active high out */ +#define SH7750_CHCR_RL_ACTL 0x00040000 /* DRAK is an active low out */ + +#define SH7750_CHCR_AM 0x00020000 /* Acknowledge Mode: */ +#define SH7750_CHCR_AM_RD 0x00000000 /* DACK is output in read cycle */ +#define SH7750_CHCR_AM_WR 0x00020000 /* DACK is output in write cycle */ + +#define SH7750_CHCR_AL 0x00010000 /* Acknowledge Level: */ +#define SH7750_CHCR_AL_ACTH 0x00000000 /* DACK is an active high out */ +#define SH7750_CHCR_AL_ACTL 0x00010000 /* DACK is an active low out */ + +#define SH7750_CHCR_DM 0x0000C000 /* Destination Address Mode: */ +#define SH7750_CHCR_DM_FIX 0x00000000 /* Destination Addr Fixed */ +#define SH7750_CHCR_DM_INC 0x00004000 /* Destination Addr Incremented */ +#define SH7750_CHCR_DM_DEC 0x00008000 /* Destination Addr Decremented */ + +#define SH7750_CHCR_SM 0x00003000 /* Source Address Mode: */ +#define SH7750_CHCR_SM_FIX 0x00000000 /* Source Addr Fixed */ +#define SH7750_CHCR_SM_INC 0x00001000 /* Source Addr Incremented */ +#define SH7750_CHCR_SM_DEC 0x00002000 /* Source Addr Decremented */ + +#define SH7750_CHCR_RS 0x00000F00 /* Request Source Select: */ +#define SH7750_CHCR_RS_ER_DA_EA_TO_EA 0x000 /* External Request, Dual Addr */ + /* Mode, External Addr Space */ + /* -> External Addr Space) */ +#define SH7750_CHCR_RS_ER_SA_EA_TO_ED 0x200 /* External Request, Single */ + /* Address Mode (Ext. Addr */ + /* Space -> External Device) */ +#define SH7750_CHCR_RS_ER_SA_ED_TO_EA 0x300 /* External Request, Single */ + /* Address Mode, (External */ + /* Device -> External Addr */ + /* Space) */ +#define SH7750_CHCR_RS_AR_EA_TO_EA 0x400 /* Auto-Request (External Addr */ + /* Space -> Ext. 
Addr Space) */ + +#define SH7750_CHCR_RS_AR_EA_TO_OCP 0x500 /* Auto-Request (External Addr */ + /* Space -> On-chip */ + /* Peripheral Module) */ +#define SH7750_CHCR_RS_AR_OCP_TO_EA 0x600 /* Auto-Request (On-chip */ + /* Peripheral Module -> */ + /* External Addr Space */ +#define SH7750_CHCR_RS_SCITX_EA_TO_SC 0x800 /* SCI Transmit-Data-Empty intr */ + /* transfer request (external */ + /* address space -> SCTDR1) */ +#define SH7750_CHCR_RS_SCIRX_SC_TO_EA 0x900 /* SCI Receive-Data-Full intr */ + /* transfer request (SCRDR1 */ + /* -> External Addr Space) */ +#define SH7750_CHCR_RS_SCIFTX_EA_TO_SC 0xA00 /* SCIF TX-Data-Empty intr */ + /* transfer request (external */ + /* address space -> SCFTDR1) */ +#define SH7750_CHCR_RS_SCIFRX_SC_TO_EA 0xB00 /* SCIF Receive-Data-Full intr */ + /* transfer request (SCFRDR2 */ + /* -> External Addr Space) */ +#define SH7750_CHCR_RS_TMU2_EA_TO_EA 0xC00 /* TMU Channel 2 (input capture */ + /* interrupt), (external */ + /* address space -> external */ + /* address space) */ +#define SH7750_CHCR_RS_TMU2_EA_TO_OCP 0xD00 /* TMU Channel 2 (input capture */ + /* interrupt), (external */ + /* address space -> on-chip */ + /* peripheral module) */ +#define SH7750_CHCR_RS_TMU2_OCP_TO_EA 0xE00 /* TMU Channel 2 (input capture */ + /* interrupt), (on-chip */ + /* peripheral module -> */ + /* external address space) */ + +#define SH7750_CHCR_TM 0x00000080 /* Transmit mode: */ +#define SH7750_CHCR_TM_CSTEAL 0x00000000 /* Cycle Steal Mode */ +#define SH7750_CHCR_TM_BURST 0x00000080 /* Burst Mode */ + +#define SH7750_CHCR_TS 0x00000070 /* Transmit Size: */ +#define SH7750_CHCR_TS_QUAD 0x00000000 /* Quadword Size (64 bits) */ +#define SH7750_CHCR_TS_BYTE 0x00000010 /* Byte Size (8 bit) */ +#define SH7750_CHCR_TS_WORD 0x00000020 /* Word Size (16 bit) */ +#define SH7750_CHCR_TS_LONG 0x00000030 /* Longword Size (32 bit) */ +#define SH7750_CHCR_TS_BLOCK 0x00000040 /* 32-byte block transfer */ + +#define SH7750_CHCR_IE 0x00000004 /* Interrupt Enable */ +#define SH7750_CHCR_TE 0x00000002 /* Transfer End */ +#define SH7750_CHCR_DE 0x00000001 /* DMAC Enable */ /* DMA Operation Register - DMAOR */ -#define SH7750_DMAOR_REGOFS 0xA00040 /* offset */ +#define SH7750_DMAOR_REGOFS 0xA00040 /* offset */ #define SH7750_DMAOR SH7750_P4_REG32(SH7750_DMAOR_REGOFS) #define SH7750_DMAOR_A7 SH7750_A7_REG32(SH7750_DMAOR_REGOFS) -#define SH7750_DMAOR_DDT 0x00008000 /* On-Demand Data Transfer Mode */ +#define SH7750_DMAOR_DDT 0x00008000 /* On-Demand Data Transfer Mode */ -#define SH7750_DMAOR_PR 0x00000300 /* Priority Mode: */ -#define SH7750_DMAOR_PR_0123 0x00000000 /* CH0 > CH1 > CH2 > CH3 */ -#define SH7750_DMAOR_PR_0231 0x00000100 /* CH0 > CH2 > CH3 > CH1 */ -#define SH7750_DMAOR_PR_2013 0x00000200 /* CH2 > CH0 > CH1 > CH3 */ -#define SH7750_DMAOR_PR_RR 0x00000300 /* Round-robin mode */ +#define SH7750_DMAOR_PR 0x00000300 /* Priority Mode: */ +#define SH7750_DMAOR_PR_0123 0x00000000 /* CH0 > CH1 > CH2 > CH3 */ +#define SH7750_DMAOR_PR_0231 0x00000100 /* CH0 > CH2 > CH3 > CH1 */ +#define SH7750_DMAOR_PR_2013 0x00000200 /* CH2 > CH0 > CH1 > CH3 */ +#define SH7750_DMAOR_PR_RR 0x00000300 /* Round-robin mode */ -#define SH7750_DMAOR_COD 0x00000010 /* Check Overrun for DREQ\ */ -#define SH7750_DMAOR_AE 0x00000004 /* Address Error flag */ -#define SH7750_DMAOR_NMIF 0x00000002 /* NMI Flag */ -#define SH7750_DMAOR_DME 0x00000001 /* DMAC Master Enable */ +#define SH7750_DMAOR_COD 0x00000010 /* Check Overrun for DREQ\ */ +#define SH7750_DMAOR_AE 0x00000004 /* Address Error flag */ +#define 
SH7750_DMAOR_NMIF 0x00000002 /* NMI Flag */ +#define SH7750_DMAOR_DME 0x00000001 /* DMAC Master Enable */ /* * I/O Ports */ /* Port Control Register A - PCTRA */ -#define SH7750_PCTRA_REGOFS 0x80002C /* offset */ +#define SH7750_PCTRA_REGOFS 0x80002C /* offset */ #define SH7750_PCTRA SH7750_P4_REG32(SH7750_PCTRA_REGOFS) #define SH7750_PCTRA_A7 SH7750_A7_REG32(SH7750_PCTRA_REGOFS) -#define SH7750_PCTRA_PBPUP(n) 0 /* Bit n is pulled up */ -#define SH7750_PCTRA_PBNPUP(n) (1 << ((n)*2+1)) /* Bit n is not pulled up */ -#define SH7750_PCTRA_PBINP(n) 0 /* Bit n is an input */ -#define SH7750_PCTRA_PBOUT(n) (1 << ((n)*2)) /* Bit n is an output */ +#define SH7750_PCTRA_PBPUP(n) 0 /* Bit n is pulled up */ +#define SH7750_PCTRA_PBNPUP(n) (1 << ((n) * 2 + 1)) /* Bit n is not pulled up */ +#define SH7750_PCTRA_PBINP(n) 0 /* Bit n is an input */ +#define SH7750_PCTRA_PBOUT(n) (1 << ((n) * 2)) /* Bit n is an output */ /* Port Data Register A - PDTRA(half) */ -#define SH7750_PDTRA_REGOFS 0x800030 /* offset */ +#define SH7750_PDTRA_REGOFS 0x800030 /* offset */ #define SH7750_PDTRA SH7750_P4_REG32(SH7750_PDTRA_REGOFS) #define SH7750_PDTRA_A7 SH7750_A7_REG32(SH7750_PDTRA_REGOFS) #define SH7750_PDTRA_BIT(n) (1 << (n)) /* Port Control Register B - PCTRB */ -#define SH7750_PCTRB_REGOFS 0x800040 /* offset */ +#define SH7750_PCTRB_REGOFS 0x800040 /* offset */ #define SH7750_PCTRB SH7750_P4_REG32(SH7750_PCTRB_REGOFS) #define SH7750_PCTRB_A7 SH7750_A7_REG32(SH7750_PCTRB_REGOFS) -#define SH7750_PCTRB_PBPUP(n) 0 /* Bit n is pulled up */ -#define SH7750_PCTRB_PBNPUP(n) (1 << ((n-16)*2+1)) /* Bit n is not pulled up */ -#define SH7750_PCTRB_PBINP(n) 0 /* Bit n is an input */ -#define SH7750_PCTRB_PBOUT(n) (1 << ((n-16)*2)) /* Bit n is an output */ +#define SH7750_PCTRB_PBPUP(n) 0 /* Bit n is pulled up */ +#define SH7750_PCTRB_PBNPUP(n) (1 << ((n - 16) * 2 + 1)) /* Bit n is not pulled up */ +#define SH7750_PCTRB_PBINP(n) 0 /* Bit n is an input */ +#define SH7750_PCTRB_PBOUT(n) (1 << ((n - 16) * 2)) /* Bit n is an output */ /* Port Data Register B - PDTRB(half) */ -#define SH7750_PDTRB_REGOFS 0x800044 /* offset */ +#define SH7750_PDTRB_REGOFS 0x800044 /* offset */ #define SH7750_PDTRB SH7750_P4_REG32(SH7750_PDTRB_REGOFS) #define SH7750_PDTRB_A7 SH7750_A7_REG32(SH7750_PDTRB_REGOFS) -#define SH7750_PDTRB_BIT(n) (1 << ((n)-16)) +#define SH7750_PDTRB_BIT(n) (1 << ((n) - 16)) /* GPIO Interrupt Control Register - GPIOIC(half) */ -#define SH7750_GPIOIC_REGOFS 0x800048 /* offset */ +#define SH7750_GPIOIC_REGOFS 0x800048 /* offset */ #define SH7750_GPIOIC SH7750_P4_REG32(SH7750_GPIOIC_REGOFS) #define SH7750_GPIOIC_A7 SH7750_A7_REG32(SH7750_GPIOIC_REGOFS) -#define SH7750_GPIOIC_PTIREN(n) (1 << (n)) /* Port n is used as a GPIO int */ +#define SH7750_GPIOIC_PTIREN(n) (1 << (n)) /* Port n is used as a GPIO int */ /* * Interrupt Controller - INTC */ /* Interrupt Control Register - ICR (half) */ -#define SH7750_ICR_REGOFS 0xD00000 /* offset */ +#define SH7750_ICR_REGOFS 0xD00000 /* offset */ #define SH7750_ICR SH7750_P4_REG32(SH7750_ICR_REGOFS) #define SH7750_ICR_A7 SH7750_A7_REG32(SH7750_ICR_REGOFS) -#define SH7750_ICR_NMIL 0x8000 /* NMI Input Level */ -#define SH7750_ICR_MAI 0x4000 /* NMI Interrupt Mask */ +#define SH7750_ICR_NMIL 0x8000 /* NMI Input Level */ +#define SH7750_ICR_MAI 0x4000 /* NMI Interrupt Mask */ -#define SH7750_ICR_NMIB 0x0200 /* NMI Block Mode: */ -#define SH7750_ICR_NMIB_BLK 0x0000 /* NMI requests held pending while - SR.BL bit is set to 1 */ -#define SH7750_ICR_NMIB_NBLK 0x0200 /* NMI requests detected when SR.BL 
bit - set to 1 */ +#define SH7750_ICR_NMIB 0x0200 /* NMI Block Mode: */ +#define SH7750_ICR_NMIB_BLK 0x0000 /* NMI requests held pending while */ + /* SR.BL bit is set to 1 */ +#define SH7750_ICR_NMIB_NBLK 0x0200 /* NMI requests detected when SR.BL */ + /* bit set to 1 */ -#define SH7750_ICR_NMIE 0x0100 /* NMI Edge Select: */ -#define SH7750_ICR_NMIE_FALL 0x0000 /* Interrupt request detected on falling - edge of NMI input */ -#define SH7750_ICR_NMIE_RISE 0x0100 /* Interrupt request detected on rising - edge of NMI input */ +#define SH7750_ICR_NMIE 0x0100 /* NMI Edge Select: */ +#define SH7750_ICR_NMIE_FALL 0x0000 /* Interrupt request detected on */ + /* falling edge of NMI input */ +#define SH7750_ICR_NMIE_RISE 0x0100 /* Interrupt request detected on */ + /* rising edge of NMI input */ -#define SH7750_ICR_IRLM 0x0080 /* IRL Pin Mode: */ -#define SH7750_ICR_IRLM_ENC 0x0000 /* IRL\ pins used as a level-encoded - interrupt requests */ -#define SH7750_ICR_IRLM_RAW 0x0080 /* IRL\ pins used as a four independent - interrupt requests */ +#define SH7750_ICR_IRLM 0x0080 /* IRL Pin Mode: */ +#define SH7750_ICR_IRLM_ENC 0x0000 /* IRL\ pins used as a level-encoded */ + /* interrupt requests */ +#define SH7750_ICR_IRLM_RAW 0x0080 /* IRL\ pins used as a four */ + /* independent interrupt requests */ /* * User Break Controller registers */ -#define SH7750_BARA 0x200000 /* Break address regiser A */ -#define SH7750_BAMRA 0x200004 /* Break address mask regiser A */ -#define SH7750_BBRA 0x200008 /* Break bus cycle regiser A */ -#define SH7750_BARB 0x20000c /* Break address regiser B */ -#define SH7750_BAMRB 0x200010 /* Break address mask regiser B */ -#define SH7750_BBRB 0x200014 /* Break bus cycle regiser B */ -#define SH7750_BASRB 0x000018 /* Break ASID regiser B */ -#define SH7750_BDRB 0x200018 /* Break data regiser B */ -#define SH7750_BDMRB 0x20001c /* Break data mask regiser B */ -#define SH7750_BRCR 0x200020 /* Break control register */ - -#define SH7750_BRCR_UDBE 0x0001 /* User break debug enable bit */ +#define SH7750_BARA 0x200000 /* Break address regiser A */ +#define SH7750_BAMRA 0x200004 /* Break address mask regiser A */ +#define SH7750_BBRA 0x200008 /* Break bus cycle regiser A */ +#define SH7750_BARB 0x20000c /* Break address regiser B */ +#define SH7750_BAMRB 0x200010 /* Break address mask regiser B */ +#define SH7750_BBRB 0x200014 /* Break bus cycle regiser B */ +#define SH7750_BASRB 0x000018 /* Break ASID regiser B */ +#define SH7750_BDRB 0x200018 /* Break data regiser B */ +#define SH7750_BDMRB 0x20001c /* Break data mask regiser B */ +#define SH7750_BRCR 0x200020 /* Break control register */ + +#define SH7750_BRCR_UDBE 0x0001 /* User break debug enable bit */ /* * Missing in RTEMS, added for QEMU diff --git a/hw/sh4/shix.c b/hw/sh4/shix.c index d9a9fcbc598..aa812512f0c 100644 --- a/hw/sh4/shix.c +++ b/hw/sh4/shix.c @@ -22,20 +22,18 @@ * THE SOFTWARE. 
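
The CHCR and DMAOR definitions reflowed above are field selectors over two 32-bit DMAC registers. As a purely illustrative sketch (not part of the patch; the variable names are invented), composing a channel-control value for an auto-requested memory-to-memory transfer from these masks would look like:

    /* Illustrative only: auto-request memory-to-memory copy, both addresses
     * incrementing, 32-byte block units, interrupt at transfer end. */
    uint32_t chcr = SH7750_CHCR_RS_AR_EA_TO_EA /* request source: auto, mem -> mem */
                  | SH7750_CHCR_SM_INC         /* source address increments */
                  | SH7750_CHCR_DM_INC         /* destination address increments */
                  | SH7750_CHCR_TS_BLOCK       /* 32-byte block transfer size */
                  | SH7750_CHCR_IE             /* interrupt enable at transfer end */
                  | SH7750_CHCR_DE;            /* channel enable */

    /* Master enable and round-robin priority in the operation register. */
    uint32_t dmaor = SH7750_DMAOR_PR_RR | SH7750_DMAOR_DME;

The hunks themselves only rewrap the comments into QEMU's /* ... */ style and fix the macro spacing; the bit values are unchanged.
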
*/ /* - Shix 2.0 board by Alexis Polti, described at - https://web.archive.org/web/20070917001736/perso.enst.fr/~polti/realisations/shix20 - - More information in target/sh4/README.sh4 -*/ + * Shix 2.0 board by Alexis Polti, described at + * https://web.archive.org/web/20070917001736/perso.enst.fr/~polti/realisations/shix20 + * + * More information in target/sh4/README.sh4 + */ #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" #include "hw/sh4/sh.h" -#include "sysemu/sysemu.h" #include "sysemu/qtest.h" #include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #define BIOS_FILENAME "shix_bios.bin" @@ -50,7 +48,7 @@ static void shix_init(MachineState *machine) MemoryRegion *rom = g_new(MemoryRegion, 1); MemoryRegion *sdram = g_new(MemoryRegion, 2); const char *bios_name = machine->firmware ?: BIOS_FILENAME; - + cpu = SUPERH_CPU(cpu_create(machine->cpu_type)); /* Allocate memory space */ diff --git a/hw/sh4/trace-events b/hw/sh4/trace-events new file mode 100644 index 00000000000..4b61cd56c89 --- /dev/null +++ b/hw/sh4/trace-events @@ -0,0 +1,3 @@ +# sh7750.c +sh7750_porta(uint16_t prev, uint16_t cur, uint16_t pdtr, uint16_t pctr) "porta changed from 0x%04x to 0x%04x\npdtra=0x%04x, pctra=0x%08x" +sh7750_portb(uint16_t prev, uint16_t cur, uint16_t pdtr, uint16_t pctr) "portb changed from 0x%04x to 0x%04x\npdtrb=0x%04x, pctrb=0x%08x" diff --git a/hw/sh4/trace.h b/hw/sh4/trace.h new file mode 100644 index 00000000000..e2c13323b7a --- /dev/null +++ b/hw/sh4/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_sh4.h" diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index f22c4f5b734..7397e567373 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -27,6 +27,7 @@ #include "hw/firmware/smbios.h" #include "hw/loader.h" #include "hw/boards.h" +#include "hw/pci/pci_bus.h" #include "smbios_build.h" /* legacy structures and constants for <= 2.0 machines */ @@ -118,6 +119,28 @@ static struct { uint16_t speed; } type17; +static QEnumLookup type41_kind_lookup = { + .array = (const char *const[]) { + "other", + "unknown", + "video", + "scsi", + "ethernet", + "tokenring", + "sound", + "pata", + "sata", + "sas", + }, + .size = 10 +}; +struct type41_instance { + const char *designation, *pcidev; + uint8_t instance, kind; + QTAILQ_ENTRY(type41_instance) next; +}; +static QTAILQ_HEAD(, type41_instance) type41 = QTAILQ_HEAD_INITIALIZER(type41); + static QemuOptsList qemu_smbios_opts = { .name = "smbios", .head = QTAILQ_HEAD_INITIALIZER(qemu_smbios_opts.head), @@ -358,6 +381,32 @@ static const QemuOptDesc qemu_smbios_type17_opts[] = { { /* end of list */ } }; +static const QemuOptDesc qemu_smbios_type41_opts[] = { + { + .name = "type", + .type = QEMU_OPT_NUMBER, + .help = "SMBIOS element type", + },{ + .name = "designation", + .type = QEMU_OPT_STRING, + .help = "reference designation string", + },{ + .name = "kind", + .type = QEMU_OPT_STRING, + .help = "device type", + .def_value_str = "other", + },{ + .name = "instance", + .type = QEMU_OPT_NUMBER, + .help = "device type instance", + },{ + .name = "pcidev", + .type = QEMU_OPT_STRING, + .help = "PCI device", + }, + { /* end of list */ } +}; + static void smbios_register_config(void) { qemu_add_opts(&qemu_smbios_opts); @@ -773,6 +822,53 @@ static void smbios_build_type_32_table(void) SMBIOS_BUILD_TABLE_POST; } +static void smbios_build_type_41_table(Error **errp) +{ + unsigned instance = 0; + struct type41_instance *t41; + + QTAILQ_FOREACH(t41, &type41, next) { + SMBIOS_BUILD_TABLE_PRE(41, 
0x2900 + instance, true); + + SMBIOS_TABLE_SET_STR(41, reference_designation_str, t41->designation); + t->device_type = t41->kind; + t->device_type_instance = t41->instance; + t->segment_group_number = cpu_to_le16(0); + t->bus_number = 0; + t->device_number = 0; + + if (t41->pcidev) { + PCIDevice *pdev = NULL; + int rc = pci_qdev_find_device(t41->pcidev, &pdev); + if (rc != 0) { + error_setg(errp, + "No PCI device %s for SMBIOS type 41 entry %s", + t41->pcidev, t41->designation); + return; + } + /* + * We only handle the case were the device is attached to + * the PCI root bus. The general case is more complex as + * bridges are enumerated later and the table would need + * to be updated at this moment. + */ + if (!pci_bus_is_root(pci_get_bus(pdev))) { + error_setg(errp, + "Cannot create type 41 entry for PCI device %s: " + "not attached to the root bus", + t41->pcidev); + return; + } + t->segment_group_number = cpu_to_le16(0); + t->bus_number = pci_dev_bus_num(pdev); + t->device_number = pdev->devfn; + } + + SMBIOS_BUILD_TABLE_POST; + instance++; + } +} + static void smbios_build_type_127_table(void) { SMBIOS_BUILD_TABLE_PRE(127, 0x7F00, true); /* required */ @@ -883,7 +979,8 @@ void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, const unsigned int mem_array_size, uint8_t **tables, size_t *tables_len, - uint8_t **anchor, size_t *anchor_len) + uint8_t **anchor, size_t *anchor_len, + Error **errp) { unsigned i, dimm_cnt; @@ -928,6 +1025,7 @@ void smbios_get_tables(MachineState *ms, smbios_build_type_32_table(); smbios_build_type_38_table(); + smbios_build_type_41_table(errp); smbios_build_type_127_table(); smbios_validate_table(ms); @@ -1224,6 +1322,30 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) save_opt(&type17.part, opts, "part"); type17.speed = qemu_opt_get_number(opts, "speed", 0); return; + case 41: { + struct type41_instance *t; + Error *local_err = NULL; + + if (!qemu_opts_validate(opts, qemu_smbios_type41_opts, errp)) { + return; + } + t = g_new0(struct type41_instance, 1); + save_opt(&t->designation, opts, "designation"); + t->kind = qapi_enum_parse(&type41_kind_lookup, + qemu_opt_get(opts, "kind"), + 0, &local_err) + 1; + t->kind |= 0x80; /* enabled */ + if (local_err != NULL) { + error_propagate(errp, local_err); + g_free(t); + return; + } + t->instance = qemu_opt_get_number(opts, "instance", 1); + save_opt(&t->pcidev, opts, "pcidev"); + + QTAILQ_INSERT_TAIL(&type41, t, next); + return; + } default: error_setg(errp, "Don't know how to build fields for SMBIOS type %ld", diff --git a/hw/sparc/Kconfig b/hw/sparc/Kconfig index 8dcb10086fd..79d58beb7a6 100644 --- a/hw/sparc/Kconfig +++ b/hw/sparc/Kconfig @@ -8,7 +8,7 @@ config SUN4M select UNIMP select ESCC select ESP - select FDC + select FDC_SYSBUS select SLAVIO select LANCE select M48T59 diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c index 7e16eea9e67..7b4dec17211 100644 --- a/hw/sparc/leon3.c +++ b/hw/sparc/leon3.c @@ -40,7 +40,6 @@ #include "hw/loader.h" #include "elf.h" #include "trace.h" -#include "exec/address-spaces.h" #include "hw/sparc/grlib.h" #include "hw/misc/grlib_ahb_apb_pnp.h" @@ -137,7 +136,36 @@ static void main_cpu_reset(void *opaque) env->regbase[6] = s->sp; } -void leon3_irq_ack(void *irq_manager, int intno) +static void leon3_cache_control_int(CPUSPARCState *env) +{ + uint32_t state = 0; + + if (env->cache_control & CACHE_CTRL_IF) { + /* Instruction cache state */ + state = env->cache_control & CACHE_STATE_MASK; + if (state == CACHE_ENABLED) { + state = 
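
The type-41 option table and the case 41 branch above define the user-facing interface: -smbios type=41 with designation, kind, instance and pcidev keys, where pcidev is looked up by qdev id via pci_qdev_find_device() and must resolve to a device sitting on the PCI root bus. A hypothetical invocation (machine choice, netdev and device ids invented for illustration) that tags a user-created NIC as onboard Ethernet instance 1 could look like:

    qemu-system-x86_64 -machine q35 \
        -netdev user,id=net0 \
        -device e1000e,netdev=net0,id=onboard-lan \
        -smbios 'type=41,designation=Onboard LAN,instance=1,kind=ethernet,pcidev=onboard-lan'

The parsed kind is incremented by one and OR-ed with 0x80, which follows the SMBIOS type 41 device-type encoding (1 = other, 5 = ethernet, ..., bit 7 = device enabled).
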
CACHE_FROZEN; + trace_int_helper_icache_freeze(); + } + + env->cache_control &= ~CACHE_STATE_MASK; + env->cache_control |= state; + } + + if (env->cache_control & CACHE_CTRL_DF) { + /* Data cache state */ + state = (env->cache_control >> 2) & CACHE_STATE_MASK; + if (state == CACHE_ENABLED) { + state = CACHE_FROZEN; + trace_int_helper_dcache_freeze(); + } + + env->cache_control &= ~(CACHE_STATE_MASK << 2); + env->cache_control |= (state << 2); + } +} + +static void leon3_irq_ack(void *irq_manager, int intno) { grlib_irqmp_ack((DeviceState *)irq_manager, intno); } @@ -181,6 +209,12 @@ static void leon3_set_pil_in(void *opaque, int n, int level) } } +static void leon3_irq_manager(CPUSPARCState *env, void *irq_manager, int intno) +{ + leon3_irq_ack(irq_manager, intno); + leon3_cache_control_int(env); +} + static void leon3_generic_hw_init(MachineState *machine) { ram_addr_t ram_size = machine->ram_size; diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 1a00816d9a8..7f3a7c00278 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -107,6 +107,17 @@ struct sun4m_hwdef { uint8_t nvram_machine_id; }; +struct Sun4mMachineClass { + /*< private >*/ + MachineClass parent_obj; + /*< public >*/ + const struct sun4m_hwdef *hwdef; +}; +typedef struct Sun4mMachineClass Sun4mMachineClass; + +#define TYPE_SUN4M_MACHINE MACHINE_TYPE_NAME("sun4m-common") +DECLARE_CLASS_CHECKERS(Sun4mMachineClass, SUN4M_MACHINE, TYPE_SUN4M_MACHINE) + const char *fw_cfg_arch_key_name(uint16_t key) { static const struct { @@ -159,38 +170,6 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, } } -void cpu_check_irqs(CPUSPARCState *env) -{ - CPUState *cs; - - /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); - - if (env->pil_in && (env->interrupt_index == 0 || - (env->interrupt_index & ~15) == TT_EXTINT)) { - unsigned int i; - - for (i = 15; i > 0; i--) { - if (env->pil_in & (1 << i)) { - int old_interrupt = env->interrupt_index; - - env->interrupt_index = TT_EXTINT | i; - if (old_interrupt != env->interrupt_index) { - cs = env_cpu(env); - trace_sun4m_cpu_interrupt(i); - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } - break; - } - } - } else if (!env->pil_in && (env->interrupt_index & ~15) == TT_EXTINT) { - cs = env_cpu(env); - trace_sun4m_cpu_reset_interrupt(env->interrupt_index & 15); - env->interrupt_index = 0; - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - static void cpu_kick_irq(SPARCCPU *cpu) { CPUSPARCState *env = &cpu->env; @@ -824,11 +803,11 @@ static void cpu_devinit(const char *cpu_type, unsigned int id, cpu = SPARC_CPU(object_new(cpu_type)); env = &cpu->env; - cpu_sparc_set_id(env, id); qemu_register_reset(sun4m_cpu_reset, cpu); object_property_set_bool(OBJECT(cpu), "start-powered-off", id != 0, &error_fatal); qdev_realize_and_unref(DEVICE(cpu), NULL, &error_fatal); + cpu_sparc_set_id(env, id); *cpu_irqs = qemu_allocate_irqs(cpu_set_irq, cpu, MAX_PILS); env->prom_addr = prom_addr; } @@ -837,9 +816,9 @@ static void dummy_fdc_tc(void *opaque, int irq, int level) { } -static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, - MachineState *machine) +static void sun4m_hw_init(MachineState *machine) { + const struct sun4m_hwdef *hwdef = SUN4M_MACHINE_GET_CLASS(machine)->hwdef; DeviceState *slavio_intctl; unsigned int i; Nvram *nvram; @@ -1127,9 +1106,22 @@ enum { ss600mp_id, }; -static const struct sun4m_hwdef sun4m_hwdefs[] = { - /* SS-5 */ - { +static void sun4m_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = 
MACHINE_CLASS(oc); + + mc->init = sun4m_hw_init; + mc->block_default_type = IF_SCSI; + mc->default_boot_order = "c"; + mc->default_display = "tcx"; + mc->default_ram_id = "sun4m.ram"; +} + +static void ss5_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss5_hwdef = { .iommu_base = 0x10000000, .iommu_pad_base = 0x10004000, .iommu_pad_len = 0x0fffb000, @@ -1154,9 +1146,19 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = ss5_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* SS-10 */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 5"; + mc->is_default = true; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + smc->hwdef = &ss5_hwdef; +} + +static void ss10_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss10_hwdef = { .iommu_base = 0xfe0000000ULL, .tcx_base = 0xe20000000ULL, .slavio_base = 0xff0000000ULL, @@ -1170,18 +1172,28 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .dma_base = 0xef0400000ULL, .esp_base = 0xef0800000ULL, .le_base = 0xef0c00000ULL, - .apc_base = 0xefa000000ULL, // XXX should not exist + .apc_base = 0xefa000000ULL, /* XXX should not exist */ .aux1_base = 0xff1800000ULL, .aux2_base = 0xff1a01000ULL, .ecc_base = 0xf00000000ULL, - .ecc_version = 0x10000000, // version 0, implementation 1 + .ecc_version = 0x10000000, /* version 0, implementation 1 */ .nvram_machine_id = 0x72, .machine_id = ss10_id, .iommu_version = 0x03000000, .max_mem = 0xf00000000ULL, - }, - /* SS-600MP */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 10"; + mc->max_cpus = 4; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + smc->hwdef = &ss10_hwdef; +} + +static void ss600mp_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss600mp_hwdef = { .iommu_base = 0xfe0000000ULL, .tcx_base = 0xe20000000ULL, .slavio_base = 0xff0000000ULL, @@ -1193,18 +1205,28 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .dma_base = 0xef0081000ULL, .esp_base = 0xef0080000ULL, .le_base = 0xef0060000ULL, - .apc_base = 0xefa000000ULL, // XXX should not exist + .apc_base = 0xefa000000ULL, /* XXX should not exist */ .aux1_base = 0xff1800000ULL, - .aux2_base = 0xff1a01000ULL, // XXX should not exist + .aux2_base = 0xff1a01000ULL, /* XXX should not exist */ .ecc_base = 0xf00000000ULL, - .ecc_version = 0x00000000, // version 0, implementation 0 + .ecc_version = 0x00000000, /* version 0, implementation 0 */ .nvram_machine_id = 0x71, .machine_id = ss600mp_id, .iommu_version = 0x01000000, .max_mem = 0xf00000000ULL, - }, - /* SS-20 */ - { + }; + + mc->desc = "Sun4m platform, SPARCserver 600MP"; + mc->max_cpus = 4; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + smc->hwdef = &ss600mp_hwdef; +} + +static void ss20_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss20_hwdef = { .iommu_base = 0xfe0000000ULL, .tcx_base = 0xe20000000ULL, .slavio_base = 0xff0000000ULL, @@ -1219,7 +1241,7 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .esp_base = 0xef0800000ULL, .le_base = 0xef0c00000ULL, .bpp_base = 0xef4800000ULL, - .apc_base = 0xefa000000ULL, // XXX should 
not exist + .apc_base = 0xefa000000ULL, /* XXX should not exist */ .aux1_base = 0xff1800000ULL, .aux2_base = 0xff1a01000ULL, .dbri_base = 0xee0000000ULL, @@ -1238,14 +1260,24 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { } }, .ecc_base = 0xf00000000ULL, - .ecc_version = 0x20000000, // version 0, implementation 2 + .ecc_version = 0x20000000, /* version 0, implementation 2 */ .nvram_machine_id = 0x72, .machine_id = ss20_id, .iommu_version = 0x13000000, .max_mem = 0xf00000000ULL, - }, - /* Voyager */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 20"; + mc->max_cpus = 4; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + smc->hwdef = &ss20_hwdef; +} + +static void voyager_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef voyager_hwdef = { .iommu_base = 0x10000000, .tcx_base = 0x50000000, .slavio_base = 0x70000000, @@ -1259,16 +1291,25 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .dma_base = 0x78400000, .esp_base = 0x78800000, .le_base = 0x78c00000, - .apc_base = 0x71300000, // pmc + .apc_base = 0x71300000, /* pmc */ .aux1_base = 0x71900000, .aux2_base = 0x71910000, .nvram_machine_id = 0x80, .machine_id = vger_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* LX */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation Voyager"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + smc->hwdef = &voyager_hwdef; +} + +static void ss_lx_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss_lx_hwdef = { .iommu_base = 0x10000000, .iommu_pad_base = 0x10004000, .iommu_pad_len = 0x0fffb000, @@ -1290,9 +1331,18 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = lx_id, .iommu_version = 0x04000000, .max_mem = 0x10000000, - }, - /* SS-4 */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation LX"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + smc->hwdef = &ss_lx_hwdef; +} + +static void ss4_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef ss4_hwdef = { .iommu_base = 0x10000000, .tcx_base = 0x50000000, .cs_base = 0x6c000000, @@ -1314,9 +1364,18 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = ss4_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* SPARCClassic */ - { + }; + + mc->desc = "Sun4m platform, SPARCstation 4"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + smc->hwdef = &ss4_hwdef; +} + +static void scls_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef scls_hwdef = { .iommu_base = 0x10000000, .tcx_base = 0x50000000, .slavio_base = 0x70000000, @@ -1337,11 +1396,20 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = scls_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, - /* SPARCbook */ - { + }; + + mc->desc = "Sun4m platform, SPARCClassic"; + mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + smc->hwdef = &scls_hwdef; +} + +static void sbook_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc); + static const struct sun4m_hwdef sbook_hwdef = { 
.iommu_base = 0x10000000, - .tcx_base = 0x50000000, // XXX + .tcx_base = 0x50000000, /* XXX */ .slavio_base = 0x70000000, .ms_kb_base = 0x71000000, .serial_base = 0x71100000, @@ -1360,254 +1428,67 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = { .machine_id = sbook_id, .iommu_version = 0x05000000, .max_mem = 0x10000000, - }, -}; - -/* SPARCstation 5 hardware initialisation */ -static void ss5_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[0], machine); -} - -/* SPARCstation 10 hardware initialisation */ -static void ss10_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[1], machine); -} - -/* SPARCserver 600MP hardware initialisation */ -static void ss600mp_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[2], machine); -} - -/* SPARCstation 20 hardware initialisation */ -static void ss20_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[3], machine); -} - -/* SPARCstation Voyager hardware initialisation */ -static void vger_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[4], machine); -} - -/* SPARCstation LX hardware initialisation */ -static void ss_lx_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[5], machine); -} - -/* SPARCstation 4 hardware initialisation */ -static void ss4_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[6], machine); -} - -/* SPARCClassic hardware initialisation */ -static void scls_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[7], machine); -} - -/* SPARCbook hardware initialisation */ -static void sbook_init(MachineState *machine) -{ - sun4m_hw_init(&sun4m_hwdefs[8], machine); -} - -static void ss5_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 5"; - mc->init = ss5_init; - mc->block_default_type = IF_SCSI; - mc->is_default = true; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss5_type = { - .name = MACHINE_TYPE_NAME("SS-5"), - .parent = TYPE_MACHINE, - .class_init = ss5_class_init, -}; - -static void ss10_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 10"; - mc->init = ss10_init; - mc->block_default_type = IF_SCSI; - mc->max_cpus = 4; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss10_type = { - .name = MACHINE_TYPE_NAME("SS-10"), - .parent = TYPE_MACHINE, - .class_init = ss10_class_init, -}; - -static void ss600mp_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCserver 600MP"; - mc->init = ss600mp_init; - mc->block_default_type = IF_SCSI; - mc->max_cpus = 4; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss600mp_type = { - .name = MACHINE_TYPE_NAME("SS-600MP"), - .parent = TYPE_MACHINE, - .class_init = ss600mp_class_init, -}; - -static void ss20_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 20"; - mc->init = ss20_init; - mc->block_default_type = IF_SCSI; - mc->max_cpus = 4; - mc->default_boot_order = "c"; - 
mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss20_type = { - .name = MACHINE_TYPE_NAME("SS-20"), - .parent = TYPE_MACHINE, - .class_init = ss20_class_init, -}; - -static void voyager_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation Voyager"; - mc->init = vger_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo voyager_type = { - .name = MACHINE_TYPE_NAME("Voyager"), - .parent = TYPE_MACHINE, - .class_init = voyager_class_init, -}; - -static void ss_lx_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation LX"; - mc->init = ss_lx_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss_lx_type = { - .name = MACHINE_TYPE_NAME("LX"), - .parent = TYPE_MACHINE, - .class_init = ss_lx_class_init, -}; - -static void ss4_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCstation 4"; - mc->init = ss4_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo ss4_type = { - .name = MACHINE_TYPE_NAME("SS-4"), - .parent = TYPE_MACHINE, - .class_init = ss4_class_init, -}; - -static void scls_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); - - mc->desc = "Sun4m platform, SPARCClassic"; - mc->init = scls_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; - mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; -} - -static const TypeInfo scls_type = { - .name = MACHINE_TYPE_NAME("SPARCClassic"), - .parent = TYPE_MACHINE, - .class_init = scls_class_init, -}; - -static void sbook_class_init(ObjectClass *oc, void *data) -{ - MachineClass *mc = MACHINE_CLASS(oc); + }; mc->desc = "Sun4m platform, SPARCbook"; - mc->init = sbook_init; - mc->block_default_type = IF_SCSI; - mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); - mc->default_display = "tcx"; - mc->default_ram_id = "sun4m.ram"; + smc->hwdef = &sbook_hwdef; } -static const TypeInfo sbook_type = { - .name = MACHINE_TYPE_NAME("SPARCbook"), - .parent = TYPE_MACHINE, - .class_init = sbook_class_init, +static const TypeInfo sun4m_machine_types[] = { + { + .name = MACHINE_TYPE_NAME("SS-5"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss5_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-10"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss10_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-600MP"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss600mp_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-20"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss20_class_init, + }, { + .name = MACHINE_TYPE_NAME("Voyager"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = voyager_class_init, + }, { + .name = 
MACHINE_TYPE_NAME("LX"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss_lx_class_init, + }, { + .name = MACHINE_TYPE_NAME("SS-4"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = ss4_class_init, + }, { + .name = MACHINE_TYPE_NAME("SPARCClassic"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = scls_class_init, + }, { + .name = MACHINE_TYPE_NAME("SPARCbook"), + .parent = TYPE_SUN4M_MACHINE, + .class_init = sbook_class_init, + }, { + .name = TYPE_SUN4M_MACHINE, + .parent = TYPE_MACHINE, + .class_size = sizeof(Sun4mMachineClass), + .class_init = sun4m_machine_class_init, + .abstract = true, + } }; +DEFINE_TYPES(sun4m_machine_types) + static void sun4m_register_types(void) { type_register_static(&idreg_info); type_register_static(&afx_info); type_register_static(&prom_info); type_register_static(&ram_info); - - type_register_static(&ss5_type); - type_register_static(&ss10_type); - type_register_static(&ss600mp_type); - type_register_static(&ss20_type); - type_register_static(&voyager_type); - type_register_static(&ss_lx_type); - type_register_static(&ss4_type); - type_register_static(&scls_type); - type_register_static(&sbook_type); } type_init(sun4m_register_types) diff --git a/hw/sparc/trace-events b/hw/sparc/trace-events index 355b07ae057..00b0212c3bd 100644 --- a/hw/sparc/trace-events +++ b/hw/sparc/trace-events @@ -1,8 +1,6 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # sun4m.c -sun4m_cpu_interrupt(unsigned int level) "Set CPU IRQ %d" -sun4m_cpu_reset_interrupt(unsigned int level) "Reset CPU IRQ %d" sun4m_cpu_set_irq_raise(int level) "Raise CPU IRQ %d" sun4m_cpu_set_irq_lower(int level) "Lower CPU IRQ %d" @@ -19,3 +17,5 @@ sun4m_iommu_bad_addr(uint64_t addr) "bad addr 0x%"PRIx64 # leon3.c leon3_set_irq(int intno) "Set CPU IRQ %d" leon3_reset_irq(int intno) "Reset CPU IRQ %d" +int_helper_icache_freeze(void) "Instruction cache: freeze" +int_helper_dcache_freeze(void) "Data cache: freeze" diff --git a/hw/sparc64/Kconfig b/hw/sparc64/Kconfig index 980a201bb73..7e557ad17b0 100644 --- a/hw/sparc64/Kconfig +++ b/hw/sparc64/Kconfig @@ -6,7 +6,7 @@ config SUN4U imply PARALLEL select M48T59 select ISA_BUS - select FDC + select FDC_ISA select SERIAL_ISA select PCI_SABRE select IDE_CMD646 diff --git a/hw/sparc64/niagara.c b/hw/sparc64/niagara.c index a87d55f6bb1..f3e42d03266 100644 --- a/hw/sparc64/niagara.c +++ b/hw/sparc64/niagara.c @@ -31,7 +31,6 @@ #include "hw/loader.h" #include "hw/sparc/sparc64.h" #include "hw/rtc/sun4v-rtc.h" -#include "exec/address-spaces.h" #include "sysemu/block-backend.h" #include "qemu/error-report.h" #include "sysemu/qtest.h" diff --git a/hw/sparc64/sparc64.c b/hw/sparc64/sparc64.c index e3f9219a101..8654e955eb1 100644 --- a/hw/sparc64/sparc64.c +++ b/hw/sparc64/sparc64.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "hw/boards.h" -#include "hw/char/serial.h" #include "hw/sparc/sparc64.h" #include "qemu/timer.h" #include "sysemu/reset.h" @@ -35,68 +34,6 @@ #define TICK_MAX 0x7fffffffffffffffULL -void cpu_check_irqs(CPUSPARCState *env) -{ - CPUState *cs; - uint32_t pil = env->pil_in | - (env->softint & ~(SOFTINT_TIMER | SOFTINT_STIMER)); - - /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); - - /* TT_IVEC has a higher priority (16) than TT_EXTINT (31..17) */ - if (env->ivec_status & 0x20) { - return; - } - cs = env_cpu(env); - /* check if TM or SM in SOFTINT are set - setting these also causes interrupt 14 */ - if 
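
With the sun4m_hwdefs[] array and the per-board *_init() trampolines removed, each board is now a subclass of the abstract TYPE_SUN4M_MACHINE and carries its hwdef in the class structure, which sun4m_hw_init() fetches through SUN4M_MACHINE_GET_CLASS(machine)->hwdef. As a sketch only (every name below is invented), a further variant would need nothing more than a class_init and one entry in sun4m_machine_types[]:

    /* Hypothetical example, not part of the patch. */
    static void example_class_init(ObjectClass *oc, void *data)
    {
        MachineClass *mc = MACHINE_CLASS(oc);
        Sun4mMachineClass *smc = SUN4M_MACHINE_CLASS(mc);
        static const struct sun4m_hwdef example_hwdef = {
            .iommu_base = 0x10000000,
            .max_mem    = 0x10000000,
            /* ... remaining fields as required by sun4m_hw_init() ... */
        };

        mc->desc = "Sun4m platform, example board";
        mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904");
        smc->hwdef = &example_hwdef;
    }
    /* plus, in sun4m_machine_types[]:
     *   { .name = MACHINE_TYPE_NAME("Example"),
     *     .parent = TYPE_SUN4M_MACHINE,
     *     .class_init = example_class_init },
     */

The shared defaults (SCSI block devices, boot order "c", tcx display, "sun4m.ram" RAM id) come from sun4m_machine_class_init() on the abstract parent type.
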
(env->softint & (SOFTINT_TIMER | SOFTINT_STIMER)) { - pil |= 1 << 14; - } - - /* The bit corresponding to psrpil is (1<< psrpil), the next bit - is (2 << psrpil). */ - if (pil < (2 << env->psrpil)) { - if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - trace_sparc64_cpu_check_irqs_reset_irq(env->interrupt_index); - env->interrupt_index = 0; - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } - return; - } - - if (cpu_interrupts_enabled(env)) { - - unsigned int i; - - for (i = 15; i > env->psrpil; i--) { - if (pil & (1 << i)) { - int old_interrupt = env->interrupt_index; - int new_interrupt = TT_EXTINT | i; - - if (unlikely(env->tl > 0 && cpu_tsptr(env)->tt > new_interrupt - && ((cpu_tsptr(env)->tt & 0x1f0) == TT_EXTINT))) { - trace_sparc64_cpu_check_irqs_noset_irq(env->tl, - cpu_tsptr(env)->tt, - new_interrupt); - } else if (old_interrupt != new_interrupt) { - env->interrupt_index = new_interrupt; - trace_sparc64_cpu_check_irqs_set_irq(i, old_interrupt, - new_interrupt); - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } - break; - } - } - } else if (cs->interrupt_request & CPU_INTERRUPT_HARD) { - trace_sparc64_cpu_check_irqs_disabled(pil, env->pil_in, env->softint, - env->interrupt_index); - env->interrupt_index = 0; - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - static void cpu_kick_irq(SPARCCPU *cpu) { CPUState *cs = CPU(cpu); diff --git a/hw/sparc64/trace-events b/hw/sparc64/trace-events index a0b29987d2b..3eb4bacf796 100644 --- a/hw/sparc64/trace-events +++ b/hw/sparc64/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # sun4u.c ebus_isa_irq_handler(int n, int level) "Set ISA IRQ %d level %d" @@ -9,10 +9,6 @@ sun4u_iommu_mem_write(uint64_t addr, uint64_t val, int size) "addr: 0x%"PRIx64" sun4u_iommu_translate(uint64_t addr, uint64_t trans_addr, uint64_t tte) "xlate 0x%"PRIx64" => pa 0x%"PRIx64" tte: 0x%"PRIx64 # sparc64.c -sparc64_cpu_check_irqs_reset_irq(int intno) "Reset CPU IRQ (current interrupt 0x%x)" -sparc64_cpu_check_irqs_noset_irq(uint32_t tl, uint32_t tt, int intno) "Not setting CPU IRQ: TL=%d current 0x%x >= pending 0x%x" -sparc64_cpu_check_irqs_set_irq(unsigned int i, int old, int new) "Set CPU IRQ %d old=0x%x new=0x%x" -sparc64_cpu_check_irqs_disabled(uint32_t pil, uint32_t pil_in, uint32_t softint, int intno) "Interrupts disabled, pil=0x%08x pil_in=0x%08x softint=0x%08x current interrupt 0x%x" sparc64_cpu_ivec_raise_irq(int irq) "Raise IVEC IRQ %d" sparc64_cpu_ivec_lower_irq(int irq) "Lower IVEC IRQ %d" sparc64_cpu_tick_irq_disabled(void) "tick_irq: softint disabled" diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index 16addee4dc8..ff154eb84f8 100644 --- a/hw/ssi/aspeed_smc.c +++ b/hw/ssi/aspeed_smc.c @@ -29,7 +29,6 @@ #include "qemu/module.h" #include "qemu/error-report.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "qemu/units.h" #include "trace.h" @@ -125,8 +124,17 @@ /* SPI dummy cycle data */ #define R_DUMMY_DATA (0x54 / 4) +/* FMC_WDT2 Control/Status Register for Alternate Boot (AST2600) */ +#define R_FMC_WDT2_CTRL (0x64 / 4) +#define FMC_WDT2_CTRL_ALT_BOOT_MODE BIT(6) /* O: 2 chips 1: 1 chip */ +#define FMC_WDT2_CTRL_SINGLE_BOOT_MODE BIT(5) +#define FMC_WDT2_CTRL_BOOT_SOURCE BIT(4) /* O: primary 1: alternate */ +#define FMC_WDT2_CTRL_EN BIT(0) + /* DMA Control/Status Register */ #define R_DMA_CTRL (0x80 / 4) +#define DMA_CTRL_REQUEST (1 << 31) +#define DMA_CTRL_GRANT (1 << 30) #define DMA_CTRL_DELAY_MASK 0xf #define DMA_CTRL_DELAY_SHIFT 8 
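
The new FMC_WDT2_CTRL and DMA_CTRL_REQUEST/GRANT definitions are single-bit flags, so checking them reduces to plain masking. A minimal sketch, with invented helper names that are not part of this patch:

    /* Illustrative only. */
    static inline bool fmc_wdt2_alt_boot_selected(uint32_t wdt2_ctrl)
    {
        /* Watchdog armed and boot source switched to the alternate chip. */
        return (wdt2_ctrl & FMC_WDT2_CTRL_EN) &&
               (wdt2_ctrl & FMC_WDT2_CTRL_BOOT_SOURCE);
    }

    static inline bool dma_ctrl_granted(uint32_t dma_ctrl)
    {
        /* True once the controller has granted the requested DMA access. */
        return !!(dma_ctrl & DMA_CTRL_GRANT);
    }
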
#define DMA_CTRL_FREQ_MASK 0xf @@ -161,11 +169,6 @@ #define ASPEED_SMC_R_SPI_MAX (0x20 / 4) #define ASPEED_SMC_R_SMC_MAX (0x20 / 4) -#define ASPEED_SOC_SMC_FLASH_BASE 0x10000000 -#define ASPEED_SOC_FMC_FLASH_BASE 0x20000000 -#define ASPEED_SOC_SPI_FLASH_BASE 0x30000000 -#define ASPEED_SOC_SPI2_FLASH_BASE 0x38000000 - /* * DMA DRAM addresses should be 4 bytes aligned and the valid address * range is 0x40000000 - 0x5FFFFFFF (AST2400) @@ -178,10 +181,8 @@ * 0: 4 bytes * 0x7FFFFF: 32M bytes */ -#define DMA_DRAM_ADDR(s, val) ((s)->sdram_base | \ - ((val) & (s)->ctrl->dma_dram_mask)) -#define DMA_FLASH_ADDR(s, val) ((s)->ctrl->flash_window_base | \ - ((val) & (s)->ctrl->dma_flash_mask)) +#define DMA_DRAM_ADDR(asc, val) ((val) & (asc)->dma_dram_mask) +#define DMA_FLASH_ADDR(asc, val) ((val) & (asc)->dma_flash_mask) #define DMA_LENGTH(val) ((val) & 0x01FFFFFC) /* Flash opcodes. */ @@ -195,311 +196,48 @@ * controller. These can be changed when board is initialized with the * Segment Address Registers. */ -static const AspeedSegments aspeed_segments_legacy[] = { - { 0x10000000, 32 * 1024 * 1024 }, -}; - -static const AspeedSegments aspeed_segments_fmc[] = { - { 0x20000000, 64 * 1024 * 1024 }, /* start address is readonly */ - { 0x24000000, 32 * 1024 * 1024 }, - { 0x26000000, 32 * 1024 * 1024 }, - { 0x28000000, 32 * 1024 * 1024 }, - { 0x2A000000, 32 * 1024 * 1024 } -}; - -static const AspeedSegments aspeed_segments_spi[] = { - { 0x30000000, 64 * 1024 * 1024 }, -}; - -static const AspeedSegments aspeed_segments_ast2500_fmc[] = { - { 0x20000000, 128 * 1024 * 1024 }, /* start address is readonly */ - { 0x28000000, 32 * 1024 * 1024 }, - { 0x2A000000, 32 * 1024 * 1024 }, -}; - -static const AspeedSegments aspeed_segments_ast2500_spi1[] = { - { 0x30000000, 32 * 1024 * 1024 }, /* start address is readonly */ - { 0x32000000, 96 * 1024 * 1024 }, /* end address is readonly */ -}; - -static const AspeedSegments aspeed_segments_ast2500_spi2[] = { - { 0x38000000, 32 * 1024 * 1024 }, /* start address is readonly */ - { 0x3A000000, 96 * 1024 * 1024 }, /* end address is readonly */ -}; -static uint32_t aspeed_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg); -static void aspeed_smc_reg_to_segment(const AspeedSMCState *s, uint32_t reg, - AspeedSegments *seg); - -/* - * AST2600 definitions - */ -#define ASPEED26_SOC_FMC_FLASH_BASE 0x20000000 -#define ASPEED26_SOC_SPI_FLASH_BASE 0x30000000 -#define ASPEED26_SOC_SPI2_FLASH_BASE 0x50000000 +static const AspeedSegments aspeed_2500_spi1_segments[]; +static const AspeedSegments aspeed_2500_spi2_segments[]; -static const AspeedSegments aspeed_segments_ast2600_fmc[] = { - { 0x0, 128 * MiB }, /* start address is readonly */ - { 128 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */ - { 0x0, 0 }, /* disabled */ -}; - -static const AspeedSegments aspeed_segments_ast2600_spi1[] = { - { 0x0, 128 * MiB }, /* start address is readonly */ - { 0x0, 0 }, /* disabled */ -}; - -static const AspeedSegments aspeed_segments_ast2600_spi2[] = { - { 0x0, 128 * MiB }, /* start address is readonly */ - { 0x0, 0 }, /* disabled */ - { 0x0, 0 }, /* disabled */ -}; - -static uint32_t aspeed_2600_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg); -static void aspeed_2600_smc_reg_to_segment(const AspeedSMCState *s, - uint32_t reg, AspeedSegments *seg); - -static const AspeedSMCController controllers[] = { - { - .name = "aspeed.smc-ast2400", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - 
.nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 1, - .segments = aspeed_segments_legacy, - .flash_window_base = ASPEED_SOC_SMC_FLASH_BASE, - .flash_window_size = 0x6000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_SMC_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.fmc-ast2400", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 5, - .segments = aspeed_segments_fmc, - .flash_window_base = ASPEED_SOC_FMC_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x1FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.spi1-ast2400", - .r_conf = R_SPI_CONF, - .r_ce_ctrl = 0xff, - .r_ctrl0 = R_SPI_CTRL0, - .r_timings = R_SPI_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = SPI_CONF_ENABLE_W0, - .max_peripherals = 1, - .segments = aspeed_segments_spi, - .flash_window_base = ASPEED_SOC_SPI_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_SPI_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.fmc-ast2500", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 3, - .segments = aspeed_segments_ast2500_fmc, - .flash_window_base = ASPEED_SOC_FMC_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.spi1-ast2500", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 2, - .segments = aspeed_segments_ast2500_spi1, - .flash_window_base = ASPEED_SOC_SPI_FLASH_BASE, - .flash_window_size = 0x8000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.spi2-ast2500", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 2, - .segments = aspeed_segments_ast2500_spi2, - .flash_window_base = ASPEED_SOC_SPI2_FLASH_BASE, - .flash_window_size = 0x8000000, - .has_dma = false, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_smc_segment_to_reg, - .reg_to_segment = aspeed_smc_reg_to_segment, - }, { - .name = "aspeed.fmc-ast2600", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 1, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 3, - .segments = aspeed_segments_ast2600_fmc, - .flash_window_base = ASPEED26_SOC_FMC_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_2600_smc_segment_to_reg, - .reg_to_segment = aspeed_2600_smc_reg_to_segment, - }, { - .name = "aspeed.spi1-ast2600", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - 
.r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 2, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 2, - .segments = aspeed_segments_ast2600_spi1, - .flash_window_base = ASPEED26_SOC_SPI_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_2600_smc_segment_to_reg, - .reg_to_segment = aspeed_2600_smc_reg_to_segment, - }, { - .name = "aspeed.spi2-ast2600", - .r_conf = R_CONF, - .r_ce_ctrl = R_CE_CTRL, - .r_ctrl0 = R_CTRL0, - .r_timings = R_TIMINGS, - .nregs_timings = 3, - .conf_enable_w0 = CONF_ENABLE_W0, - .max_peripherals = 3, - .segments = aspeed_segments_ast2600_spi2, - .flash_window_base = ASPEED26_SOC_SPI2_FLASH_BASE, - .flash_window_size = 0x10000000, - .has_dma = true, - .dma_flash_mask = 0x0FFFFFFC, - .dma_dram_mask = 0x3FFFFFFC, - .nregs = ASPEED_SMC_R_MAX, - .segment_to_reg = aspeed_2600_smc_segment_to_reg, - .reg_to_segment = aspeed_2600_smc_reg_to_segment, - }, -}; - -/* - * The Segment Registers of the AST2400 and AST2500 have a 8MB - * unit. The address range of a flash SPI peripheral is encoded with - * absolute addresses which should be part of the overall controller - * window. - */ -static uint32_t aspeed_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg) -{ - uint32_t reg = 0; - reg |= ((seg->addr >> 23) & SEG_START_MASK) << SEG_START_SHIFT; - reg |= (((seg->addr + seg->size) >> 23) & SEG_END_MASK) << SEG_END_SHIFT; - return reg; -} +#define ASPEED_SMC_FEATURE_DMA 0x1 +#define ASPEED_SMC_FEATURE_DMA_GRANT 0x2 +#define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4 -static void aspeed_smc_reg_to_segment(const AspeedSMCState *s, - uint32_t reg, AspeedSegments *seg) +static inline bool aspeed_smc_has_dma(const AspeedSMCClass *asc) { - seg->addr = ((reg >> SEG_START_SHIFT) & SEG_START_MASK) << 23; - seg->size = (((reg >> SEG_END_SHIFT) & SEG_END_MASK) << 23) - seg->addr; + return !!(asc->features & ASPEED_SMC_FEATURE_DMA); } -/* - * The Segment Registers of the AST2600 have a 1MB unit. The address - * range of a flash SPI peripheral is encoded with offsets in the overall - * controller window. The previous SoC AST2400 and AST2500 used - * absolute addresses. Only bits [27:20] are relevant and the end - * address is an upper bound limit. - */ -#define AST2600_SEG_ADDR_MASK 0x0ff00000 - -static uint32_t aspeed_2600_smc_segment_to_reg(const AspeedSMCState *s, - const AspeedSegments *seg) +static inline bool aspeed_smc_has_wdt_control(const AspeedSMCClass *asc) { - uint32_t reg = 0; - - /* Disabled segments have a nil register */ - if (!seg->size) { - return 0; - } - - reg |= (seg->addr & AST2600_SEG_ADDR_MASK) >> 16; /* start offset */ - reg |= (seg->addr + seg->size - 1) & AST2600_SEG_ADDR_MASK; /* end offset */ - return reg; + return !!(asc->features & ASPEED_SMC_FEATURE_WDT_CONTROL); } -static void aspeed_2600_smc_reg_to_segment(const AspeedSMCState *s, - uint32_t reg, AspeedSegments *seg) -{ - uint32_t start_offset = (reg << 16) & AST2600_SEG_ADDR_MASK; - uint32_t end_offset = reg & AST2600_SEG_ADDR_MASK; - - if (reg) { - seg->addr = s->ctrl->flash_window_base + start_offset; - seg->size = end_offset + MiB - start_offset; - } else { - seg->addr = s->ctrl->flash_window_base; - seg->size = 0; - } -} +#define aspeed_smc_error(fmt, ...) 
\ + qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt "\n", __func__, ## __VA_ARGS__) static bool aspeed_smc_flash_overlap(const AspeedSMCState *s, const AspeedSegments *new, int cs) { + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSegments seg; int i; - for (i = 0; i < s->ctrl->max_peripherals; i++) { + for (i = 0; i < asc->max_peripherals; i++) { if (i == cs) { continue; } - s->ctrl->reg_to_segment(s, s->regs[R_SEG_ADDR0 + i], &seg); + asc->reg_to_segment(s, s->regs[R_SEG_ADDR0 + i], &seg); if (new->addr + new->size > seg.addr && new->addr < seg.addr + seg.size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment CS%d [ 0x%" - HWADDR_PRIx" - 0x%"HWADDR_PRIx" ] overlaps with " - "CS%d [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, cs, new->addr, new->addr + new->size, - i, seg.addr, seg.addr + seg.size); + aspeed_smc_error("new segment CS%d [ 0x%" + HWADDR_PRIx" - 0x%"HWADDR_PRIx" ] overlaps with " + "CS%d [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + cs, new->addr, new->addr + new->size, + i, seg.addr, seg.addr + seg.size); return true; } } @@ -509,14 +247,15 @@ static bool aspeed_smc_flash_overlap(const AspeedSMCState *s, static void aspeed_smc_flash_set_segment_region(AspeedSMCState *s, int cs, uint64_t regval) { + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSMCFlash *fl = &s->flashes[cs]; AspeedSegments seg; - s->ctrl->reg_to_segment(s, regval, &seg); + asc->reg_to_segment(s, regval, &seg); memory_region_transaction_begin(); memory_region_set_size(&fl->mmio, seg.size); - memory_region_set_address(&fl->mmio, seg.addr - s->ctrl->flash_window_base); + memory_region_set_address(&fl->mmio, seg.addr - asc->flash_window_base); memory_region_set_enabled(&fl->mmio, !!seg.size); memory_region_transaction_commit(); @@ -526,53 +265,52 @@ static void aspeed_smc_flash_set_segment_region(AspeedSMCState *s, int cs, static void aspeed_smc_flash_set_segment(AspeedSMCState *s, int cs, uint64_t new) { + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSegments seg; - s->ctrl->reg_to_segment(s, new, &seg); + asc->reg_to_segment(s, new, &seg); trace_aspeed_smc_flash_set_segment(cs, new, seg.addr, seg.addr + seg.size); /* The start address of CS0 is read-only */ - if (cs == 0 && seg.addr != s->ctrl->flash_window_base) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Tried to change CS0 start address to 0x%" - HWADDR_PRIx "\n", s->ctrl->name, seg.addr); - seg.addr = s->ctrl->flash_window_base; - new = s->ctrl->segment_to_reg(s, &seg); + if (cs == 0 && seg.addr != asc->flash_window_base) { + aspeed_smc_error("Tried to change CS0 start address to 0x%" + HWADDR_PRIx, seg.addr); + seg.addr = asc->flash_window_base; + new = asc->segment_to_reg(s, &seg); } /* * The end address of the AST2500 spi controllers is also * read-only. 
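
Per-controller behaviour is moving from the old controllers[] table into AspeedSMCClass: capabilities become bits in asc->features tested through aspeed_smc_has_dma() / aspeed_smc_has_wdt_control(), and guest errors go through the aspeed_smc_error() wrapper. As an illustrative sketch only (the function name is invented; the real per-SoC class_init functions are outside this hunk), a class would advertise its capabilities like this:

    static void example_smc_class_init(ObjectClass *klass, void *data)
    {
        AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);

        asc->max_peripherals = 3;
        asc->features = ASPEED_SMC_FEATURE_DMA | ASPEED_SMC_FEATURE_WDT_CONTROL;
        /* aspeed_smc_has_dma(asc) / aspeed_smc_has_wdt_control(asc) then
         * gate the DMA and FMC_WDT2 register handling for this SoC. */
    }
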
*/ - if ((s->ctrl->segments == aspeed_segments_ast2500_spi1 || - s->ctrl->segments == aspeed_segments_ast2500_spi2) && - cs == s->ctrl->max_peripherals && - seg.addr + seg.size != s->ctrl->segments[cs].addr + - s->ctrl->segments[cs].size) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Tried to change CS%d end address to 0x%" - HWADDR_PRIx "\n", s->ctrl->name, cs, seg.addr + seg.size); - seg.size = s->ctrl->segments[cs].addr + s->ctrl->segments[cs].size - + if ((asc->segments == aspeed_2500_spi1_segments || + asc->segments == aspeed_2500_spi2_segments) && + cs == asc->max_peripherals && + seg.addr + seg.size != asc->segments[cs].addr + + asc->segments[cs].size) { + aspeed_smc_error("Tried to change CS%d end address to 0x%" + HWADDR_PRIx, cs, seg.addr + seg.size); + seg.size = asc->segments[cs].addr + asc->segments[cs].size - seg.addr; - new = s->ctrl->segment_to_reg(s, &seg); + new = asc->segment_to_reg(s, &seg); } /* Keep the segment in the overall flash window */ if (seg.size && - (seg.addr + seg.size <= s->ctrl->flash_window_base || - seg.addr > s->ctrl->flash_window_base + s->ctrl->flash_window_size)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment for CS%d is invalid : " - "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, cs, seg.addr, seg.addr + seg.size); + (seg.addr + seg.size <= asc->flash_window_base || + seg.addr > asc->flash_window_base + asc->flash_window_size)) { + aspeed_smc_error("new segment for CS%d is invalid : " + "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + cs, seg.addr, seg.addr + seg.size); return; } /* Check start address vs. alignment */ if (seg.size && !QEMU_IS_ALIGNED(seg.addr, seg.size)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment for CS%d is not " - "aligned : [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, cs, seg.addr, seg.addr + seg.size); + aspeed_smc_error("new segment for CS%d is not " + "aligned : [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + cs, seg.addr, seg.addr + seg.size); } /* And segments should not overlap (in the specs) */ @@ -585,16 +323,15 @@ static void aspeed_smc_flash_set_segment(AspeedSMCState *s, int cs, static uint64_t aspeed_smc_flash_default_read(void *opaque, hwaddr addr, unsigned size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: To 0x%" HWADDR_PRIx " of size %u" - PRIx64 "\n", __func__, addr, size); + aspeed_smc_error("To 0x%" HWADDR_PRIx " of size %u" PRIx64, addr, size); return 0; } static void aspeed_smc_flash_default_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: To 0x%" HWADDR_PRIx " of size %u: 0x%" - PRIx64 "\n", __func__, addr, size, data); + aspeed_smc_error("To 0x%" HWADDR_PRIx " of size %u: 0x%" PRIx64, + addr, size, data); } static const MemoryRegionOps aspeed_smc_flash_default_ops = { @@ -611,20 +348,20 @@ static inline int aspeed_smc_flash_mode(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - return s->regs[s->r_ctrl0 + fl->id] & CTRL_CMD_MODE_MASK; + return s->regs[s->r_ctrl0 + fl->cs] & CTRL_CMD_MODE_MASK; } static inline bool aspeed_smc_is_writable(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - return s->regs[s->r_conf] & (1 << (s->conf_enable_w0 + fl->id)); + return s->regs[s->r_conf] & (1 << (s->conf_enable_w0 + fl->cs)); } static inline int aspeed_smc_flash_cmd(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - int cmd = (s->regs[s->r_ctrl0 + fl->id] >> CTRL_CMD_SHIFT) & CTRL_CMD_MASK; + int cmd = (s->regs[s->r_ctrl0 + fl->cs] >> CTRL_CMD_SHIFT) & CTRL_CMD_MASK; /* * 
In read mode, the default SPI command is READ (0x3). In other @@ -637,21 +374,22 @@ static inline int aspeed_smc_flash_cmd(const AspeedSMCFlash *fl) } if (!cmd) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: no command defined for mode %d\n", - __func__, aspeed_smc_flash_mode(fl)); + aspeed_smc_error("no command defined for mode %d", + aspeed_smc_flash_mode(fl)); } return cmd; } -static inline int aspeed_smc_flash_is_4byte(const AspeedSMCFlash *fl) +static inline int aspeed_smc_flash_addr_width(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); - if (s->ctrl->segments == aspeed_segments_spi) { - return s->regs[s->r_ctrl0] & CTRL_AST2400_SPI_4BYTE; + if (asc->addr_width) { + return asc->addr_width(s); } else { - return s->regs[s->r_ce_ctrl] & (1 << (CTRL_EXTENDED0 + fl->id)); + return s->regs[s->r_ce_ctrl] & (1 << (CTRL_EXTENDED0 + fl->cs)) ? 4 : 3; } } @@ -659,9 +397,9 @@ static void aspeed_smc_flash_do_select(AspeedSMCFlash *fl, bool unselect) { AspeedSMCState *s = fl->controller; - trace_aspeed_smc_flash_select(fl->id, unselect ? "un" : ""); + trace_aspeed_smc_flash_select(fl->cs, unselect ? "un" : ""); - qemu_set_irq(s->cs_lines[fl->id], unselect); + qemu_set_irq(s->cs_lines[fl->cs], unselect); } static void aspeed_smc_flash_select(AspeedSMCFlash *fl) @@ -678,15 +416,14 @@ static uint32_t aspeed_smc_check_segment_addr(const AspeedSMCFlash *fl, uint32_t addr) { const AspeedSMCState *s = fl->controller; + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); AspeedSegments seg; - s->ctrl->reg_to_segment(s, s->regs[R_SEG_ADDR0 + fl->id], &seg); + asc->reg_to_segment(s, s->regs[R_SEG_ADDR0 + fl->cs], &seg); if ((addr % seg.size) != addr) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid address 0x%08x for CS%d segment : " - "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", - s->ctrl->name, addr, fl->id, seg.addr, - seg.addr + seg.size); + aspeed_smc_error("invalid address 0x%08x for CS%d segment : " + "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]", + addr, fl->cs, seg.addr, seg.addr + seg.size); addr %= seg.size; } @@ -696,7 +433,7 @@ static uint32_t aspeed_smc_check_segment_addr(const AspeedSMCFlash *fl, static int aspeed_smc_flash_dummies(const AspeedSMCFlash *fl) { const AspeedSMCState *s = fl->controller; - uint32_t r_ctrl0 = s->regs[s->r_ctrl0 + fl->id]; + uint32_t r_ctrl0 = s->regs[s->r_ctrl0 + fl->cs]; uint32_t dummy_high = (r_ctrl0 >> CTRL_DUMMY_HIGH_SHIFT) & 0x1; uint32_t dummy_low = (r_ctrl0 >> CTRL_DUMMY_LOW_SHIFT) & 0x3; uint32_t dummies = ((dummy_high << 2) | dummy_low) * 8; @@ -712,7 +449,7 @@ static void aspeed_smc_flash_setup(AspeedSMCFlash *fl, uint32_t addr) { const AspeedSMCState *s = fl->controller; uint8_t cmd = aspeed_smc_flash_cmd(fl); - int i = aspeed_smc_flash_is_4byte(fl) ? 
4 : 3; + int i = aspeed_smc_flash_addr_width(fl); /* Flash access can not exceed CS segment */ addr = aspeed_smc_check_segment_addr(fl, addr); @@ -762,11 +499,10 @@ static uint64_t aspeed_smc_flash_read(void *opaque, hwaddr addr, unsigned size) aspeed_smc_flash_unselect(fl); break; default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid flash mode %d\n", - __func__, aspeed_smc_flash_mode(fl)); + aspeed_smc_error("invalid flash mode %d", aspeed_smc_flash_mode(fl)); } - trace_aspeed_smc_flash_read(fl->id, addr, size, ret, + trace_aspeed_smc_flash_read(fl->cs, addr, size, ret, aspeed_smc_flash_mode(fl)); return ret; } @@ -821,9 +557,9 @@ static bool aspeed_smc_do_snoop(AspeedSMCFlash *fl, uint64_t data, unsigned size) { AspeedSMCState *s = fl->controller; - uint8_t addr_width = aspeed_smc_flash_is_4byte(fl) ? 4 : 3; + uint8_t addr_width = aspeed_smc_flash_addr_width(fl); - trace_aspeed_smc_do_snoop(fl->id, s->snoop_index, s->snoop_dummies, + trace_aspeed_smc_do_snoop(fl->cs, s->snoop_index, s->snoop_dummies, (uint8_t) data & 0xff); if (s->snoop_index == SNOOP_OFF) { @@ -876,12 +612,11 @@ static void aspeed_smc_flash_write(void *opaque, hwaddr addr, uint64_t data, AspeedSMCState *s = fl->controller; int i; - trace_aspeed_smc_flash_write(fl->id, addr, size, data, + trace_aspeed_smc_flash_write(fl->cs, addr, size, data, aspeed_smc_flash_mode(fl)); if (!aspeed_smc_is_writable(fl)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: flash is not writable at 0x%" - HWADDR_PRIx "\n", __func__, addr); + aspeed_smc_error("flash is not writable at 0x%" HWADDR_PRIx, addr); return; } @@ -906,8 +641,7 @@ static void aspeed_smc_flash_write(void *opaque, hwaddr addr, uint64_t data, aspeed_smc_flash_unselect(fl); break; default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid flash mode %d\n", - __func__, aspeed_smc_flash_mode(fl)); + aspeed_smc_error("invalid flash mode %d", aspeed_smc_flash_mode(fl)); } } @@ -930,12 +664,12 @@ static void aspeed_smc_flash_update_ctrl(AspeedSMCFlash *fl, uint32_t value) unselect = (value & CTRL_CMD_MODE_MASK) != CTRL_USERMODE; /* A change of CTRL_CE_STOP_ACTIVE from 0 to 1, unselects the CS */ - if (!(s->regs[s->r_ctrl0 + fl->id] & CTRL_CE_STOP_ACTIVE) && + if (!(s->regs[s->r_ctrl0 + fl->cs] & CTRL_CE_STOP_ACTIVE) && value & CTRL_CE_STOP_ACTIVE) { unselect = true; } - s->regs[s->r_ctrl0 + fl->id] = value; + s->regs[s->r_ctrl0 + fl->cs] = value; s->snoop_index = unselect ? 
SNOOP_OFF : SNOOP_START; @@ -945,9 +679,14 @@ static void aspeed_smc_flash_update_ctrl(AspeedSMCFlash *fl, uint32_t value) static void aspeed_smc_reset(DeviceState *d) { AspeedSMCState *s = ASPEED_SMC(d); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); int i; - memset(s->regs, 0, sizeof s->regs); + if (asc->resets) { + memcpy(s->regs, asc->resets, sizeof s->regs); + } else { + memset(s->regs, 0, sizeof s->regs); + } /* Unselect all peripherals */ for (i = 0; i < s->num_cs; ++i) { @@ -956,30 +695,9 @@ static void aspeed_smc_reset(DeviceState *d) } /* setup the default segment register values and regions for all */ - for (i = 0; i < s->ctrl->max_peripherals; ++i) { + for (i = 0; i < asc->max_peripherals; ++i) { aspeed_smc_flash_set_segment_region(s, i, - s->ctrl->segment_to_reg(s, &s->ctrl->segments[i])); - } - - /* HW strapping flash type for the AST2600 controllers */ - if (s->ctrl->segments == aspeed_segments_ast2600_fmc) { - /* flash type is fixed to SPI for all */ - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0); - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1); - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE2); - } - - /* HW strapping flash type for FMC controllers */ - if (s->ctrl->segments == aspeed_segments_ast2500_fmc) { - /* flash type is fixed to SPI for CE0 and CE1 */ - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0); - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1); - } - - /* HW strapping for AST2400 FMC controllers (SCU70). Let's use the - * configuration of the palmetto-bmc machine */ - if (s->ctrl->segments == aspeed_segments_fmc) { - s->regs[s->r_conf] |= (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0); + asc->segment_to_reg(s, &asc->segments[i])); } s->snoop_index = SNOOP_OFF; @@ -989,26 +707,28 @@ static void aspeed_smc_reset(DeviceState *d) static uint64_t aspeed_smc_read(void *opaque, hwaddr addr, unsigned int size) { AspeedSMCState *s = ASPEED_SMC(opaque); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(opaque); addr >>= 2; if (addr == s->r_conf || (addr >= s->r_timings && - addr < s->r_timings + s->ctrl->nregs_timings) || + addr < s->r_timings + asc->nregs_timings) || addr == s->r_ce_ctrl || addr == R_CE_CMD_CTRL || addr == R_INTR_CTRL || addr == R_DUMMY_DATA || - (s->ctrl->has_dma && addr == R_DMA_CTRL) || - (s->ctrl->has_dma && addr == R_DMA_FLASH_ADDR) || - (s->ctrl->has_dma && addr == R_DMA_DRAM_ADDR) || - (s->ctrl->has_dma && addr == R_DMA_LEN) || - (s->ctrl->has_dma && addr == R_DMA_CHECKSUM) || + (aspeed_smc_has_wdt_control(asc) && addr == R_FMC_WDT2_CTRL) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN) || + (aspeed_smc_has_dma(asc) && addr == R_DMA_CHECKSUM) || (addr >= R_SEG_ADDR0 && - addr < R_SEG_ADDR0 + s->ctrl->max_peripherals) || - (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + s->ctrl->max_peripherals)) { + addr < R_SEG_ADDR0 + asc->max_peripherals) || + (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + asc->max_peripherals)) { - trace_aspeed_smc_read(addr, size, s->regs[addr]); + trace_aspeed_smc_read(addr << 2, size, s->regs[addr]); return s->regs[addr]; } else { @@ -1032,7 +752,7 @@ static uint8_t aspeed_smc_hclk_divisor(uint8_t hclk_mask) } } - qemu_log_mask(LOG_GUEST_ERROR, "invalid HCLK mask %x", hclk_mask); + aspeed_smc_error("invalid HCLK mask %x", hclk_mask); return 0; } @@ -1112,8 +832,7 @@ static void 
aspeed_smc_dma_checksum(AspeedSMCState *s) uint32_t data; if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid direction for DMA checksum\n", __func__); + aspeed_smc_error("invalid direction for DMA checksum"); return; } @@ -1125,8 +844,8 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s) data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR], MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Flash read failed @%08x\n", - __func__, s->regs[R_DMA_FLASH_ADDR]); + aspeed_smc_error("Flash read failed @%08x", + s->regs[R_DMA_FLASH_ADDR]); return; } trace_aspeed_smc_dma_checksum(s->regs[R_DMA_FLASH_ADDR], data); @@ -1161,32 +880,32 @@ static void aspeed_smc_dma_rw(AspeedSMCState *s) data = address_space_ldl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR], MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM read failed @%08x\n", - __func__, s->regs[R_DMA_DRAM_ADDR]); + aspeed_smc_error("DRAM read failed @%08x", + s->regs[R_DMA_DRAM_ADDR]); return; } address_space_stl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR], data, MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Flash write failed @%08x\n", - __func__, s->regs[R_DMA_FLASH_ADDR]); + aspeed_smc_error("Flash write failed @%08x", + s->regs[R_DMA_FLASH_ADDR]); return; } } else { data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR], MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: Flash read failed @%08x\n", - __func__, s->regs[R_DMA_FLASH_ADDR]); + aspeed_smc_error("Flash read failed @%08x", + s->regs[R_DMA_FLASH_ADDR]); return; } address_space_stl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR], data, MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM write failed @%08x\n", - __func__, s->regs[R_DMA_DRAM_ADDR]); + aspeed_smc_error("DRAM write failed @%08x", + s->regs[R_DMA_DRAM_ADDR]); return; } } @@ -1236,7 +955,7 @@ static void aspeed_smc_dma_done(AspeedSMCState *s) } } -static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint64_t dma_ctrl) +static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint32_t dma_ctrl) { if (!(dma_ctrl & DMA_CTRL_ENABLE)) { s->regs[R_DMA_CTRL] = dma_ctrl; @@ -1246,7 +965,7 @@ static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint64_t dma_ctrl) } if (aspeed_smc_dma_in_progress(s)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: DMA in progress\n", __func__); + aspeed_smc_error("DMA in progress !"); return; } @@ -1261,26 +980,69 @@ static void aspeed_smc_dma_ctrl(AspeedSMCState *s, uint64_t dma_ctrl) aspeed_smc_dma_done(s); } +static inline bool aspeed_smc_dma_granted(AspeedSMCState *s) +{ + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); + + if (!(asc->features & ASPEED_SMC_FEATURE_DMA_GRANT)) { + return true; + } + + if (!(s->regs[R_DMA_CTRL] & DMA_CTRL_GRANT)) { + aspeed_smc_error("DMA not granted"); + return false; + } + + return true; +} + +static void aspeed_2600_smc_dma_ctrl(AspeedSMCState *s, uint32_t dma_ctrl) +{ + /* Preserve DMA bits */ + dma_ctrl |= s->regs[R_DMA_CTRL] & (DMA_CTRL_REQUEST | DMA_CTRL_GRANT); + + if (dma_ctrl == 0xAEED0000) { + /* automatically grant request */ + s->regs[R_DMA_CTRL] |= (DMA_CTRL_REQUEST | DMA_CTRL_GRANT); + return; + } + + /* clear request */ + if (dma_ctrl == 0xDEEA0000) { + s->regs[R_DMA_CTRL] &= ~(DMA_CTRL_REQUEST | DMA_CTRL_GRANT); + return; + } + + if (!aspeed_smc_dma_granted(s)) { 
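+ /* Any other value is a regular DMA_CTRL setting and is only honoured once the request has been granted above */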
+ aspeed_smc_error("DMA not granted"); + return; + } + + aspeed_smc_dma_ctrl(s, dma_ctrl); + s->regs[R_DMA_CTRL] &= ~(DMA_CTRL_REQUEST | DMA_CTRL_GRANT); +} + static void aspeed_smc_write(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { AspeedSMCState *s = ASPEED_SMC(opaque); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); uint32_t value = data; - addr >>= 2; - trace_aspeed_smc_write(addr, size, data); + addr >>= 2; + if (addr == s->r_conf || (addr >= s->r_timings && - addr < s->r_timings + s->ctrl->nregs_timings) || + addr < s->r_timings + asc->nregs_timings) || addr == s->r_ce_ctrl) { s->regs[addr] = value; } else if (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + s->num_cs) { int cs = addr - s->r_ctrl0; aspeed_smc_flash_update_ctrl(&s->flashes[cs], value); } else if (addr >= R_SEG_ADDR0 && - addr < R_SEG_ADDR0 + s->ctrl->max_peripherals) { + addr < R_SEG_ADDR0 + asc->max_peripherals) { int cs = addr - R_SEG_ADDR0; if (value != s->regs[R_SEG_ADDR0 + cs]) { @@ -1290,15 +1052,20 @@ static void aspeed_smc_write(void *opaque, hwaddr addr, uint64_t data, s->regs[addr] = value & 0xff; } else if (addr == R_DUMMY_DATA) { s->regs[addr] = value & 0xff; + } else if (aspeed_smc_has_wdt_control(asc) && addr == R_FMC_WDT2_CTRL) { + s->regs[addr] = value & FMC_WDT2_CTRL_EN; } else if (addr == R_INTR_CTRL) { s->regs[addr] = value; - } else if (s->ctrl->has_dma && addr == R_DMA_CTRL) { - aspeed_smc_dma_ctrl(s, value); - } else if (s->ctrl->has_dma && addr == R_DMA_DRAM_ADDR) { - s->regs[addr] = DMA_DRAM_ADDR(s, value); - } else if (s->ctrl->has_dma && addr == R_DMA_FLASH_ADDR) { - s->regs[addr] = DMA_FLASH_ADDR(s, value); - } else if (s->ctrl->has_dma && addr == R_DMA_LEN) { + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) { + asc->dma_ctrl(s, value); + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR && + aspeed_smc_dma_granted(s)) { + s->regs[addr] = DMA_DRAM_ADDR(asc, value); + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR && + aspeed_smc_dma_granted(s)) { + s->regs[addr] = DMA_FLASH_ADDR(asc, value); + } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN && + aspeed_smc_dma_granted(s)) { s->regs[addr] = DMA_LENGTH(value); } else { qemu_log_mask(LOG_UNIMP, "%s: not implemented: 0x%" HWADDR_PRIx "\n", @@ -1313,50 +1080,53 @@ static const MemoryRegionOps aspeed_smc_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +static void aspeed_smc_instance_init(Object *obj) +{ + AspeedSMCState *s = ASPEED_SMC(obj); + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); + int i; + + for (i = 0; i < asc->max_peripherals; i++) { + object_initialize_child(obj, "flash[*]", &s->flashes[i], + TYPE_ASPEED_SMC_FLASH); + } +} + /* * Initialize the custom address spaces for DMAs */ static void aspeed_smc_dma_setup(AspeedSMCState *s, Error **errp) { - char *name; - if (!s->dram_mr) { error_setg(errp, TYPE_ASPEED_SMC ": 'dram' link not set"); return; } - name = g_strdup_printf("%s-dma-flash", s->ctrl->name); - address_space_init(&s->flash_as, &s->mmio_flash, name); - g_free(name); - - name = g_strdup_printf("%s-dma-dram", s->ctrl->name); - address_space_init(&s->dram_as, s->dram_mr, name); - g_free(name); + address_space_init(&s->flash_as, &s->mmio_flash, + TYPE_ASPEED_SMC ".dma-flash"); + address_space_init(&s->dram_as, s->dram_mr, + TYPE_ASPEED_SMC ".dma-dram"); } static void aspeed_smc_realize(DeviceState *dev, Error **errp) { SysBusDevice *sbd = SYS_BUS_DEVICE(dev); AspeedSMCState *s = ASPEED_SMC(dev); - AspeedSMCClass *mc = ASPEED_SMC_GET_CLASS(s); + AspeedSMCClass *asc = 
ASPEED_SMC_GET_CLASS(s); int i; - char name[32]; hwaddr offset = 0; - s->ctrl = mc->ctrl; - /* keep a copy under AspeedSMCState to speed up accesses */ - s->r_conf = s->ctrl->r_conf; - s->r_ce_ctrl = s->ctrl->r_ce_ctrl; - s->r_ctrl0 = s->ctrl->r_ctrl0; - s->r_timings = s->ctrl->r_timings; - s->conf_enable_w0 = s->ctrl->conf_enable_w0; + s->r_conf = asc->r_conf; + s->r_ce_ctrl = asc->r_ce_ctrl; + s->r_ctrl0 = asc->r_ctrl0; + s->r_timings = asc->r_timings; + s->conf_enable_w0 = asc->conf_enable_w0; /* Enforce some real HW limits */ - if (s->num_cs > s->ctrl->max_peripherals) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: num_cs cannot exceed: %d\n", - __func__, s->ctrl->max_peripherals); - s->num_cs = s->ctrl->max_peripherals; + if (s->num_cs > asc->max_peripherals) { + aspeed_smc_error("num_cs cannot exceed: %d", asc->max_peripherals); + s->num_cs = asc->max_peripherals; } /* DMA irq. Keep it first for the initialization in the SoC */ @@ -1373,7 +1143,7 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) /* The memory region for the controller registers */ memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_ops, s, - s->ctrl->name, s->ctrl->nregs * 4); + TYPE_ASPEED_SMC, asc->nregs * 4); sysbus_init_mmio(sbd, &s->mmio); /* @@ -1381,14 +1151,17 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) * window in which the flash modules are mapped. The size and * address depends on the SoC model and controller type. */ - snprintf(name, sizeof(name), "%s.flash", s->ctrl->name); + memory_region_init(&s->mmio_flash_container, OBJECT(s), + TYPE_ASPEED_SMC ".container", + asc->flash_window_size); + sysbus_init_mmio(sbd, &s->mmio_flash_container); memory_region_init_io(&s->mmio_flash, OBJECT(s), - &aspeed_smc_flash_default_ops, s, name, - s->ctrl->flash_window_size); - sysbus_init_mmio(sbd, &s->mmio_flash); - - s->flashes = g_new0(AspeedSMCFlash, s->ctrl->max_peripherals); + &aspeed_smc_flash_default_ops, s, + TYPE_ASPEED_SMC ".flash", + asc->flash_window_size); + memory_region_add_subregion(&s->mmio_flash_container, 0x0, + &s->mmio_flash); /* * Let's create a sub memory region for each possible peripheral. All @@ -1397,22 +1170,26 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) * module behind to handle the memory accesses. This depends on * the board configuration. 
*/ - for (i = 0; i < s->ctrl->max_peripherals; ++i) { + for (i = 0; i < asc->max_peripherals; ++i) { AspeedSMCFlash *fl = &s->flashes[i]; - snprintf(name, sizeof(name), "%s.%d", s->ctrl->name, i); + if (!object_property_set_link(OBJECT(fl), "controller", OBJECT(s), + errp)) { + return; + } + if (!object_property_set_uint(OBJECT(fl), "cs", i, errp)) { + return; + } + if (!sysbus_realize(SYS_BUS_DEVICE(fl), errp)) { + return; + } - fl->id = i; - fl->controller = s; - fl->size = s->ctrl->segments[i].size; - memory_region_init_io(&fl->mmio, OBJECT(s), &aspeed_smc_flash_ops, - fl, name, fl->size); memory_region_add_subregion(&s->mmio_flash, offset, &fl->mmio); - offset += fl->size; + offset += asc->segments[i].size; } /* DMA support */ - if (s->ctrl->has_dma) { + if (aspeed_smc_has_dma(asc)) { aspeed_smc_dma_setup(s, errp); } } @@ -1432,7 +1209,6 @@ static const VMStateDescription vmstate_aspeed_smc = { static Property aspeed_smc_properties[] = { DEFINE_PROP_UINT32("num-cs", AspeedSMCState, num_cs, 1), DEFINE_PROP_BOOL("inject-failure", AspeedSMCState, inject_failure, false), - DEFINE_PROP_UINT64("sdram-base", AspeedSMCState, sdram_base, 0), DEFINE_PROP_LINK("dram", AspeedSMCState, dram_mr, TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), @@ -1441,37 +1217,494 @@ static Property aspeed_smc_properties[] = { static void aspeed_smc_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - AspeedSMCClass *mc = ASPEED_SMC_CLASS(klass); dc->realize = aspeed_smc_realize; dc->reset = aspeed_smc_reset; device_class_set_props(dc, aspeed_smc_properties); dc->vmsd = &vmstate_aspeed_smc; - mc->ctrl = data; } static const TypeInfo aspeed_smc_info = { .name = TYPE_ASPEED_SMC, .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = aspeed_smc_instance_init, .instance_size = sizeof(AspeedSMCState), .class_size = sizeof(AspeedSMCClass), + .class_init = aspeed_smc_class_init, .abstract = true, }; -static void aspeed_smc_register_types(void) +static void aspeed_smc_flash_realize(DeviceState *dev, Error **errp) { - int i; + AspeedSMCFlash *s = ASPEED_SMC_FLASH(dev); + AspeedSMCClass *asc; + g_autofree char *name = g_strdup_printf(TYPE_ASPEED_SMC_FLASH ".%d", s->cs); - type_register_static(&aspeed_smc_info); - for (i = 0; i < ARRAY_SIZE(controllers); ++i) { - TypeInfo ti = { - .name = controllers[i].name, - .parent = TYPE_ASPEED_SMC, - .class_init = aspeed_smc_class_init, - .class_data = (void *)&controllers[i], - }; - type_register(&ti); + if (!s->controller) { + error_setg(errp, TYPE_ASPEED_SMC_FLASH ": 'controller' link not set"); + return; + } + + asc = ASPEED_SMC_GET_CLASS(s->controller); + + /* + * Use the default segment value to size the memory region. This + * can be changed by FW at runtime. 
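+ * (the guest can later resize it by reprogramming the controller's Segment Address Register).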
+ */ + memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_flash_ops, + s, name, asc->segments[s->cs].size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); +} + +static Property aspeed_smc_flash_properties[] = { + DEFINE_PROP_UINT8("cs", AspeedSMCFlash, cs, 0), + DEFINE_PROP_LINK("controller", AspeedSMCFlash, controller, TYPE_ASPEED_SMC, + AspeedSMCState *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_smc_flash_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "Aspeed SMC Flash device region"; + dc->realize = aspeed_smc_flash_realize; + device_class_set_props(dc, aspeed_smc_flash_properties); +} + +static const TypeInfo aspeed_smc_flash_info = { + .name = TYPE_ASPEED_SMC_FLASH, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedSMCFlash), + .class_init = aspeed_smc_flash_class_init, +}; + +/* + * The Segment Registers of the AST2400 and AST2500 have a 8MB + * unit. The address range of a flash SPI peripheral is encoded with + * absolute addresses which should be part of the overall controller + * window. + */ +static uint32_t aspeed_smc_segment_to_reg(const AspeedSMCState *s, + const AspeedSegments *seg) +{ + uint32_t reg = 0; + reg |= ((seg->addr >> 23) & SEG_START_MASK) << SEG_START_SHIFT; + reg |= (((seg->addr + seg->size) >> 23) & SEG_END_MASK) << SEG_END_SHIFT; + return reg; +} + +static void aspeed_smc_reg_to_segment(const AspeedSMCState *s, + uint32_t reg, AspeedSegments *seg) +{ + seg->addr = ((reg >> SEG_START_SHIFT) & SEG_START_MASK) << 23; + seg->size = (((reg >> SEG_END_SHIFT) & SEG_END_MASK) << 23) - seg->addr; +} + +static const AspeedSegments aspeed_2400_smc_segments[] = { + { 0x10000000, 32 * MiB }, +}; + +static void aspeed_2400_smc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2400 SMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 1; + asc->segments = aspeed_2400_smc_segments; + asc->flash_window_base = 0x10000000; + asc->flash_window_size = 0x6000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_SMC_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2400_smc_info = { + .name = "aspeed.smc-ast2400", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2400_smc_class_init, +}; + +static const uint32_t aspeed_2400_fmc_resets[ASPEED_SMC_R_MAX] = { + /* + * CE0 and CE1 types are HW strapped in SCU70. Do it here to + * simplify the model. 
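+ * Only CE0 is set to the SPI flash type here, matching the palmetto-bmc configuration used by the previous implementation.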
+ */ + [R_CONF] = CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0, +}; + +static const AspeedSegments aspeed_2400_fmc_segments[] = { + { 0x20000000, 64 * MiB }, /* start address is readonly */ + { 0x24000000, 32 * MiB }, + { 0x26000000, 32 * MiB }, + { 0x28000000, 32 * MiB }, + { 0x2A000000, 32 * MiB } +}; + +static void aspeed_2400_fmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2400 FMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 5; + asc->segments = aspeed_2400_fmc_segments; + asc->resets = aspeed_2400_fmc_resets; + asc->flash_window_base = 0x20000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x1FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2400_fmc_info = { + .name = "aspeed.fmc-ast2400", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2400_fmc_class_init, +}; + +static const AspeedSegments aspeed_2400_spi1_segments[] = { + { 0x30000000, 64 * MiB }, +}; + +static int aspeed_2400_spi1_addr_width(const AspeedSMCState *s) +{ + return s->regs[R_SPI_CTRL0] & CTRL_AST2400_SPI_4BYTE ? 4 : 3; +} + +static void aspeed_2400_spi1_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2400 SPI1 Controller"; + asc->r_conf = R_SPI_CONF; + asc->r_ce_ctrl = 0xff; + asc->r_ctrl0 = R_SPI_CTRL0; + asc->r_timings = R_SPI_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = SPI_CONF_ENABLE_W0; + asc->max_peripherals = 1; + asc->segments = aspeed_2400_spi1_segments; + asc->flash_window_base = 0x30000000; + asc->flash_window_size = 0x10000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_SPI_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; + asc->addr_width = aspeed_2400_spi1_addr_width; +} + +static const TypeInfo aspeed_2400_spi1_info = { + .name = "aspeed.spi1-ast2400", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2400_spi1_class_init, +}; + +static const uint32_t aspeed_2500_fmc_resets[ASPEED_SMC_R_MAX] = { + [R_CONF] = (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0 | + CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1), +}; + +static const AspeedSegments aspeed_2500_fmc_segments[] = { + { 0x20000000, 128 * MiB }, /* start address is readonly */ + { 0x28000000, 32 * MiB }, + { 0x2A000000, 32 * MiB }, +}; + +static void aspeed_2500_fmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2500 FMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 3; + asc->segments = aspeed_2500_fmc_segments; + asc->resets = aspeed_2500_fmc_resets; + asc->flash_window_base = 0x20000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA; + asc->dma_flash_mask = 0x0FFFFFFC;
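+ /* The AST2500 DMA can reach a 1GB DRAM space, hence a wider mask than on the AST2400 (0x1FFFFFFC) */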
+ asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2500_fmc_info = { + .name = "aspeed.fmc-ast2500", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2500_fmc_class_init, +}; + +static const AspeedSegments aspeed_2500_spi1_segments[] = { + { 0x30000000, 32 * MiB }, /* start address is readonly */ + { 0x32000000, 96 * MiB }, /* end address is readonly */ +}; + +static void aspeed_2500_spi1_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2500 SPI1 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 2; + asc->segments = aspeed_2500_spi1_segments; + asc->flash_window_base = 0x30000000; + asc->flash_window_size = 0x8000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2500_spi1_info = { + .name = "aspeed.spi1-ast2500", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2500_spi1_class_init, +}; + +static const AspeedSegments aspeed_2500_spi2_segments[] = { + { 0x38000000, 32 * MiB }, /* start address is readonly */ + { 0x3A000000, 96 * MiB }, /* end address is readonly */ +}; + +static void aspeed_2500_spi2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2500 SPI2 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 2; + asc->segments = aspeed_2500_spi2_segments; + asc->flash_window_base = 0x38000000; + asc->flash_window_size = 0x8000000; + asc->features = 0x0; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_smc_segment_to_reg; + asc->reg_to_segment = aspeed_smc_reg_to_segment; + asc->dma_ctrl = aspeed_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2500_spi2_info = { + .name = "aspeed.spi2-ast2500", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2500_spi2_class_init, +}; + +/* + * The Segment Registers of the AST2600 have a 1MB unit. The address + * range of a flash SPI peripheral is encoded with offsets in the overall + * controller window. The previous SoC AST2400 and AST2500 used + * absolute addresses. Only bits [27:20] are relevant and the end + * address is an upper bound limit.
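+ * For example, the default 128MB mapping of the FMC CE0 (0x20000000 - 0x27FFFFFF)
+ * is encoded as 0x07F00000: start offset 0x00 in reg[11:4], end offset 0x7F in reg[27:20].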
+ */ +#define AST2600_SEG_ADDR_MASK 0x0ff00000 + +static uint32_t aspeed_2600_smc_segment_to_reg(const AspeedSMCState *s, + const AspeedSegments *seg) +{ + uint32_t reg = 0; + + /* Disabled segments have a nil register */ + if (!seg->size) { + return 0; + } + + reg |= (seg->addr & AST2600_SEG_ADDR_MASK) >> 16; /* start offset */ + reg |= (seg->addr + seg->size - 1) & AST2600_SEG_ADDR_MASK; /* end offset */ + return reg; +} + +static void aspeed_2600_smc_reg_to_segment(const AspeedSMCState *s, + uint32_t reg, AspeedSegments *seg) +{ + uint32_t start_offset = (reg << 16) & AST2600_SEG_ADDR_MASK; + uint32_t end_offset = reg & AST2600_SEG_ADDR_MASK; + AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); + + if (reg) { + seg->addr = asc->flash_window_base + start_offset; + seg->size = end_offset + MiB - start_offset; + } else { + seg->addr = asc->flash_window_base; + seg->size = 0; } } +static const uint32_t aspeed_2600_fmc_resets[ASPEED_SMC_R_MAX] = { + [R_CONF] = (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0 | + CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1 | + CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE2), +}; + +static const AspeedSegments aspeed_2600_fmc_segments[] = { + { 0x0, 128 * MiB }, /* start address is readonly */ + { 128 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */ + { 0x0, 0 }, /* disabled */ +}; + +static void aspeed_2600_fmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2600 FMC Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 1; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 3; + asc->segments = aspeed_2600_fmc_segments; + asc->resets = aspeed_2600_fmc_resets; + asc->flash_window_base = 0x20000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA | + ASPEED_SMC_FEATURE_WDT_CONTROL; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_2600_smc_segment_to_reg; + asc->reg_to_segment = aspeed_2600_smc_reg_to_segment; + asc->dma_ctrl = aspeed_2600_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2600_fmc_info = { + .name = "aspeed.fmc-ast2600", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2600_fmc_class_init, +}; + +static const AspeedSegments aspeed_2600_spi1_segments[] = { + { 0x0, 128 * MiB }, /* start address is readonly */ + { 0x0, 0 }, /* disabled */ +}; + +static void aspeed_2600_spi1_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2600 SPI1 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 2; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 2; + asc->segments = aspeed_2600_spi1_segments; + asc->flash_window_base = 0x30000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA | + ASPEED_SMC_FEATURE_DMA_GRANT; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_2600_smc_segment_to_reg; + asc->reg_to_segment = aspeed_2600_smc_reg_to_segment; + asc->dma_ctrl = aspeed_2600_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2600_spi1_info = { + .name = "aspeed.spi1-ast2600", + .parent = TYPE_ASPEED_SMC, + 
.class_init = aspeed_2600_spi1_class_init, +}; + +static const AspeedSegments aspeed_2600_spi2_segments[] = { + { 0x0, 128 * MiB }, /* start address is readonly */ + { 0x0, 0 }, /* disabled */ + { 0x0, 0 }, /* disabled */ +}; + +static void aspeed_2600_spi2_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass); + + dc->desc = "Aspeed 2600 SPI2 Controller"; + asc->r_conf = R_CONF; + asc->r_ce_ctrl = R_CE_CTRL; + asc->r_ctrl0 = R_CTRL0; + asc->r_timings = R_TIMINGS; + asc->nregs_timings = 3; + asc->conf_enable_w0 = CONF_ENABLE_W0; + asc->max_peripherals = 3; + asc->segments = aspeed_2600_spi2_segments; + asc->flash_window_base = 0x50000000; + asc->flash_window_size = 0x10000000; + asc->features = ASPEED_SMC_FEATURE_DMA | + ASPEED_SMC_FEATURE_DMA_GRANT; + asc->dma_flash_mask = 0x0FFFFFFC; + asc->dma_dram_mask = 0x3FFFFFFC; + asc->nregs = ASPEED_SMC_R_MAX; + asc->segment_to_reg = aspeed_2600_smc_segment_to_reg; + asc->reg_to_segment = aspeed_2600_smc_reg_to_segment; + asc->dma_ctrl = aspeed_2600_smc_dma_ctrl; +} + +static const TypeInfo aspeed_2600_spi2_info = { + .name = "aspeed.spi2-ast2600", + .parent = TYPE_ASPEED_SMC, + .class_init = aspeed_2600_spi2_class_init, +}; + +static void aspeed_smc_register_types(void) +{ + type_register_static(&aspeed_smc_flash_info); + type_register_static(&aspeed_smc_info); + type_register_static(&aspeed_2400_smc_info); + type_register_static(&aspeed_2400_fmc_info); + type_register_static(&aspeed_2400_spi1_info); + type_register_static(&aspeed_2500_fmc_info); + type_register_static(&aspeed_2500_spi1_info); + type_register_static(&aspeed_2500_spi2_info); + type_register_static(&aspeed_2600_fmc_info); + type_register_static(&aspeed_2600_spi1_info); + type_register_static(&aspeed_2600_spi2_info); +} + type_init(aspeed_smc_register_types) diff --git a/hw/ssi/sifive_spi.c b/hw/ssi/sifive_spi.c index 0c9ebca3c86..03540cf5ca6 100644 --- a/hw/ssi/sifive_spi.c +++ b/hw/ssi/sifive_spi.c @@ -24,7 +24,6 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "hw/ssi/ssi.h" -#include "sysemu/sysemu.h" #include "qemu/fifo8.h" #include "qemu/log.h" #include "qemu/module.h" diff --git a/hw/ssi/ssi.c b/hw/ssi/ssi.c index e5d7ce95237..003931fb509 100644 --- a/hw/ssi/ssi.c +++ b/hw/ssi/ssi.c @@ -107,7 +107,7 @@ DeviceState *ssi_create_peripheral(SSIBus *bus, const char *name) SSIBus *ssi_create_bus(DeviceState *parent, const char *name) { BusState *bus; - bus = qbus_create(TYPE_SSI_BUS, parent, name); + bus = qbus_new(TYPE_SSI_BUS, parent, name); return SSI_BUS(bus); } diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c index 49ff2755935..b2819a7ff09 100644 --- a/hw/ssi/xilinx_spi.c +++ b/hw/ssi/xilinx_spi.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" #include "qemu/fifo8.h" diff --git a/hw/timer/Kconfig b/hw/timer/Kconfig index bac25117155..010be7ed1f5 100644 --- a/hw/timer/Kconfig +++ b/hw/timer/Kconfig @@ -25,6 +25,9 @@ config ALLWINNER_A10_PIT bool select PTIMER +config SIFIVE_PWM + bool + config STM32F2XX_TIMER bool @@ -52,5 +55,8 @@ config SSE_COUNTER config SSE_TIMER bool +config STELLARIS_GPTM + bool + config AVR_TIMER16 bool diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c index 2f192011eb0..3bd951dd044 100644 --- a/hw/timer/armv7m_systick.c +++ b/hw/timer/armv7m_systick.c @@ -14,28 +14,32 @@ #include "migration/vmstate.h" #include "hw/irq.h" #include 
"hw/sysbus.h" +#include "hw/qdev-clock.h" #include "qemu/timer.h" #include "qemu/log.h" #include "qemu/module.h" +#include "qapi/error.h" #include "trace.h" -/* qemu timers run at 1GHz. We want something closer to 1MHz. */ -#define SYSTICK_SCALE 1000ULL - #define SYSTICK_ENABLE (1 << 0) #define SYSTICK_TICKINT (1 << 1) #define SYSTICK_CLKSOURCE (1 << 2) #define SYSTICK_COUNTFLAG (1 << 16) -int system_clock_scale; +#define SYSCALIB_NOREF (1U << 31) +#define SYSCALIB_SKEW (1U << 30) +#define SYSCALIB_TENMS ((1U << 24) - 1) -/* Conversion factor from qemu timer to SysTick frequencies. */ -static inline int64_t systick_scale(SysTickState *s) +static void systick_set_period_from_clock(SysTickState *s) { + /* + * Set the ptimer period from whichever clock is selected. + * Must be called from within a ptimer transaction block. + */ if (s->control & SYSTICK_CLKSOURCE) { - return system_clock_scale; + ptimer_set_period_from_clock(s->ptimer, s->cpuclk, 1); } else { - return 1000; + ptimer_set_period_from_clock(s->ptimer, s->refclk, 1); } } @@ -82,7 +86,28 @@ static MemTxResult systick_read(void *opaque, hwaddr addr, uint64_t *data, val = ptimer_get_count(s->ptimer); break; case 0xc: /* SysTick Calibration Value. */ - val = 10000; + /* + * In real hardware it is possible to make this register report + * a different value from what the reference clock is actually + * running at. We don't model that (which usually happens due + * to integration errors in the real hardware) and instead always + * report the theoretical correct value as described in the + * knowledgebase article at + * https://developer.arm.com/documentation/ka001325/latest + * If necessary, we could implement an extra QOM property on this + * device to force the STCALIB value to something different from + * the "correct" value. + */ + if (!clock_has_source(s->refclk)) { + val = SYSCALIB_NOREF; + break; + } + val = clock_ns_to_ticks(s->refclk, 10 * SCALE_MS) - 1; + val &= SYSCALIB_TENMS; + if (clock_ticks_to_ns(s->refclk, val + 1) != 10 * SCALE_MS) { + /* report that tick count does not yield exactly 10ms */ + val |= SYSCALIB_SKEW; + } break; default: val = 0; @@ -114,6 +139,11 @@ static MemTxResult systick_write(void *opaque, hwaddr addr, { uint32_t oldval; + if (!clock_has_source(s->refclk)) { + /* This bit is always 1 if there is no external refclk */ + value |= SYSTICK_CLKSOURCE; + } + ptimer_transaction_begin(s->ptimer); oldval = s->control; s->control &= 0xfffffff8; @@ -121,19 +151,14 @@ static MemTxResult systick_write(void *opaque, hwaddr addr, if ((oldval ^ value) & SYSTICK_ENABLE) { if (value & SYSTICK_ENABLE) { - /* - * Always reload the period in case board code has - * changed system_clock_scale. If we ever replace that - * global with a more sensible API then we might be able - * to set the period only when it actually changes. - */ - ptimer_set_period(s->ptimer, systick_scale(s)); ptimer_run(s->ptimer, 0); } else { ptimer_stop(s->ptimer); } - } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { - ptimer_set_period(s->ptimer, systick_scale(s)); + } + + if ((oldval ^ value) & SYSTICK_CLKSOURCE) { + systick_set_period_from_clock(s); } ptimer_transaction_commit(s->ptimer); break; @@ -176,20 +201,42 @@ static void systick_reset(DeviceState *dev) { SysTickState *s = SYSTICK(dev); - /* - * Forgetting to set system_clock_scale is always a board code - * bug. We can't check this earlier because for some boards - * (like stellaris) it is not yet configured at the point where - * the systick device is realized. 
- */ - assert(system_clock_scale != 0); - ptimer_transaction_begin(s->ptimer); s->control = 0; + if (!clock_has_source(s->refclk)) { + /* This bit is always 1 if there is no external refclk */ + s->control |= SYSTICK_CLKSOURCE; + } ptimer_stop(s->ptimer); ptimer_set_count(s->ptimer, 0); ptimer_set_limit(s->ptimer, 0, 0); - ptimer_set_period(s->ptimer, systick_scale(s)); + systick_set_period_from_clock(s); + ptimer_transaction_commit(s->ptimer); +} + +static void systick_cpuclk_update(void *opaque, ClockEvent event) +{ + SysTickState *s = SYSTICK(opaque); + + if (!(s->control & SYSTICK_CLKSOURCE)) { + /* currently using refclk, we can ignore cpuclk changes */ + return; + } + + ptimer_transaction_begin(s->ptimer); + ptimer_set_period_from_clock(s->ptimer, s->cpuclk, 1); + ptimer_transaction_commit(s->ptimer); +} + +static void systick_refclk_update(void *opaque, ClockEvent event) +{ + SysTickState *s = SYSTICK(opaque); + + if (s->control & SYSTICK_CLKSOURCE) { + /* currently using cpuclk, we can ignore refclk changes */ + return; + } + + ptimer_transaction_begin(s->ptimer); + ptimer_set_period_from_clock(s->ptimer, s->refclk, 1); ptimer_transaction_commit(s->ptimer); } @@ -201,6 +248,11 @@ static void systick_instance_init(Object *obj) memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0); sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); + + s->refclk = qdev_init_clock_in(DEVICE(obj), "refclk", + systick_refclk_update, s, ClockUpdate); + s->cpuclk = qdev_init_clock_in(DEVICE(obj), "cpuclk", + systick_cpuclk_update, s, ClockUpdate); } static void systick_realize(DeviceState *dev, Error **errp) @@ -211,13 +263,21 @@ static void systick_realize(DeviceState *dev, Error **errp) PTIMER_POLICY_NO_COUNTER_ROUND_DOWN | PTIMER_POLICY_NO_IMMEDIATE_RELOAD | PTIMER_POLICY_TRIGGER_ONLY_ON_DECREMENT); + + if (!clock_has_source(s->cpuclk)) { + error_setg(errp, "systick: cpuclk must be connected"); + return; + } + /* It's OK not to connect the refclk */ } static const VMStateDescription vmstate_systick = { .name = "armv7m_systick", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, .fields = (VMStateField[]) { + VMSTATE_CLOCK(refclk, SysTickState), + VMSTATE_CLOCK(cpuclk, SysTickState), VMSTATE_UINT32(control, SysTickState), VMSTATE_INT64(tick, SysTickState), VMSTATE_PTIMER(ptimer, SysTickState), diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c index 5379006086f..4ba662190de 100644 --- a/hw/timer/etraxfs_timer.c +++ b/hw/timer/etraxfs_timer.c @@ -309,9 +309,9 @@ static const MemoryRegionOps timer_ops = { } }; -static void etraxfs_timer_reset(void *opaque) +static void etraxfs_timer_reset_enter(Object *obj, ResetType type) { - ETRAXTimerState *t = opaque; + ETRAXTimerState *t = ETRAX_TIMER(obj); ptimer_transaction_begin(t->ptimer_t0); ptimer_stop(t->ptimer_t0); @@ -325,6 +325,12 @@ static void etraxfs_timer_reset(void *opaque) t->rw_wd_ctrl = 0; t->r_intr = 0; t->rw_intr_mask = 0; +} + +static void etraxfs_timer_reset_hold(Object *obj) +{ + ETRAXTimerState *t = ETRAX_TIMER(obj); + qemu_irq_lower(t->irq); } @@ -343,14 +349,16 @@ static void etraxfs_timer_realize(DeviceState *dev, Error **errp) memory_region_init_io(&t->mmio, OBJECT(t), &timer_ops, t, "etraxfs-timer", 0x5c); sysbus_init_mmio(sbd, &t->mmio); - qemu_register_reset(etraxfs_timer_reset, t); } static void etraxfs_timer_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); dc->realize = 
etraxfs_timer_realize; + rc->phases.enter = etraxfs_timer_reset_enter; + rc->phases.hold = etraxfs_timer_reset_hold; } static const TypeInfo etraxfs_timer_info = { diff --git a/hw/timer/ibex_timer.c b/hw/timer/ibex_timer.c new file mode 100644 index 00000000000..66e1f8e48cb --- /dev/null +++ b/hw/timer/ibex_timer.c @@ -0,0 +1,312 @@ +/* + * QEMU lowRISC Ibex Timer device + * + * Copyright (c) 2021 Western Digital + * + * For details check the documentation here: + * https://docs.opentitan.org/hw/ip/rv_timer/doc/ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "hw/timer/ibex_timer.h" +#include "hw/irq.h" +#include "hw/qdev-properties.h" +#include "target/riscv/cpu.h" +#include "migration/vmstate.h" + +REG32(CTRL, 0x00) + FIELD(CTRL, ACTIVE, 0, 1) +REG32(CFG0, 0x100) + FIELD(CFG0, PRESCALE, 0, 12) + FIELD(CFG0, STEP, 16, 8) +REG32(LOWER0, 0x104) +REG32(UPPER0, 0x108) +REG32(COMPARE_LOWER0, 0x10C) +REG32(COMPARE_UPPER0, 0x110) +REG32(INTR_ENABLE, 0x114) + FIELD(INTR_ENABLE, IE_0, 0, 1) +REG32(INTR_STATE, 0x118) + FIELD(INTR_STATE, IS_0, 0, 1) +REG32(INTR_TEST, 0x11C) + FIELD(INTR_TEST, T_0, 0, 1) + +static uint64_t cpu_riscv_read_rtc(uint32_t timebase_freq) +{ + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + timebase_freq, NANOSECONDS_PER_SECOND); +} + +static void ibex_timer_update_irqs(IbexTimerState *s) +{ + CPUState *cs = qemu_get_cpu(0); + RISCVCPU *cpu = RISCV_CPU(cs); + uint64_t value = s->timer_compare_lower0 | + ((uint64_t)s->timer_compare_upper0 << 32); + uint64_t next, diff; + uint64_t now = cpu_riscv_read_rtc(s->timebase_freq); + + if (!(s->timer_ctrl & R_CTRL_ACTIVE_MASK)) { + /* Timer isn't active */ + return; + } + + /* Update the CPUs mtimecmp */ + cpu->env.timecmp = value; + + if (cpu->env.timecmp <= now) { + /* + * If the mtimecmp was in the past raise the interrupt now. 
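+ * No additional QEMU timer needs to be armed in this case.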
+ */ + qemu_irq_raise(s->m_timer_irq); + if (s->timer_intr_enable & R_INTR_ENABLE_IE_0_MASK) { + s->timer_intr_state |= R_INTR_STATE_IS_0_MASK; + qemu_set_irq(s->irq, true); + } + return; + } + + /* Setup a timer to trigger the interrupt in the future */ + qemu_irq_lower(s->m_timer_irq); + qemu_set_irq(s->irq, false); + + diff = cpu->env.timecmp - now; + next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + muldiv64(diff, + NANOSECONDS_PER_SECOND, + s->timebase_freq); + + if (next < qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)) { + /* We overflowed the timer, just set it as large as we can */ + timer_mod(cpu->env.timer, 0x7FFFFFFFFFFFFFFF); + } else { + timer_mod(cpu->env.timer, next); + } +} + +static void ibex_timer_cb(void *opaque) +{ + IbexTimerState *s = opaque; + + qemu_irq_raise(s->m_timer_irq); + if (s->timer_intr_enable & R_INTR_ENABLE_IE_0_MASK) { + s->timer_intr_state |= R_INTR_STATE_IS_0_MASK; + qemu_set_irq(s->irq, true); + } +} + +static void ibex_timer_reset(DeviceState *dev) +{ + IbexTimerState *s = IBEX_TIMER(dev); + + CPUState *cpu = qemu_get_cpu(0); + CPURISCVState *env = cpu->env_ptr; + env->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + &ibex_timer_cb, s); + env->timecmp = 0; + + s->timer_ctrl = 0x00000000; + s->timer_cfg0 = 0x00010000; + s->timer_compare_lower0 = 0xFFFFFFFF; + s->timer_compare_upper0 = 0xFFFFFFFF; + s->timer_intr_enable = 0x00000000; + s->timer_intr_state = 0x00000000; + s->timer_intr_test = 0x00000000; + + ibex_timer_update_irqs(s); +} + +static uint64_t ibex_timer_read(void *opaque, hwaddr addr, + unsigned int size) +{ + IbexTimerState *s = opaque; + uint64_t now = cpu_riscv_read_rtc(s->timebase_freq); + uint64_t retvalue = 0; + + switch (addr >> 2) { + case R_CTRL: + retvalue = s->timer_ctrl; + break; + case R_CFG0: + retvalue = s->timer_cfg0; + break; + case R_LOWER0: + retvalue = now; + break; + case R_UPPER0: + retvalue = now >> 32; + break; + case R_COMPARE_LOWER0: + retvalue = s->timer_compare_lower0; + break; + case R_COMPARE_UPPER0: + retvalue = s->timer_compare_upper0; + break; + case R_INTR_ENABLE: + retvalue = s->timer_intr_enable; + break; + case R_INTR_STATE: + retvalue = s->timer_intr_state; + break; + case R_INTR_TEST: + retvalue = s->timer_intr_test; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + return 0; + } + + return retvalue; +} + +static void ibex_timer_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + IbexTimerState *s = opaque; + uint32_t val = val64; + + switch (addr >> 2) { + case R_CTRL: + s->timer_ctrl = val; + break; + case R_CFG0: + qemu_log_mask(LOG_UNIMP, "Changing prescale or step not supported"); + s->timer_cfg0 = val; + break; + case R_LOWER0: + qemu_log_mask(LOG_UNIMP, "Changing timer value is not supported"); + break; + case R_UPPER0: + qemu_log_mask(LOG_UNIMP, "Changing timer value is not supported"); + break; + case R_COMPARE_LOWER0: + s->timer_compare_lower0 = val; + ibex_timer_update_irqs(s); + break; + case R_COMPARE_UPPER0: + s->timer_compare_upper0 = val; + ibex_timer_update_irqs(s); + break; + case R_INTR_ENABLE: + s->timer_intr_enable = val; + break; + case R_INTR_STATE: + /* Write 1 to clear */ + s->timer_intr_state &= ~val; + break; + case R_INTR_TEST: + s->timer_intr_test = val; + if (s->timer_intr_enable & + s->timer_intr_test & + R_INTR_ENABLE_IE_0_MASK) { + s->timer_intr_state |= R_INTR_STATE_IS_0_MASK; + qemu_set_irq(s->irq, true); + } + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", 
__func__, addr); + } +} + +static const MemoryRegionOps ibex_timer_ops = { + .read = ibex_timer_read, + .write = ibex_timer_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 4, + .impl.max_access_size = 4, +}; + +static int ibex_timer_post_load(void *opaque, int version_id) +{ + IbexTimerState *s = opaque; + + ibex_timer_update_irqs(s); + return 0; +} + +static const VMStateDescription vmstate_ibex_timer = { + .name = TYPE_IBEX_TIMER, + .version_id = 1, + .minimum_version_id = 1, + .post_load = ibex_timer_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(timer_ctrl, IbexTimerState), + VMSTATE_UINT32(timer_cfg0, IbexTimerState), + VMSTATE_UINT32(timer_compare_lower0, IbexTimerState), + VMSTATE_UINT32(timer_compare_upper0, IbexTimerState), + VMSTATE_UINT32(timer_intr_enable, IbexTimerState), + VMSTATE_UINT32(timer_intr_state, IbexTimerState), + VMSTATE_UINT32(timer_intr_test, IbexTimerState), + VMSTATE_END_OF_LIST() + } +}; + +static Property ibex_timer_properties[] = { + DEFINE_PROP_UINT32("timebase-freq", IbexTimerState, timebase_freq, 10000), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ibex_timer_init(Object *obj) +{ + IbexTimerState *s = IBEX_TIMER(obj); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &ibex_timer_ops, s, + TYPE_IBEX_TIMER, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void ibex_timer_realize(DeviceState *dev, Error **errp) +{ + IbexTimerState *s = IBEX_TIMER(dev); + + qdev_init_gpio_out(dev, &s->m_timer_irq, 1); +} + + +static void ibex_timer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = ibex_timer_reset; + dc->vmsd = &vmstate_ibex_timer; + dc->realize = ibex_timer_realize; + device_class_set_props(dc, ibex_timer_properties); +} + +static const TypeInfo ibex_timer_info = { + .name = TYPE_IBEX_TIMER, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(IbexTimerState), + .instance_init = ibex_timer_init, + .class_init = ibex_timer_class_init, +}; + +static void ibex_timer_register_types(void) +{ + type_register_static(&ibex_timer_info); +} + +type_init(ibex_timer_register_types) diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c deleted file mode 100644 index eeaf0ada5fa..00000000000 --- a/hw/timer/lm32_timer.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * QEMU model of the LatticeMico32 timer block. - * - * Copyright (c) 2010 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- * - * - * Specification available at: - * http://www.latticesemi.com/documents/mico32timer.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/timer.h" -#include "hw/ptimer.h" -#include "hw/qdev-properties.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -#define DEFAULT_FREQUENCY (50*1000000) - -enum { - R_SR = 0, - R_CR, - R_PERIOD, - R_SNAPSHOT, - R_MAX -}; - -enum { - SR_TO = (1 << 0), - SR_RUN = (1 << 1), -}; - -enum { - CR_ITO = (1 << 0), - CR_CONT = (1 << 1), - CR_START = (1 << 2), - CR_STOP = (1 << 3), -}; - -#define TYPE_LM32_TIMER "lm32-timer" -OBJECT_DECLARE_SIMPLE_TYPE(LM32TimerState, LM32_TIMER) - -struct LM32TimerState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - - ptimer_state *ptimer; - - qemu_irq irq; - uint32_t freq_hz; - - uint32_t regs[R_MAX]; -}; - -static void timer_update_irq(LM32TimerState *s) -{ - int state = (s->regs[R_SR] & SR_TO) && (s->regs[R_CR] & CR_ITO); - - trace_lm32_timer_irq_state(state); - qemu_set_irq(s->irq, state); -} - -static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size) -{ - LM32TimerState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_SR: - case R_CR: - case R_PERIOD: - r = s->regs[addr]; - break; - case R_SNAPSHOT: - r = (uint32_t)ptimer_get_count(s->ptimer); - break; - default: - error_report("lm32_timer: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_lm32_timer_memory_read(addr << 2, r); - return r; -} - -static void timer_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size) -{ - LM32TimerState *s = opaque; - - trace_lm32_timer_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case R_SR: - s->regs[R_SR] &= ~SR_TO; - break; - case R_CR: - ptimer_transaction_begin(s->ptimer); - s->regs[R_CR] = value; - if (s->regs[R_CR] & CR_START) { - ptimer_run(s->ptimer, 1); - } - if (s->regs[R_CR] & CR_STOP) { - ptimer_stop(s->ptimer); - } - ptimer_transaction_commit(s->ptimer); - break; - case R_PERIOD: - s->regs[R_PERIOD] = value; - ptimer_transaction_begin(s->ptimer); - ptimer_set_count(s->ptimer, value); - ptimer_transaction_commit(s->ptimer); - break; - case R_SNAPSHOT: - error_report("lm32_timer: write access to read only register 0x" - TARGET_FMT_plx, addr << 2); - break; - default: - error_report("lm32_timer: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - timer_update_irq(s); -} - -static const MemoryRegionOps timer_ops = { - .read = timer_read, - .write = timer_write, - .endianness = DEVICE_NATIVE_ENDIAN, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, -}; - -static void timer_hit(void *opaque) -{ - LM32TimerState *s = opaque; - - trace_lm32_timer_hit(); - - s->regs[R_SR] |= SR_TO; - - if (s->regs[R_CR] & CR_CONT) { - ptimer_set_count(s->ptimer, s->regs[R_PERIOD]); - ptimer_run(s->ptimer, 1); - } - timer_update_irq(s); -} - -static void timer_reset(DeviceState *d) -{ - LM32TimerState *s = LM32_TIMER(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - ptimer_transaction_begin(s->ptimer); - ptimer_stop(s->ptimer); - ptimer_transaction_commit(s->ptimer); -} - -static void lm32_timer_init(Object *obj) -{ - LM32TimerState *s = LM32_TIMER(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->irq); - - memory_region_init_io(&s->iomem, obj, &timer_ops, s, - "timer", R_MAX * 4); - sysbus_init_mmio(dev, 
&s->iomem); -} - -static void lm32_timer_realize(DeviceState *dev, Error **errp) -{ - LM32TimerState *s = LM32_TIMER(dev); - - s->ptimer = ptimer_init(timer_hit, s, PTIMER_POLICY_DEFAULT); - - ptimer_transaction_begin(s->ptimer); - ptimer_set_freq(s->ptimer, s->freq_hz); - ptimer_transaction_commit(s->ptimer); -} - -static const VMStateDescription vmstate_lm32_timer = { - .name = "lm32-timer", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_PTIMER(ptimer, LM32TimerState), - VMSTATE_UINT32(freq_hz, LM32TimerState), - VMSTATE_UINT32_ARRAY(regs, LM32TimerState, R_MAX), - VMSTATE_END_OF_LIST() - } -}; - -static Property lm32_timer_properties[] = { - DEFINE_PROP_UINT32("frequency", LM32TimerState, freq_hz, DEFAULT_FREQUENCY), - DEFINE_PROP_END_OF_LIST(), -}; - -static void lm32_timer_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = lm32_timer_realize; - dc->reset = timer_reset; - dc->vmsd = &vmstate_lm32_timer; - device_class_set_props(dc, lm32_timer_properties); -} - -static const TypeInfo lm32_timer_info = { - .name = TYPE_LM32_TIMER, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(LM32TimerState), - .instance_init = lm32_timer_init, - .class_init = lm32_timer_class_init, -}; - -static void lm32_timer_register_types(void) -{ - type_register_static(&lm32_timer_info); -} - -type_init(lm32_timer_register_types) diff --git a/hw/timer/meson.build b/hw/timer/meson.build index 598d0585064..03092e2cebf 100644 --- a/hw/timer/meson.build +++ b/hw/timer/meson.build @@ -19,22 +19,22 @@ softmmu_ss.add(when: 'CONFIG_HPET', if_true: files('hpet.c')) softmmu_ss.add(when: 'CONFIG_I8254', if_true: files('i8254_common.c', 'i8254.c')) softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_epit.c')) softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('imx_gpt.c')) -softmmu_ss.add(when: 'CONFIG_LM32_DEVICES', if_true: files('lm32_timer.c')) -softmmu_ss.add(when: 'CONFIG_MILKYMIST', if_true: files('milkymist-sysctl.c')) softmmu_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('mips_gictimer.c')) softmmu_ss.add(when: 'CONFIG_MSF2', if_true: files('mss-timer.c')) softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_timer.c')) softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_timer.c')) softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_gptimer.c')) softmmu_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_synctimer.c')) -softmmu_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3_ost.c')) softmmu_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx_timer.c')) softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_systmr.c')) softmmu_ss.add(when: 'CONFIG_SH_TIMER', if_true: files('sh_timer.c')) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_timer.c')) softmmu_ss.add(when: 'CONFIG_SSE_COUNTER', if_true: files('sse-counter.c')) softmmu_ss.add(when: 'CONFIG_SSE_TIMER', if_true: files('sse-timer.c')) +softmmu_ss.add(when: 'CONFIG_STELLARIS_GPTM', if_true: files('stellaris-gptm.c')) softmmu_ss.add(when: 'CONFIG_STM32F2XX_TIMER', if_true: files('stm32f2xx_timer.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_timer.c')) +specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_timer.c')) +softmmu_ss.add(when: 'CONFIG_SIFIVE_PWM', if_true: files('sifive_pwm.c')) specific_ss.add(when: 'CONFIG_AVR_TIMER16', if_true: files('avr_timer16.c')) diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c deleted file mode 100644 index 9ecea63861c..00000000000 
--- a/hw/timer/milkymist-sysctl.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * QEMU model of the Milkymist System Controller. - * - * Copyright (c) 2010-2012 Michael Walle - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/sysctl.pdf - */ - -#include "qemu/osdep.h" -#include "hw/irq.h" -#include "hw/sysbus.h" -#include "migration/vmstate.h" -#include "trace.h" -#include "qemu/timer.h" -#include "sysemu/runstate.h" -#include "hw/ptimer.h" -#include "hw/qdev-properties.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -enum { - CTRL_ENABLE = (1<<0), - CTRL_AUTORESTART = (1<<1), -}; - -enum { - ICAP_READY = (1<<0), -}; - -enum { - R_GPIO_IN = 0, - R_GPIO_OUT, - R_GPIO_INTEN, - R_TIMER0_CONTROL = 4, - R_TIMER0_COMPARE, - R_TIMER0_COUNTER, - R_TIMER1_CONTROL = 8, - R_TIMER1_COMPARE, - R_TIMER1_COUNTER, - R_ICAP = 16, - R_DBG_SCRATCHPAD = 20, - R_DBG_WRITE_LOCK, - R_CLK_FREQUENCY = 29, - R_CAPABILITIES, - R_SYSTEM_ID, - R_MAX -}; - -#define TYPE_MILKYMIST_SYSCTL "milkymist-sysctl" -OBJECT_DECLARE_SIMPLE_TYPE(MilkymistSysctlState, MILKYMIST_SYSCTL) - -struct MilkymistSysctlState { - SysBusDevice parent_obj; - - MemoryRegion regs_region; - - ptimer_state *ptimer0; - ptimer_state *ptimer1; - - uint32_t freq_hz; - uint32_t capabilities; - uint32_t systemid; - uint32_t strappings; - - uint32_t regs[R_MAX]; - - qemu_irq gpio_irq; - qemu_irq timer0_irq; - qemu_irq timer1_irq; -}; - -static void sysctl_icap_write(MilkymistSysctlState *s, uint32_t value) -{ - trace_milkymist_sysctl_icap_write(value); - switch (value & 0xffff) { - case 0x000e: - qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); - break; - } -} - -static uint64_t sysctl_read(void *opaque, hwaddr addr, - unsigned size) -{ - MilkymistSysctlState *s = opaque; - uint32_t r = 0; - - addr >>= 2; - switch (addr) { - case R_TIMER0_COUNTER: - r = (uint32_t)ptimer_get_count(s->ptimer0); - /* milkymist timer counts up */ - r = s->regs[R_TIMER0_COMPARE] - r; - break; - case R_TIMER1_COUNTER: - r = (uint32_t)ptimer_get_count(s->ptimer1); - /* milkymist timer counts up */ - r = s->regs[R_TIMER1_COMPARE] - r; - break; - case R_GPIO_IN: - case R_GPIO_OUT: - case R_GPIO_INTEN: - case R_TIMER0_CONTROL: - case R_TIMER0_COMPARE: - case R_TIMER1_CONTROL: - case R_TIMER1_COMPARE: - case R_ICAP: - case R_DBG_SCRATCHPAD: - case R_DBG_WRITE_LOCK: - case R_CLK_FREQUENCY: - case R_CAPABILITIES: - case R_SYSTEM_ID: - r = s->regs[addr]; - break; - - default: - error_report("milkymist_sysctl: read access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } - - trace_milkymist_sysctl_memory_read(addr << 2, r); - - return r; -} - -static void sysctl_write(void *opaque, hwaddr addr, uint64_t value, - unsigned size) -{ - MilkymistSysctlState *s = opaque; - - trace_milkymist_sysctl_memory_write(addr, value); - - addr >>= 2; - switch (addr) { - case 
R_GPIO_OUT: - case R_GPIO_INTEN: - case R_TIMER0_COUNTER: - case R_TIMER1_COUNTER: - case R_DBG_SCRATCHPAD: - s->regs[addr] = value; - break; - case R_TIMER0_COMPARE: - ptimer_transaction_begin(s->ptimer0); - ptimer_set_limit(s->ptimer0, value, 0); - s->regs[addr] = value; - ptimer_transaction_commit(s->ptimer0); - break; - case R_TIMER1_COMPARE: - ptimer_transaction_begin(s->ptimer1); - ptimer_set_limit(s->ptimer1, value, 0); - s->regs[addr] = value; - ptimer_transaction_commit(s->ptimer1); - break; - case R_TIMER0_CONTROL: - ptimer_transaction_begin(s->ptimer0); - s->regs[addr] = value; - if (s->regs[R_TIMER0_CONTROL] & CTRL_ENABLE) { - trace_milkymist_sysctl_start_timer0(); - ptimer_set_count(s->ptimer0, - s->regs[R_TIMER0_COMPARE] - s->regs[R_TIMER0_COUNTER]); - ptimer_run(s->ptimer0, 0); - } else { - trace_milkymist_sysctl_stop_timer0(); - ptimer_stop(s->ptimer0); - } - ptimer_transaction_commit(s->ptimer0); - break; - case R_TIMER1_CONTROL: - ptimer_transaction_begin(s->ptimer1); - s->regs[addr] = value; - if (s->regs[R_TIMER1_CONTROL] & CTRL_ENABLE) { - trace_milkymist_sysctl_start_timer1(); - ptimer_set_count(s->ptimer1, - s->regs[R_TIMER1_COMPARE] - s->regs[R_TIMER1_COUNTER]); - ptimer_run(s->ptimer1, 0); - } else { - trace_milkymist_sysctl_stop_timer1(); - ptimer_stop(s->ptimer1); - } - ptimer_transaction_commit(s->ptimer1); - break; - case R_ICAP: - sysctl_icap_write(s, value); - break; - case R_DBG_WRITE_LOCK: - s->regs[addr] = 1; - break; - case R_SYSTEM_ID: - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - break; - - case R_GPIO_IN: - case R_CLK_FREQUENCY: - case R_CAPABILITIES: - error_report("milkymist_sysctl: write to read-only register 0x" - TARGET_FMT_plx, addr << 2); - break; - - default: - error_report("milkymist_sysctl: write access to unknown register 0x" - TARGET_FMT_plx, addr << 2); - break; - } -} - -static const MemoryRegionOps sysctl_mmio_ops = { - .read = sysctl_read, - .write = sysctl_write, - .valid = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void timer0_hit(void *opaque) -{ - MilkymistSysctlState *s = opaque; - - if (!(s->regs[R_TIMER0_CONTROL] & CTRL_AUTORESTART)) { - s->regs[R_TIMER0_CONTROL] &= ~CTRL_ENABLE; - trace_milkymist_sysctl_stop_timer0(); - ptimer_stop(s->ptimer0); - } - - trace_milkymist_sysctl_pulse_irq_timer0(); - qemu_irq_pulse(s->timer0_irq); -} - -static void timer1_hit(void *opaque) -{ - MilkymistSysctlState *s = opaque; - - if (!(s->regs[R_TIMER1_CONTROL] & CTRL_AUTORESTART)) { - s->regs[R_TIMER1_CONTROL] &= ~CTRL_ENABLE; - trace_milkymist_sysctl_stop_timer1(); - ptimer_stop(s->ptimer1); - } - - trace_milkymist_sysctl_pulse_irq_timer1(); - qemu_irq_pulse(s->timer1_irq); -} - -static void milkymist_sysctl_reset(DeviceState *d) -{ - MilkymistSysctlState *s = MILKYMIST_SYSCTL(d); - int i; - - for (i = 0; i < R_MAX; i++) { - s->regs[i] = 0; - } - - ptimer_transaction_begin(s->ptimer0); - ptimer_stop(s->ptimer0); - ptimer_transaction_commit(s->ptimer0); - ptimer_transaction_begin(s->ptimer1); - ptimer_stop(s->ptimer1); - ptimer_transaction_commit(s->ptimer1); - - /* defaults */ - s->regs[R_ICAP] = ICAP_READY; - s->regs[R_SYSTEM_ID] = s->systemid; - s->regs[R_CLK_FREQUENCY] = s->freq_hz; - s->regs[R_CAPABILITIES] = s->capabilities; - s->regs[R_GPIO_IN] = s->strappings; -} - -static void milkymist_sysctl_init(Object *obj) -{ - MilkymistSysctlState *s = MILKYMIST_SYSCTL(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); - - sysbus_init_irq(dev, &s->gpio_irq); - 
sysbus_init_irq(dev, &s->timer0_irq); - sysbus_init_irq(dev, &s->timer1_irq); - - memory_region_init_io(&s->regs_region, obj, &sysctl_mmio_ops, s, - "milkymist-sysctl", R_MAX * 4); - sysbus_init_mmio(dev, &s->regs_region); -} - -static void milkymist_sysctl_realize(DeviceState *dev, Error **errp) -{ - MilkymistSysctlState *s = MILKYMIST_SYSCTL(dev); - - s->ptimer0 = ptimer_init(timer0_hit, s, PTIMER_POLICY_DEFAULT); - s->ptimer1 = ptimer_init(timer1_hit, s, PTIMER_POLICY_DEFAULT); - - ptimer_transaction_begin(s->ptimer0); - ptimer_set_freq(s->ptimer0, s->freq_hz); - ptimer_transaction_commit(s->ptimer0); - ptimer_transaction_begin(s->ptimer1); - ptimer_set_freq(s->ptimer1, s->freq_hz); - ptimer_transaction_commit(s->ptimer1); -} - -static const VMStateDescription vmstate_milkymist_sysctl = { - .name = "milkymist-sysctl", - .version_id = 1, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, MilkymistSysctlState, R_MAX), - VMSTATE_PTIMER(ptimer0, MilkymistSysctlState), - VMSTATE_PTIMER(ptimer1, MilkymistSysctlState), - VMSTATE_END_OF_LIST() - } -}; - -static Property milkymist_sysctl_properties[] = { - DEFINE_PROP_UINT32("frequency", MilkymistSysctlState, - freq_hz, 80000000), - DEFINE_PROP_UINT32("capabilities", MilkymistSysctlState, - capabilities, 0x00000000), - DEFINE_PROP_UINT32("systemid", MilkymistSysctlState, - systemid, 0x10014d31), - DEFINE_PROP_UINT32("gpio_strappings", MilkymistSysctlState, - strappings, 0x00000001), - DEFINE_PROP_END_OF_LIST(), -}; - -static void milkymist_sysctl_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = milkymist_sysctl_realize; - dc->reset = milkymist_sysctl_reset; - dc->vmsd = &vmstate_milkymist_sysctl; - device_class_set_props(dc, milkymist_sysctl_properties); -} - -static const TypeInfo milkymist_sysctl_info = { - .name = TYPE_MILKYMIST_SYSCTL, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(MilkymistSysctlState), - .instance_init = milkymist_sysctl_init, - .class_init = milkymist_sysctl_class_init, -}; - -static void milkymist_sysctl_register_types(void) -{ - type_register_static(&milkymist_sysctl_info); -} - -type_init(milkymist_sysctl_register_types) diff --git a/hw/timer/mips_gictimer.c b/hw/timer/mips_gictimer.c index bc44cd934e8..2b0696d4acb 100644 --- a/hw/timer/mips_gictimer.c +++ b/hw/timer/mips_gictimer.c @@ -7,7 +7,6 @@ */ #include "qemu/osdep.h" -#include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/timer/mips_gictimer.h" diff --git a/hw/timer/puv3_ost.c b/hw/timer/puv3_ost.c deleted file mode 100644 index d5bf26b56bc..00000000000 --- a/hw/timer/puv3_ost.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * OSTimer device simulation in PKUnity SoC - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "hw/sysbus.h" -#include "hw/irq.h" -#include "hw/ptimer.h" -#include "qemu/module.h" -#include "qemu/log.h" -#include "qom/object.h" - -#undef DEBUG_PUV3 -#include "hw/unicore32/puv3.h" - -#define TYPE_PUV3_OST "puv3_ost" -OBJECT_DECLARE_SIMPLE_TYPE(PUV3OSTState, PUV3_OST) - -/* puv3 ostimer implementation. 
*/ -struct PUV3OSTState { - SysBusDevice parent_obj; - - MemoryRegion iomem; - qemu_irq irq; - ptimer_state *ptimer; - - uint32_t reg_OSMR0; - uint32_t reg_OSCR; - uint32_t reg_OSSR; - uint32_t reg_OIER; -}; - -static uint64_t puv3_ost_read(void *opaque, hwaddr offset, - unsigned size) -{ - PUV3OSTState *s = opaque; - uint32_t ret = 0; - - switch (offset) { - case 0x10: /* Counter Register */ - ret = s->reg_OSMR0 - (uint32_t)ptimer_get_count(s->ptimer); - break; - case 0x14: /* Status Register */ - ret = s->reg_OSSR; - break; - case 0x1c: /* Interrupt Enable Register */ - ret = s->reg_OIER; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad read offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } - DPRINTF("offset 0x%x, value 0x%x\n", offset, ret); - return ret; -} - -static void puv3_ost_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) -{ - PUV3OSTState *s = opaque; - - DPRINTF("offset 0x%x, value 0x%x\n", offset, value); - switch (offset) { - case 0x00: /* Match Register 0 */ - ptimer_transaction_begin(s->ptimer); - s->reg_OSMR0 = value; - if (s->reg_OSMR0 > s->reg_OSCR) { - ptimer_set_count(s->ptimer, s->reg_OSMR0 - s->reg_OSCR); - } else { - ptimer_set_count(s->ptimer, s->reg_OSMR0 + - (0xffffffff - s->reg_OSCR)); - } - ptimer_run(s->ptimer, 2); - ptimer_transaction_commit(s->ptimer); - break; - case 0x14: /* Status Register */ - assert(value == 0); - if (s->reg_OSSR) { - s->reg_OSSR = value; - qemu_irq_lower(s->irq); - } - break; - case 0x1c: /* Interrupt Enable Register */ - s->reg_OIER = value; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Bad write offset 0x%"HWADDR_PRIx"\n", - __func__, offset); - } -} - -static const MemoryRegionOps puv3_ost_ops = { - .read = puv3_ost_read, - .write = puv3_ost_write, - .impl = { - .min_access_size = 4, - .max_access_size = 4, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static void puv3_ost_tick(void *opaque) -{ - PUV3OSTState *s = opaque; - - DPRINTF("ost hit when ptimer counter from 0x%x to 0x%x!\n", - s->reg_OSCR, s->reg_OSMR0); - - s->reg_OSCR = s->reg_OSMR0; - if (s->reg_OIER) { - s->reg_OSSR = 1; - qemu_irq_raise(s->irq); - } -} - -static void puv3_ost_realize(DeviceState *dev, Error **errp) -{ - PUV3OSTState *s = PUV3_OST(dev); - SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - - s->reg_OIER = 0; - s->reg_OSSR = 0; - s->reg_OSMR0 = 0; - s->reg_OSCR = 0; - - sysbus_init_irq(sbd, &s->irq); - - s->ptimer = ptimer_init(puv3_ost_tick, s, PTIMER_POLICY_DEFAULT); - ptimer_transaction_begin(s->ptimer); - ptimer_set_freq(s->ptimer, 50 * 1000 * 1000); - ptimer_transaction_commit(s->ptimer); - - memory_region_init_io(&s->iomem, OBJECT(s), &puv3_ost_ops, s, "puv3_ost", - PUV3_REGS_OFFSET); - sysbus_init_mmio(sbd, &s->iomem); -} - -static void puv3_ost_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = puv3_ost_realize; -} - -static const TypeInfo puv3_ost_info = { - .name = TYPE_PUV3_OST, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(PUV3OSTState), - .class_init = puv3_ost_class_init, -}; - -static void puv3_ost_register_type(void) -{ - type_register_static(&puv3_ost_info); -} - -type_init(puv3_ost_register_type) diff --git a/hw/timer/sh_timer.c b/hw/timer/sh_timer.c index 58af1a1edbd..c72c327bfaf 100644 --- a/hw/timer/sh_timer.c +++ b/hw/timer/sh_timer.c @@ -10,13 +10,12 @@ #include "qemu/osdep.h" #include "exec/memory.h" -#include "hw/hw.h" +#include "qemu/log.h" #include "hw/irq.h" #include "hw/sh4/sh.h" #include "hw/timer/tmu012.h" 
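The sh_timer.c hunk that follows replaces hw_error() aborts with qemu_log_mask(LOG_GUEST_ERROR, ...), so guest-reachable bad accesses are logged and emulation continues instead of killing QEMU. A minimal sketch of that pattern, with a hypothetical register layout:

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "exec/hwaddr.h"

static uint64_t demo_timer_read(void *opaque, hwaddr offset, unsigned size)
{
    switch (offset) {
    case 0x0: /* hypothetical status register */
        return 0;
    default:
        /* Log the bad access and keep running; the read returns 0. */
        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n",
                      __func__, offset);
        return 0;
    }
}

The same log-and-continue conversion is applied below to the reserved TPSC/CKEG/ICPE cases and to tmu012_read()/tmu012_write().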
#include "hw/ptimer.h" - -//#define DEBUG_TIMER +#include "trace.h" #define TIMER_TCR_TPSC (7 << 0) #define TIMER_TCR_CKEG (3 << 3) @@ -46,24 +45,24 @@ typedef struct { int feat; int enabled; qemu_irq irq; -} sh_timer_state; +} SHTimerState; /* Check all active timers, and schedule the next timer interrupt. */ -static void sh_timer_update(sh_timer_state *s) +static void sh_timer_update(SHTimerState *s) { int new_level = s->int_level && (s->tcr & TIMER_TCR_UNIE); - if (new_level != s->old_level) - qemu_set_irq (s->irq, new_level); - + if (new_level != s->old_level) { + qemu_set_irq(s->irq, new_level); + } s->old_level = s->int_level; s->int_level = new_level; } static uint32_t sh_timer_read(void *opaque, hwaddr offset) { - sh_timer_state *s = (sh_timer_state *)opaque; + SHTimerState *s = opaque; switch (offset >> 2) { case OFFSET_TCOR: @@ -73,19 +72,18 @@ static uint32_t sh_timer_read(void *opaque, hwaddr offset) case OFFSET_TCR: return s->tcr | (s->int_level ? TIMER_TCR_UNF : 0); case OFFSET_TCPR: - if (s->feat & TIMER_FEAT_CAPT) + if (s->feat & TIMER_FEAT_CAPT) { return s->tcpr; - /* fall through */ - default: - hw_error("sh_timer_read: Bad offset %x\n", (int)offset); - return 0; + } } + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, offset); + return 0; } -static void sh_timer_write(void *opaque, hwaddr offset, - uint32_t value) +static void sh_timer_write(void *opaque, hwaddr offset, uint32_t value) { - sh_timer_state *s = (sh_timer_state *)opaque; + SHTimerState *s = opaque; int freq; switch (offset >> 2) { @@ -104,19 +102,30 @@ static void sh_timer_write(void *opaque, hwaddr offset, case OFFSET_TCR: ptimer_transaction_begin(s->timer); if (s->enabled) { - /* Pause the timer if it is running. This may cause some - inaccuracy dure to rounding, but avoids a whole lot of other - messyness. */ + /* + * Pause the timer if it is running. This may cause some inaccuracy + * due to rounding, but avoids a whole lot of other messiness + */ ptimer_stop(s->timer); } freq = s->freq; /* ??? Need to recalculate expiry time after changing divisor. 
*/ switch (value & TIMER_TCR_TPSC) { - case 0: freq >>= 2; break; - case 1: freq >>= 4; break; - case 2: freq >>= 6; break; - case 3: freq >>= 8; break; - case 4: freq >>= 10; break; + case 0: + freq >>= 2; + break; + case 1: + freq >>= 4; + break; + case 2: + freq >>= 6; + break; + case 3: + freq >>= 8; + break; + case 4: + freq >>= 10; + break; case 6: case 7: if (s->feat & TIMER_FEAT_EXTCLK) { @@ -124,7 +133,8 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Reserved TPSC value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved TPSC value\n", __func__); } switch ((value & TIMER_TCR_CKEG) >> 3) { case 0: @@ -137,7 +147,8 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Reserved CKEG value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved CKEG value\n", __func__); } switch ((value & TIMER_TCR_ICPE) >> 6) { case 0: @@ -149,7 +160,8 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Reserved ICPE value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved ICPE value\n", __func__); } if ((value & TIMER_TCR_UNF) == 0) { s->int_level = 0; @@ -158,13 +170,15 @@ static void sh_timer_write(void *opaque, hwaddr offset, value &= ~TIMER_TCR_UNF; if ((value & TIMER_TCR_ICPF) && (!(s->feat & TIMER_FEAT_CAPT))) { - hw_error("sh_timer_write: Reserved ICPF value\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved ICPF value\n", __func__); } value &= ~TIMER_TCR_ICPF; /* capture not supported */ if (value & TIMER_TCR_RESERVED) { - hw_error("sh_timer_write: Reserved TCR bits set\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Reserved TCR bits set\n", __func__); } s->tcr = value; ptimer_set_limit(s->timer, s->tcor, 0); @@ -182,19 +196,17 @@ static void sh_timer_write(void *opaque, hwaddr offset, } /* fallthrough */ default: - hw_error("sh_timer_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); } sh_timer_update(s); } static void sh_timer_start_stop(void *opaque, int enable) { - sh_timer_state *s = (sh_timer_state *)opaque; - -#ifdef DEBUG_TIMER - printf("sh_timer_start_stop %d (%d)\n", enable, s->enabled); -#endif + SHTimerState *s = opaque; + trace_sh_timer_start_stop(enable, s->enabled); ptimer_transaction_begin(s->timer); if (s->enabled && !enable) { ptimer_stop(s->timer); @@ -204,24 +216,20 @@ static void sh_timer_start_stop(void *opaque, int enable) } ptimer_transaction_commit(s->timer); s->enabled = !!enable; - -#ifdef DEBUG_TIMER - printf("sh_timer_start_stop done %d\n", s->enabled); -#endif } static void sh_timer_tick(void *opaque) { - sh_timer_state *s = (sh_timer_state *)opaque; + SHTimerState *s = opaque; s->int_level = s->enabled; sh_timer_update(s); } static void *sh_timer_init(uint32_t freq, int feat, qemu_irq irq) { - sh_timer_state *s; + SHTimerState *s; - s = (sh_timer_state *)g_malloc0(sizeof(sh_timer_state)); + s = g_malloc0(sizeof(*s)); s->freq = freq; s->feat = feat; s->tcor = 0xffffffff; @@ -252,50 +260,49 @@ typedef struct { int feat; } tmu012_state; -static uint64_t tmu012_read(void *opaque, hwaddr offset, - unsigned size) +static uint64_t tmu012_read(void *opaque, hwaddr offset, unsigned size) { - tmu012_state *s = (tmu012_state *)opaque; - -#ifdef DEBUG_TIMER - printf("tmu012_read 0x%lx\n", (unsigned long) offset); -#endif + tmu012_state *s = opaque; + trace_sh_timer_read(offset); if (offset >= 0x20) 
{ if (!(s->feat & TMU012_FEAT_3CHAN)) { - hw_error("tmu012_write: Bad channel offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad channel offset 0x%" HWADDR_PRIx "\n", + __func__, offset); } return sh_timer_read(s->timer[2], offset - 0x20); } - if (offset >= 0x14) + if (offset >= 0x14) { return sh_timer_read(s->timer[1], offset - 0x14); - - if (offset >= 0x08) + } + if (offset >= 0x08) { return sh_timer_read(s->timer[0], offset - 0x08); - - if (offset == 4) + } + if (offset == 4) { return s->tstr; - - if ((s->feat & TMU012_FEAT_TOCR) && offset == 0) + } + if ((s->feat & TMU012_FEAT_TOCR) && offset == 0) { return s->tocr; + } - hw_error("tmu012_write: Bad offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); return 0; } static void tmu012_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { - tmu012_state *s = (tmu012_state *)opaque; - -#ifdef DEBUG_TIMER - printf("tmu012_write 0x%lx 0x%08x\n", (unsigned long) offset, value); -#endif + tmu012_state *s = opaque; + trace_sh_timer_write(offset, value); if (offset >= 0x20) { if (!(s->feat & TMU012_FEAT_3CHAN)) { - hw_error("tmu012_write: Bad channel offset %x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad channel offset 0x%" HWADDR_PRIx "\n", + __func__, offset); } sh_timer_write(s->timer[2], offset - 0x20, value); return; @@ -318,7 +325,7 @@ static void tmu012_write(void *opaque, hwaddr offset, sh_timer_start_stop(s->timer[2], value & (1 << 2)); } else { if (value & (1 << 2)) { - hw_error("tmu012_write: Bad channel\n"); + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad channel\n", __func__); } } @@ -337,15 +344,14 @@ static const MemoryRegionOps tmu012_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -void tmu012_init(MemoryRegion *sysmem, hwaddr base, - int feat, uint32_t freq, +void tmu012_init(MemoryRegion *sysmem, hwaddr base, int feat, uint32_t freq, qemu_irq ch0_irq, qemu_irq ch1_irq, qemu_irq ch2_irq0, qemu_irq ch2_irq1) { tmu012_state *s; int timer_feat = (feat & TMU012_FEAT_EXTCLK) ? TIMER_FEAT_EXTCLK : 0; - s = (tmu012_state *)g_malloc0(sizeof(tmu012_state)); + s = g_malloc0(sizeof(*s)); s->feat = feat; s->timer[0] = sh_timer_init(freq, timer_feat, ch0_irq); s->timer[1] = sh_timer_init(freq, timer_feat, ch1_irq); @@ -354,15 +360,14 @@ void tmu012_init(MemoryRegion *sysmem, hwaddr base, ch2_irq0); /* ch2_irq1 not supported */ } - memory_region_init_io(&s->iomem, NULL, &tmu012_ops, s, - "timer", 0x100000000ULL); + memory_region_init_io(&s->iomem, NULL, &tmu012_ops, s, "timer", 0x30); memory_region_init_alias(&s->iomem_p4, NULL, "timer-p4", - &s->iomem, 0, 0x1000); + &s->iomem, 0, memory_region_size(&s->iomem)); memory_region_add_subregion(sysmem, P4ADDR(base), &s->iomem_p4); memory_region_init_alias(&s->iomem_a7, NULL, "timer-a7", - &s->iomem, 0, 0x1000); + &s->iomem, 0, memory_region_size(&s->iomem)); memory_region_add_subregion(sysmem, A7ADDR(base), &s->iomem_a7); /* ??? Save/restore. 
*/ } diff --git a/hw/timer/sifive_pwm.c b/hw/timer/sifive_pwm.c new file mode 100644 index 00000000000..c664480ccf5 --- /dev/null +++ b/hw/timer/sifive_pwm.c @@ -0,0 +1,468 @@ +/* + * SiFive PWM + * + * Copyright (c) 2020 Western Digital + * + * Author: Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "hw/irq.h" +#include "hw/timer/sifive_pwm.h" +#include "hw/qdev-properties.h" +#include "hw/registerfields.h" +#include "migration/vmstate.h" +#include "qemu/log.h" +#include "qemu/module.h" + +#define HAS_PWM_EN_BITS(cfg) ((cfg & R_CONFIG_ENONESHOT_MASK) || \ + (cfg & R_CONFIG_ENALWAYS_MASK)) + +#define PWMCMP_MASK 0xFFFF +#define PWMCOUNT_MASK 0x7FFFFFFF + +REG32(CONFIG, 0x00) + FIELD(CONFIG, SCALE, 0, 4) + FIELD(CONFIG, STICKY, 8, 1) + FIELD(CONFIG, ZEROCMP, 9, 1) + FIELD(CONFIG, DEGLITCH, 10, 1) + FIELD(CONFIG, ENALWAYS, 12, 1) + FIELD(CONFIG, ENONESHOT, 13, 1) + FIELD(CONFIG, CMP0CENTER, 16, 1) + FIELD(CONFIG, CMP1CENTER, 17, 1) + FIELD(CONFIG, CMP2CENTER, 18, 1) + FIELD(CONFIG, CMP3CENTER, 19, 1) + FIELD(CONFIG, CMP0GANG, 24, 1) + FIELD(CONFIG, CMP1GANG, 25, 1) + FIELD(CONFIG, CMP2GANG, 26, 1) + FIELD(CONFIG, CMP3GANG, 27, 1) + FIELD(CONFIG, CMP0IP, 28, 1) + FIELD(CONFIG, CMP1IP, 29, 1) + FIELD(CONFIG, CMP2IP, 30, 1) + FIELD(CONFIG, CMP3IP, 31, 1) +REG32(COUNT, 0x08) +REG32(PWMS, 0x10) +REG32(PWMCMP0, 0x20) +REG32(PWMCMP1, 0x24) +REG32(PWMCMP2, 0x28) +REG32(PWMCMP3, 0x2C) + +static inline uint64_t sifive_pwm_ns_to_ticks(SiFivePwmState *s, + uint64_t time) +{ + return muldiv64(time, s->freq_hz, NANOSECONDS_PER_SECOND); +} + +static inline uint64_t sifive_pwm_ticks_to_ns(SiFivePwmState *s, + uint64_t ticks) +{ + return muldiv64(ticks, NANOSECONDS_PER_SECOND, s->freq_hz); +} + +static inline uint64_t sifive_pwm_compute_scale(SiFivePwmState *s) +{ + return s->pwmcfg & R_CONFIG_SCALE_MASK; +} + +static void sifive_pwm_set_alarms(SiFivePwmState *s) +{ + uint64_t now_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + /* + * Subtract ticks from number of ticks when the timer was zero + * and mask to the register width. 
+ */ + uint64_t pwmcount = (sifive_pwm_ns_to_ticks(s, now_ns) - + s->tick_offset) & PWMCOUNT_MASK; + uint64_t scale = sifive_pwm_compute_scale(s); + /* PWMs only contains PWMCMP_MASK bits starting at scale */ + uint64_t pwms = (pwmcount & (PWMCMP_MASK << scale)) >> scale; + + for (int i = 0; i < SIFIVE_PWM_CHANS; i++) { + uint64_t pwmcmp = s->pwmcmp[i] & PWMCMP_MASK; + uint64_t pwmcmp_ticks = pwmcmp << scale; + + /* + * Per circuit diagram and spec, both cases raises corresponding + * IP bit one clock cycle after time expires. + */ + if (pwmcmp > pwms) { + uint64_t offset = pwmcmp_ticks - pwmcount + 1; + uint64_t when_to_fire = now_ns + + sifive_pwm_ticks_to_ns(s, offset); + + trace_sifive_pwm_set_alarm(when_to_fire, now_ns); + timer_mod(&s->timer[i], when_to_fire); + } else { + /* Schedule interrupt for next cycle */ + trace_sifive_pwm_set_alarm(now_ns + 1, now_ns); + timer_mod(&s->timer[i], now_ns + 1); + } + + } + } else { + /* + * If timer incrementing disabled, just do pwms > pwmcmp check since + * a write may have happened to PWMs. + */ + uint64_t pwmcount = (s->tick_offset) & PWMCOUNT_MASK; + uint64_t scale = sifive_pwm_compute_scale(s); + uint64_t pwms = (pwmcount & (PWMCMP_MASK << scale)) >> scale; + + for (int i = 0; i < SIFIVE_PWM_CHANS; i++) { + uint64_t pwmcmp = s->pwmcmp[i] & PWMCMP_MASK; + + if (pwms >= pwmcmp) { + trace_sifive_pwm_set_alarm(now_ns + 1, now_ns); + timer_mod(&s->timer[i], now_ns + 1); + } else { + /* Effectively disable timer by scheduling far in future. */ + trace_sifive_pwm_set_alarm(0xFFFFFFFFFFFFFF, now_ns); + timer_mod(&s->timer[i], 0xFFFFFFFFFFFFFF); + } + } + } +} + +static void sifive_pwm_interrupt(SiFivePwmState *s, int num) +{ + uint64_t now = sifive_pwm_ns_to_ticks(s, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + bool was_incrementing = HAS_PWM_EN_BITS(s->pwmcfg); + + trace_sifive_pwm_interrupt(num); + + s->pwmcfg |= R_CONFIG_CMP0IP_MASK << num; + qemu_irq_raise(s->irqs[num]); + + /* + * If the zerocmp is set and pwmcmp0 raised the interrupt + * reset the zero ticks. + */ + if ((s->pwmcfg & R_CONFIG_ZEROCMP_MASK) && (num == 0)) { + /* If reset signal conditions, disable ENONESHOT. */ + s->pwmcfg &= ~R_CONFIG_ENONESHOT_MASK; + + if (was_incrementing) { + /* If incrementing, time in ticks is when pwmcount is zero */ + s->tick_offset = now; + } else { + /* If not incrementing, pwmcount = 0 */ + s->tick_offset = 0; + } + } + + /* + * If carryout bit set, which we discern via looking for overflow, + * also reset ENONESHOT. 
+ */ + if (was_incrementing && + ((now & PWMCOUNT_MASK) < (s->tick_offset & PWMCOUNT_MASK))) { + s->pwmcfg &= ~R_CONFIG_ENONESHOT_MASK; + } + + /* Schedule or disable interrupts */ + sifive_pwm_set_alarms(s); + + /* If was enabled, and now not enabled, switch tick rep */ + if (was_incrementing && !HAS_PWM_EN_BITS(s->pwmcfg)) { + s->tick_offset = (now - s->tick_offset) & PWMCOUNT_MASK; + } +} + +static void sifive_pwm_interrupt_0(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 0); +} + +static void sifive_pwm_interrupt_1(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 1); +} + +static void sifive_pwm_interrupt_2(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 2); +} + +static void sifive_pwm_interrupt_3(void *opaque) +{ + SiFivePwmState *s = opaque; + + sifive_pwm_interrupt(s, 3); +} + +static uint64_t sifive_pwm_read(void *opaque, hwaddr addr, + unsigned int size) +{ + SiFivePwmState *s = opaque; + uint64_t cur_time, scale; + uint64_t now = sifive_pwm_ns_to_ticks(s, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + + trace_sifive_pwm_read(addr); + + switch (addr) { + case A_CONFIG: + return s->pwmcfg; + case A_COUNT: + cur_time = s->tick_offset; + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + cur_time = now - cur_time; + } + + /* + * Return the value in the counter with bit 31 always 0 + * This is allowed to wrap around so we don't need to check that. + */ + return cur_time & PWMCOUNT_MASK; + case A_PWMS: + cur_time = s->tick_offset; + scale = sifive_pwm_compute_scale(s); + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + cur_time = now - cur_time; + } + + return ((cur_time & PWMCOUNT_MASK) >> scale) & PWMCMP_MASK; + case A_PWMCMP0: + return s->pwmcmp[0] & PWMCMP_MASK; + case A_PWMCMP1: + return s->pwmcmp[1] & PWMCMP_MASK; + case A_PWMCMP2: + return s->pwmcmp[2] & PWMCMP_MASK; + case A_PWMCMP3: + return s->pwmcmp[3] & PWMCMP_MASK; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + return 0; + } + + return 0; +} + +static void sifive_pwm_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + SiFivePwmState *s = opaque; + uint32_t value = val64; + uint64_t new_offset, scale; + uint64_t now = sifive_pwm_ns_to_ticks(s, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); + + trace_sifive_pwm_write(value, addr); + + switch (addr) { + case A_CONFIG: + if (value & (R_CONFIG_CMP0CENTER_MASK | R_CONFIG_CMP1CENTER_MASK | + R_CONFIG_CMP2CENTER_MASK | R_CONFIG_CMP3CENTER_MASK)) { + qemu_log_mask(LOG_UNIMP, "%s: CMPxCENTER is not supported\n", + __func__); + } + + if (value & (R_CONFIG_CMP0GANG_MASK | R_CONFIG_CMP1GANG_MASK | + R_CONFIG_CMP2GANG_MASK | R_CONFIG_CMP3GANG_MASK)) { + qemu_log_mask(LOG_UNIMP, "%s: CMPxGANG is not supported\n", + __func__); + } + + if (value & (R_CONFIG_CMP0IP_MASK | R_CONFIG_CMP1IP_MASK | + R_CONFIG_CMP2IP_MASK | R_CONFIG_CMP3IP_MASK)) { + qemu_log_mask(LOG_UNIMP, "%s: CMPxIP is not supported\n", + __func__); + } + + if (!(value & R_CONFIG_CMP0IP_MASK)) { + qemu_irq_lower(s->irqs[0]); + } + + if (!(value & R_CONFIG_CMP1IP_MASK)) { + qemu_irq_lower(s->irqs[1]); + } + + if (!(value & R_CONFIG_CMP2IP_MASK)) { + qemu_irq_lower(s->irqs[2]); + } + + if (!(value & R_CONFIG_CMP3IP_MASK)) { + qemu_irq_lower(s->irqs[3]); + } + + /* + * If this write enables the timer increment + * set the time when pwmcount was zero to be cur_time - pwmcount. + * If this write disables the timer increment + * convert back from pwmcount to the time in ticks + * when pwmcount was zero. 
+ */ + if ((!HAS_PWM_EN_BITS(s->pwmcfg) && HAS_PWM_EN_BITS(value)) || + (HAS_PWM_EN_BITS(s->pwmcfg) && !HAS_PWM_EN_BITS(value))) { + s->tick_offset = (now - s->tick_offset) & PWMCOUNT_MASK; + } + + s->pwmcfg = value; + break; + case A_COUNT: + /* The guest changed the counter, updated the offset value. */ + new_offset = value; + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + new_offset = now - new_offset; + } + + s->tick_offset = new_offset; + break; + case A_PWMS: + scale = sifive_pwm_compute_scale(s); + new_offset = (((value & PWMCMP_MASK) << scale) & PWMCOUNT_MASK); + + if (HAS_PWM_EN_BITS(s->pwmcfg)) { + new_offset = now - new_offset; + } + + s->tick_offset = new_offset; + break; + case A_PWMCMP0: + s->pwmcmp[0] = value & PWMCMP_MASK; + break; + case A_PWMCMP1: + s->pwmcmp[1] = value & PWMCMP_MASK; + break; + case A_PWMCMP2: + s->pwmcmp[2] = value & PWMCMP_MASK; + break; + case A_PWMCMP3: + s->pwmcmp[3] = value & PWMCMP_MASK; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr); + } + + /* Update the alarms to reflect possible updated values */ + sifive_pwm_set_alarms(s); +} + +static void sifive_pwm_reset(DeviceState *dev) +{ + SiFivePwmState *s = SIFIVE_PWM(dev); + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + s->pwmcfg = 0x00000000; + s->pwmcmp[0] = 0x00000000; + s->pwmcmp[1] = 0x00000000; + s->pwmcmp[2] = 0x00000000; + s->pwmcmp[3] = 0x00000000; + + s->tick_offset = sifive_pwm_ns_to_ticks(s, now); +} + +static const MemoryRegionOps sifive_pwm_ops = { + .read = sifive_pwm_read, + .write = sifive_pwm_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_sifive_pwm = { + .name = TYPE_SIFIVE_PWM, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_TIMER_ARRAY(timer, SiFivePwmState, 4), + VMSTATE_UINT64(tick_offset, SiFivePwmState), + VMSTATE_UINT32(pwmcfg, SiFivePwmState), + VMSTATE_UINT32_ARRAY(pwmcmp, SiFivePwmState, 4), + VMSTATE_END_OF_LIST() + } +}; + +static Property sifive_pwm_properties[] = { + /* 0.5Ghz per spec after FSBL */ + DEFINE_PROP_UINT64("clock-frequency", struct SiFivePwmState, + freq_hz, 500000000ULL), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sifive_pwm_init(Object *obj) +{ + SiFivePwmState *s = SIFIVE_PWM(obj); + int i; + + for (i = 0; i < SIFIVE_PWM_IRQS; i++) { + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irqs[i]); + } + + memory_region_init_io(&s->mmio, obj, &sifive_pwm_ops, s, + TYPE_SIFIVE_PWM, 0x100); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void sifive_pwm_realize(DeviceState *dev, Error **errp) +{ + SiFivePwmState *s = SIFIVE_PWM(dev); + + timer_init_ns(&s->timer[0], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_0, s); + + timer_init_ns(&s->timer[1], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_1, s); + + timer_init_ns(&s->timer[2], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_2, s); + + timer_init_ns(&s->timer[3], QEMU_CLOCK_VIRTUAL, + sifive_pwm_interrupt_3, s); +} + +static void sifive_pwm_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = sifive_pwm_reset; + device_class_set_props(dc, sifive_pwm_properties); + dc->vmsd = &vmstate_sifive_pwm; + dc->realize = sifive_pwm_realize; +} + +static const TypeInfo sifive_pwm_info = { + .name = TYPE_SIFIVE_PWM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SiFivePwmState), + .instance_init = sifive_pwm_init, + .class_init = sifive_pwm_class_init, +}; + +static void sifive_pwm_register_types(void) +{ + 
type_register_static(&sifive_pwm_info); +} + +type_init(sifive_pwm_register_types) diff --git a/hw/timer/sse-counter.c b/hw/timer/sse-counter.c index 0384051f151..16c0e8ad15d 100644 --- a/hw/timer/sse-counter.c +++ b/hw/timer/sse-counter.c @@ -33,7 +33,6 @@ #include "trace.h" #include "hw/timer/sse-counter.h" #include "hw/sysbus.h" -#include "hw/irq.h" #include "hw/registerfields.h" #include "hw/clock.h" #include "hw/qdev-clock.h" diff --git a/hw/timer/stellaris-gptm.c b/hw/timer/stellaris-gptm.c new file mode 100644 index 00000000000..fd71c79be48 --- /dev/null +++ b/hw/timer/stellaris-gptm.c @@ -0,0 +1,332 @@ +/* + * Luminary Micro Stellaris General Purpose Timer Module + * + * Copyright (c) 2006 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/error.h" +#include "migration/vmstate.h" +#include "hw/qdev-clock.h" +#include "hw/timer/stellaris-gptm.h" + +static void gptm_update_irq(gptm_state *s) +{ + int level; + level = (s->state & s->mask) != 0; + qemu_set_irq(s->irq, level); +} + +static void gptm_stop(gptm_state *s, int n) +{ + timer_del(s->timer[n]); +} + +static void gptm_reload(gptm_state *s, int n, int reset) +{ + int64_t tick; + if (reset) { + tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } else { + tick = s->tick[n]; + } + + if (s->config == 0) { + /* 32-bit CountDown. */ + uint32_t count; + count = s->load[0] | (s->load[1] << 16); + tick += clock_ticks_to_ns(s->clk, count); + } else if (s->config == 1) { + /* 32-bit RTC. 1Hz tick. */ + tick += NANOSECONDS_PER_SECOND; + } else if (s->mode[n] == 0xa) { + /* PWM mode. Not implemented. */ + } else { + qemu_log_mask(LOG_UNIMP, + "GPTM: 16-bit timer mode unimplemented: 0x%x\n", + s->mode[n]); + return; + } + s->tick[n] = tick; + timer_mod(s->timer[n], tick); +} + +static void gptm_tick(void *opaque) +{ + gptm_state **p = (gptm_state **)opaque; + gptm_state *s; + int n; + + s = *p; + n = p - s->opaque; + if (s->config == 0) { + s->state |= 1; + if ((s->control & 0x20)) { + /* Output trigger. */ + qemu_irq_pulse(s->trigger); + } + if (s->mode[0] & 1) { + /* One-shot. */ + s->control &= ~1; + } else { + /* Periodic. */ + gptm_reload(s, 0, 0); + } + } else if (s->config == 1) { + /* RTC. */ + uint32_t match; + s->rtc++; + match = s->match[0] | (s->match[1] << 16); + if (s->rtc > match) + s->rtc = 0; + if (s->rtc == 0) { + s->state |= 8; + } + gptm_reload(s, 0, 0); + } else if (s->mode[n] == 0xa) { + /* PWM mode. Not implemented. */ + } else { + qemu_log_mask(LOG_UNIMP, + "GPTM: 16-bit timer mode unimplemented: 0x%x\n", + s->mode[n]); + } + gptm_update_irq(s); +} + +static uint64_t gptm_read(void *opaque, hwaddr offset, + unsigned size) +{ + gptm_state *s = (gptm_state *)opaque; + + switch (offset) { + case 0x00: /* CFG */ + return s->config; + case 0x04: /* TAMR */ + return s->mode[0]; + case 0x08: /* TBMR */ + return s->mode[1]; + case 0x0c: /* CTL */ + return s->control; + case 0x18: /* IMR */ + return s->mask; + case 0x1c: /* RIS */ + return s->state; + case 0x20: /* MIS */ + return s->state & s->mask; + case 0x24: /* CR */ + return 0; + case 0x28: /* TAILR */ + return s->load[0] | ((s->config < 4) ? (s->load[1] << 16) : 0); + case 0x2c: /* TBILR */ + return s->load[1]; + case 0x30: /* TAMARCHR */ + return s->match[0] | ((s->config < 4) ? 
(s->match[1] << 16) : 0); + case 0x34: /* TBMATCHR */ + return s->match[1]; + case 0x38: /* TAPR */ + return s->prescale[0]; + case 0x3c: /* TBPR */ + return s->prescale[1]; + case 0x40: /* TAPMR */ + return s->match_prescale[0]; + case 0x44: /* TBPMR */ + return s->match_prescale[1]; + case 0x48: /* TAR */ + if (s->config == 1) { + return s->rtc; + } + qemu_log_mask(LOG_UNIMP, + "GPTM: read of TAR but timer read not supported\n"); + return 0; + case 0x4c: /* TBR */ + qemu_log_mask(LOG_UNIMP, + "GPTM: read of TBR but timer read not supported\n"); + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "GPTM: read at bad offset 0x02%" HWADDR_PRIx "\n", + offset); + return 0; + } +} + +static void gptm_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + gptm_state *s = (gptm_state *)opaque; + uint32_t oldval; + + /* + * The timers should be disabled before changing the configuration. + * We take advantage of this and defer everything until the timer + * is enabled. + */ + switch (offset) { + case 0x00: /* CFG */ + s->config = value; + break; + case 0x04: /* TAMR */ + s->mode[0] = value; + break; + case 0x08: /* TBMR */ + s->mode[1] = value; + break; + case 0x0c: /* CTL */ + oldval = s->control; + s->control = value; + /* TODO: Implement pause. */ + if ((oldval ^ value) & 1) { + if (value & 1) { + gptm_reload(s, 0, 1); + } else { + gptm_stop(s, 0); + } + } + if (((oldval ^ value) & 0x100) && s->config >= 4) { + if (value & 0x100) { + gptm_reload(s, 1, 1); + } else { + gptm_stop(s, 1); + } + } + break; + case 0x18: /* IMR */ + s->mask = value & 0x77; + gptm_update_irq(s); + break; + case 0x24: /* CR */ + s->state &= ~value; + break; + case 0x28: /* TAILR */ + s->load[0] = value & 0xffff; + if (s->config < 4) { + s->load[1] = value >> 16; + } + break; + case 0x2c: /* TBILR */ + s->load[1] = value & 0xffff; + break; + case 0x30: /* TAMARCHR */ + s->match[0] = value & 0xffff; + if (s->config < 4) { + s->match[1] = value >> 16; + } + break; + case 0x34: /* TBMATCHR */ + s->match[1] = value >> 16; + break; + case 0x38: /* TAPR */ + s->prescale[0] = value; + break; + case 0x3c: /* TBPR */ + s->prescale[1] = value; + break; + case 0x40: /* TAPMR */ + s->match_prescale[0] = value; + break; + case 0x44: /* TBPMR */ + s->match_prescale[0] = value; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "GPTM: write at bad offset 0x02%" HWADDR_PRIx "\n", + offset); + } + gptm_update_irq(s); +} + +static const MemoryRegionOps gptm_ops = { + .read = gptm_read, + .write = gptm_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_stellaris_gptm = { + .name = "stellaris_gptm", + .version_id = 2, + .minimum_version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_UINT32(config, gptm_state), + VMSTATE_UINT32_ARRAY(mode, gptm_state, 2), + VMSTATE_UINT32(control, gptm_state), + VMSTATE_UINT32(state, gptm_state), + VMSTATE_UINT32(mask, gptm_state), + VMSTATE_UNUSED(8), + VMSTATE_UINT32_ARRAY(load, gptm_state, 2), + VMSTATE_UINT32_ARRAY(match, gptm_state, 2), + VMSTATE_UINT32_ARRAY(prescale, gptm_state, 2), + VMSTATE_UINT32_ARRAY(match_prescale, gptm_state, 2), + VMSTATE_UINT32(rtc, gptm_state), + VMSTATE_INT64_ARRAY(tick, gptm_state, 2), + VMSTATE_TIMER_PTR_ARRAY(timer, gptm_state, 2), + VMSTATE_CLOCK(clk, gptm_state), + VMSTATE_END_OF_LIST() + } +}; + +static void stellaris_gptm_init(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + gptm_state *s = STELLARIS_GPTM(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + sysbus_init_irq(sbd, &s->irq); + 
qdev_init_gpio_out(dev, &s->trigger, 1); + + memory_region_init_io(&s->iomem, obj, &gptm_ops, s, + "gptm", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + + s->opaque[0] = s->opaque[1] = s; + + /* + * TODO: in an ideal world we would model the effects of changing + * the input clock frequency while the countdown timer is active. + * The best way to do this would be to convert the device to use + * ptimer instead of hand-rolling its own timer. This would also + * make it easy to implement reading the current count from the + * TAR and TBR registers. + */ + s->clk = qdev_init_clock_in(dev, "clk", NULL, NULL, 0); +} + +static void stellaris_gptm_realize(DeviceState *dev, Error **errp) +{ + gptm_state *s = STELLARIS_GPTM(dev); + + if (!clock_has_source(s->clk)) { + error_setg(errp, "stellaris-gptm: clk must be connected"); + return; + } + + s->timer[0] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[0]); + s->timer[1] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, &s->opaque[1]); +} + +static void stellaris_gptm_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_stellaris_gptm; + dc->realize = stellaris_gptm_realize; +} + +static const TypeInfo stellaris_gptm_info = { + .name = TYPE_STELLARIS_GPTM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(gptm_state), + .instance_init = stellaris_gptm_init, + .class_init = stellaris_gptm_class_init, +}; + +static void stellaris_gptm_register_types(void) +{ + type_register_static(&stellaris_gptm_info); +} + +type_init(stellaris_gptm_register_types) diff --git a/hw/timer/trace-events b/hw/timer/trace-events index f8b9db25c27..3eccef83858 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
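Each declaration in this trace-events file expands at build time into a generated trace_<name>() helper in the per-directory trace.h. A hedged sketch of how the sh_timer point added further down in this hunk would be emitted from C:

#include "qemu/osdep.h"
#include "trace.h" /* generated from hw/timer/trace-events */

/*
 * Assumes the declaration added below:
 *   sh_timer_write(uint64_t offset, uint64_t value) "tmu012_write 0x%" PRIx64 " 0x%08" PRIx64
 */
static void demo_trace_write(uint64_t offset, uint64_t value)
{
    trace_sh_timer_write(offset, value);
}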
# slavio_timer.c slavio_timer_get_out(uint64_t limit, uint32_t counthigh, uint32_t count) "limit 0x%"PRIx64" count 0x%x0x%08x" @@ -24,23 +24,6 @@ grlib_gptimer_hit(int id) "timer:%d HIT" grlib_gptimer_readl(int id, uint64_t addr, uint32_t val) "timer:%d addr 0x%"PRIx64" 0x%x" grlib_gptimer_writel(int id, uint64_t addr, uint32_t val) "timer:%d addr 0x%"PRIx64" 0x%x" -# lm32_timer.c -lm32_timer_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_timer_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -lm32_timer_hit(void) "timer hit" -lm32_timer_irq_state(int level) "irq state %d" - -# milkymist-sysctl.c -milkymist_sysctl_memory_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_sysctl_memory_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" -milkymist_sysctl_icap_write(uint32_t value) "value 0x%08x" -milkymist_sysctl_start_timer0(void) "Start timer0" -milkymist_sysctl_stop_timer0(void) "Stop timer0" -milkymist_sysctl_start_timer1(void) "Start timer1" -milkymist_sysctl_stop_timer1(void) "Stop timer1" -milkymist_sysctl_pulse_irq_timer0(void) "Pulse IRQ Timer0" -milkymist_sysctl_pulse_irq_timer1(void) "Pulse IRQ Timer1" - # aspeed_timer.c aspeed_timer_ctrl_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" aspeed_timer_ctrl_external_clock(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" @@ -105,3 +88,14 @@ sse_counter_reset(void) "SSE system counter: reset" sse_timer_read(uint64_t offset, uint64_t data, unsigned size) "SSE system timer read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" sse_timer_write(uint64_t offset, uint64_t data, unsigned size) "SSE system timer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" sse_timer_reset(void) "SSE system timer: reset" + +# sifive_pwm.c +sifive_pwm_set_alarm(uint64_t alarm, uint64_t now) "Setting alarm to: 0x%" PRIx64 ", now: 0x%" PRIx64 +sifive_pwm_interrupt(int num) "Interrupt %d" +sifive_pwm_read(uint64_t offset) "Read at address: 0x%" PRIx64 +sifive_pwm_write(uint64_t data, uint64_t offset) "Write 0x%" PRIx64 " at address: 0x%" PRIx64 + +# sh_timer.c +sh_timer_start_stop(int enable, int current) "%d (%d)" +sh_timer_read(uint64_t offset) "tmu012_read 0x%" PRIx64 +sh_timer_write(uint64_t offset, uint64_t value) "tmu012_write 0x%" PRIx64 " 0x%08" PRIx64 diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c index aa9c00aad3a..58ebd1469c3 100644 --- a/hw/tpm/tpm_crb.c +++ b/hw/tpm/tpm_crb.c @@ -18,7 +18,6 @@ #include "qemu/module.h" #include "qapi/error.h" -#include "exec/address-spaces.h" #include "hw/qdev-properties.h" #include "hw/pci/pci_ids.h" #include "hw/acpi/tpm.h" diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c index 72d7a3d9260..274e9aa4b01 100644 --- a/hw/tpm/tpm_ppi.c +++ b/hw/tpm/tpm_ppi.c @@ -23,18 +23,21 @@ void tpm_ppi_reset(TPMPPI *tpmppi) { - if (tpmppi->buf[0x15a /* movv, docs/specs/tpm.txt */] & 0x1) { + if (tpmppi->buf[0x15a /* movv, docs/specs/tpm.rst */] & 0x1) { GuestPhysBlockList guest_phys_blocks; GuestPhysBlock *block; guest_phys_blocks_init(&guest_phys_blocks); guest_phys_blocks_append(&guest_phys_blocks); QTAILQ_FOREACH(block, &guest_phys_blocks.head, next) { + hwaddr mr_offs = block->host_addr - + (uint8_t *)memory_region_get_ram_ptr(block->mr); + trace_tpm_ppi_memset(block->host_addr, block->target_end - block->target_start); memset(block->host_addr, 0, block->target_end - block->target_start); - memory_region_set_dirty(block->mr, 0, + memory_region_set_dirty(block->mr, mr_offs, block->target_end - block->target_start); } 
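The tpm_ppi.c hunk above stops passing offset 0 to memory_region_set_dirty(): a guest-physical block may begin partway into its MemoryRegion, so the dirty range has to start at the block's offset within that region. A minimal sketch of the computation, assuming the GuestPhysBlock layout from sysemu/memory_mapping.h:

#include "qemu/osdep.h"
#include "exec/memory.h"
#include "sysemu/memory_mapping.h"

static void wipe_block_and_mark_dirty(GuestPhysBlock *block)
{
    uint8_t *mr_base = memory_region_get_ram_ptr(block->mr);
    hwaddr mr_offs = block->host_addr - mr_base;      /* block start within its region */
    hwaddr len = block->target_end - block->target_start;

    memset(block->host_addr, 0, len);                 /* clear the guest memory */
    memory_region_set_dirty(block->mr, mr_offs, len); /* mark exactly that range dirty */
}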
guest_phys_blocks_free(&guest_phys_blocks); diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events index 6005ecb5dae..f17110458e6 100644 --- a/hw/tpm/trace-events +++ b/hw/tpm/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # tpm_crb.c tpm_crb_mmio_read(uint64_t addr, unsigned size, uint32_t val) "CRB read 0x%016" PRIx64 " len:%u val: 0x%" PRIx32 diff --git a/hw/tricore/Kconfig b/hw/tricore/Kconfig index 506e6183c17..33c1e852c33 100644 --- a/hw/tricore/Kconfig +++ b/hw/tricore/Kconfig @@ -1,9 +1,8 @@ -config TRICORE +config TRICORE_TESTBOARD bool config TRIBOARD bool - select TRICORE select TC27X_SOC config TC27X_SOC diff --git a/hw/tricore/meson.build b/hw/tricore/meson.build index 77ff6fd1371..7e3585daf8f 100644 --- a/hw/tricore/meson.build +++ b/hw/tricore/meson.build @@ -1,5 +1,6 @@ tricore_ss = ss.source_set() -tricore_ss.add(when: 'CONFIG_TRICORE', if_true: files('tricore_testboard.c')) +tricore_ss.add(when: 'CONFIG_TRICORE_TESTBOARD', if_true: files('tricore_testboard.c')) +tricore_ss.add(when: 'CONFIG_TRICORE_TESTBOARD', if_true: files('tricore_testdevice.c')) tricore_ss.add(when: 'CONFIG_TRIBOARD', if_true: files('triboard.c')) tricore_ss.add(when: 'CONFIG_TC27X_SOC', if_true: files('tc27x_soc.c')) diff --git a/hw/tricore/tc27x_soc.c b/hw/tricore/tc27x_soc.c index 8af079e6b25..ecd92717b50 100644 --- a/hw/tricore/tc27x_soc.c +++ b/hw/tricore/tc27x_soc.c @@ -21,13 +21,9 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "hw/loader.h" #include "qemu/units.h" #include "hw/misc/unimp.h" -#include "exec/address-spaces.h" -#include "qemu/log.h" -#include "cpu.h" #include "hw/tricore/tc27x_soc.h" #include "hw/tricore/triboard.h" diff --git a/hw/tricore/triboard.c b/hw/tricore/triboard.c index 16e2fd7e27e..4dba0259cd3 100644 --- a/hw/tricore/triboard.c +++ b/hw/tricore/triboard.c @@ -22,11 +22,8 @@ #include "qemu/units.h" #include "qapi/error.h" #include "hw/qdev-properties.h" -#include "cpu.h" #include "net/net.h" -#include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "elf.h" #include "hw/tricore/tricore.h" #include "qemu/error-report.h" diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c index 12ea1490fde..b6810e3be05 100644 --- a/hw/tricore/tricore_testboard.c +++ b/hw/tricore/tricore_testboard.c @@ -25,9 +25,9 @@ #include "net/net.h" #include "hw/boards.h" #include "hw/loader.h" -#include "exec/address-spaces.h" #include "elf.h" #include "hw/tricore/tricore.h" +#include "hw/tricore/tricore_testdevice.h" #include "qemu/error-report.h" @@ -57,6 +57,7 @@ static void tricore_testboard_init(MachineState *machine, int board_id) { TriCoreCPU *cpu; CPUTriCoreState *env; + TriCoreTestDeviceState *test_dev; MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ext_cram = g_new(MemoryRegion, 1); @@ -88,6 +89,12 @@ static void tricore_testboard_init(MachineState *machine, int board_id) memory_region_add_subregion(sysmem, 0xf0050000, pcp_data); memory_region_add_subregion(sysmem, 0xf0060000, pcp_text); + test_dev = g_new(TriCoreTestDeviceState, 1); + object_initialize(test_dev, sizeof(TriCoreTestDeviceState), + TYPE_TRICORE_TESTDEVICE); + memory_region_add_subregion(sysmem, 0xf0000000, &test_dev->iomem); + + tricoretb_binfo.ram_size = machine->ram_size; tricoretb_binfo.kernel_filename = machine->kernel_filename; diff --git a/hw/tricore/tricore_testdevice.c b/hw/tricore/tricore_testdevice.c 
new file mode 100644 index 00000000000..a1563aa5689 --- /dev/null +++ b/hw/tricore/tricore_testdevice.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018-2021 Bastian Koppelmann Paderborn University + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/qdev-properties.h" +#include "hw/tricore/tricore_testdevice.h" + +static void tricore_testdevice_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + exit(value); +} + +static uint64_t tricore_testdevice_read(void *opaque, hwaddr offset, + unsigned size) +{ + return 0xdeadbeef; +} + +static void tricore_testdevice_reset(DeviceState *dev) +{ +} + +static const MemoryRegionOps tricore_testdevice_ops = { + .read = tricore_testdevice_read, + .write = tricore_testdevice_write, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void tricore_testdevice_init(Object *obj) +{ + TriCoreTestDeviceState *s = TRICORE_TESTDEVICE(obj); + /* map memory */ + memory_region_init_io(&s->iomem, OBJECT(s), &tricore_testdevice_ops, s, + "tricore_testdevice", 0x4); +} + +static Property tricore_testdevice_properties[] = { + DEFINE_PROP_END_OF_LIST() +}; + +static void tricore_testdevice_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, tricore_testdevice_properties); + dc->reset = tricore_testdevice_reset; +} + +static const TypeInfo tricore_testdevice_info = { + .name = TYPE_TRICORE_TESTDEVICE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(TriCoreTestDeviceState), + .instance_init = tricore_testdevice_init, + .class_init = tricore_testdevice_class_init, +}; + +static void tricore_testdevice_register_types(void) +{ + type_register_static(&tricore_testdevice_info); +} + +type_init(tricore_testdevice_register_types) diff --git a/hw/unicore32/Kconfig b/hw/unicore32/Kconfig deleted file mode 100644 index 4443a29dd29..00000000000 --- a/hw/unicore32/Kconfig +++ /dev/null @@ -1,5 +0,0 @@ -config PUV3 - bool - select ISA_BUS - select PCKBD - select PTIMER diff --git a/hw/unicore32/meson.build b/hw/unicore32/meson.build deleted file mode 100644 index fc26d6bcabe..00000000000 --- a/hw/unicore32/meson.build +++ /dev/null @@ -1,5 +0,0 @@ -unicore32_ss = ss.source_set() -# PKUnity-v3 SoC and board information -unicore32_ss.add(when: 'CONFIG_PUV3', if_true: files('puv3.c')) - -hw_arch += {'unicore32': unicore32_ss} diff --git a/hw/unicore32/puv3.c b/hw/unicore32/puv3.c deleted file mode 100644 index eacacb4249b..00000000000 --- a/hw/unicore32/puv3.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Generic PKUnity SoC machine and board descriptor - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software 
Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "cpu.h" -#include "ui/console.h" -#include "hw/boards.h" -#include "hw/loader.h" -#include "sysemu/qtest.h" -#include "hw/unicore32/puv3.h" -#include "hw/input/i8042.h" -#include "hw/irq.h" - -#define KERNEL_LOAD_ADDR 0x03000000 -#define KERNEL_MAX_SIZE 0x00800000 /* Just a guess */ - -/* PKUnity System bus (AHB): 0xc0000000 - 0xedffffff (640MB) */ -#define PUV3_DMA_BASE (0xc0200000) /* AHB-4 */ - -/* PKUnity Peripheral bus (APB): 0xee000000 - 0xefffffff (128MB) */ -#define PUV3_GPIO_BASE (0xee500000) /* APB-5 */ -#define PUV3_INTC_BASE (0xee600000) /* APB-6 */ -#define PUV3_OST_BASE (0xee800000) /* APB-8 */ -#define PUV3_PM_BASE (0xeea00000) /* APB-10 */ -#define PUV3_PS2_BASE (0xeeb00000) /* APB-11 */ - -static void puv3_intc_cpu_handler(void *opaque, int irq, int level) -{ - UniCore32CPU *cpu = opaque; - CPUState *cs = CPU(cpu); - - assert(irq == 0); - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } -} - -static void puv3_soc_init(CPUUniCore32State *env) -{ - qemu_irq cpu_intc, irqs[PUV3_IRQS_NR]; - DeviceState *dev; - MemoryRegion *i8042 = g_new(MemoryRegion, 1); - int i; - - /* Initialize interrupt controller */ - cpu_intc = qemu_allocate_irq(puv3_intc_cpu_handler, - env_archcpu(env), 0); - dev = sysbus_create_simple("puv3_intc", PUV3_INTC_BASE, cpu_intc); - for (i = 0; i < PUV3_IRQS_NR; i++) { - irqs[i] = qdev_get_gpio_in(dev, i); - } - - /* Initialize minimal necessary devices for kernel booting */ - sysbus_create_simple("puv3_pm", PUV3_PM_BASE, NULL); - sysbus_create_simple("puv3_dma", PUV3_DMA_BASE, NULL); - sysbus_create_simple("puv3_ost", PUV3_OST_BASE, irqs[PUV3_IRQS_OST0]); - sysbus_create_varargs("puv3_gpio", PUV3_GPIO_BASE, - irqs[PUV3_IRQS_GPIOLOW0], irqs[PUV3_IRQS_GPIOLOW1], - irqs[PUV3_IRQS_GPIOLOW2], irqs[PUV3_IRQS_GPIOLOW3], - irqs[PUV3_IRQS_GPIOLOW4], irqs[PUV3_IRQS_GPIOLOW5], - irqs[PUV3_IRQS_GPIOLOW6], irqs[PUV3_IRQS_GPIOLOW7], - irqs[PUV3_IRQS_GPIOHIGH], NULL); - - /* Keyboard (i8042), mouse disabled for nographic */ - i8042_mm_init(irqs[PUV3_IRQS_PS2_KBD], NULL, i8042, PUV3_REGS_OFFSET, 4); - memory_region_add_subregion(get_system_memory(), PUV3_PS2_BASE, i8042); -} - -static void puv3_board_init(CPUUniCore32State *env, ram_addr_t ram_size) -{ - MemoryRegion *ram_memory = g_new(MemoryRegion, 1); - - /* SDRAM at address zero. 
*/ - memory_region_init_ram(ram_memory, NULL, "puv3.ram", ram_size, - &error_fatal); - memory_region_add_subregion(get_system_memory(), 0, ram_memory); -} - -static const GraphicHwOps no_ops; - -static void puv3_load_kernel(const char *kernel_filename) -{ - int size; - - if (kernel_filename == NULL && qtest_enabled()) { - return; - } - if (kernel_filename == NULL) { - error_report("kernel parameter cannot be empty"); - exit(1); - } - - /* only zImage format supported */ - size = load_image_targphys(kernel_filename, KERNEL_LOAD_ADDR, - KERNEL_MAX_SIZE); - if (size < 0) { - error_report("Load kernel error: '%s'", kernel_filename); - exit(1); - } - - /* cheat curses that we have a graphic console, only under ocd console */ - graphic_console_init(NULL, 0, &no_ops, NULL); -} - -static void puv3_init(MachineState *machine) -{ - ram_addr_t ram_size = machine->ram_size; - const char *kernel_filename = machine->kernel_filename; - const char *initrd_filename = machine->initrd_filename; - CPUUniCore32State *env; - UniCore32CPU *cpu; - - if (initrd_filename) { - error_report("Please use kernel built-in initramdisk"); - exit(1); - } - - cpu = UNICORE32_CPU(cpu_create(machine->cpu_type)); - env = &cpu->env; - - puv3_soc_init(env); - puv3_board_init(env, ram_size); - puv3_load_kernel(kernel_filename); -} - -static void puv3_machine_init(MachineClass *mc) -{ - mc->desc = "PKUnity Version-3 based on UniCore32"; - mc->init = puv3_init; - mc->is_default = true; - mc->default_cpu_type = UNICORE32_CPU_TYPE_NAME("UniCore-II"); -} - -DEFINE_MACHINE("puv3", puv3_machine_init) diff --git a/hw/usb/bus.c b/hw/usb/bus.c index 07083349f51..92d6ed56261 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -2,6 +2,8 @@ #include "hw/qdev-properties.h" #include "hw/usb.h" #include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/type-helpers.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "sysemu/sysemu.h" @@ -82,7 +84,7 @@ const VMStateDescription vmstate_usb_device = { void usb_bus_new(USBBus *bus, size_t bus_size, USBBusOps *ops, DeviceState *host) { - qbus_create_inplace(bus, bus_size, TYPE_USB_BUS, host, NULL); + qbus_init(bus, bus_size, TYPE_USB_BUS, host, NULL); qbus_set_bus_hotplug_handler(BUS(bus)); bus->ops = ops; bus->busnr = next_usb_bus++; @@ -631,15 +633,16 @@ static char *usb_get_fw_dev_path(DeviceState *qdev) return fw_path; } -void hmp_info_usb(Monitor *mon, const QDict *qdict) +HumanReadableText *qmp_x_query_usb(Error **errp) { + g_autoptr(GString) buf = g_string_new(""); USBBus *bus; USBDevice *dev; USBPort *port; if (QTAILQ_EMPTY(&busses)) { - monitor_printf(mon, "USB support not enabled\n"); - return; + error_setg(errp, "USB support not enabled"); + return NULL; } QTAILQ_FOREACH(bus, &busses, next) { @@ -647,14 +650,17 @@ void hmp_info_usb(Monitor *mon, const QDict *qdict) dev = port->dev; if (!dev) continue; - monitor_printf(mon, " Device %d.%d, Port %s, Speed %s Mb/s, " - "Product %s%s%s\n", - bus->busnr, dev->addr, port->path, - usb_speed(dev->speed), dev->product_desc, - dev->qdev.id ? ", ID: " : "", - dev->qdev.id ?: ""); + g_string_append_printf(buf, + " Device %d.%d, Port %s, Speed %s Mb/s, " + "Product %s%s%s\n", + bus->busnr, dev->addr, port->path, + usb_speed(dev->speed), dev->product_desc, + dev->qdev.id ? 
", ID: " : "", + dev->qdev.id ?: ""); } } + + return human_readable_text_from_str(buf); } /* handle legacy -usbdevice cmd line option */ diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c index 5c76bed77aa..6c8c0355e09 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c @@ -612,6 +612,7 @@ static const TypeInfo emulated_card_info = { .instance_size = sizeof(EmulatedState), .class_init = emulated_class_initfn, }; +module_obj(TYPE_EMULATED_CCID); static void ccid_card_emulated_register_types(void) { diff --git a/hw/usb/ccid-card-passthru.c b/hw/usb/ccid-card-passthru.c index c1a90fcc7a5..fa3040fb715 100644 --- a/hw/usb/ccid-card-passthru.c +++ b/hw/usb/ccid-card-passthru.c @@ -374,7 +374,7 @@ static void passthru_realize(CCIDCardState *base, Error **errp) card->atr_length = sizeof(DEFAULT_ATR); } -static VMStateDescription passthru_vmstate = { +static const VMStateDescription passthru_vmstate = { .name = "ccid-card-passthru", .version_id = 1, .minimum_version_id = 1, @@ -414,6 +414,7 @@ static const TypeInfo passthru_card_info = { .instance_size = sizeof(PassthruState), .class_init = passthru_class_initfn, }; +module_obj(TYPE_CCID_PASSTHRU); static void ccid_card_passthru_register_types(void) { diff --git a/hw/usb/chipidea.c b/hw/usb/chipidea.c index 3dcd22ccba8..b1c85404d6f 100644 --- a/hw/usb/chipidea.c +++ b/hw/usb/chipidea.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "hw/usb/hcd-ehci.h" #include "hw/usb/chipidea.h" -#include "qemu/log.h" #include "qemu/module.h" enum { diff --git a/hw/usb/combined-packet.c b/hw/usb/combined-packet.c index 5d57e883dcb..e56802f89a3 100644 --- a/hw/usb/combined-packet.c +++ b/hw/usb/combined-packet.c @@ -171,7 +171,9 @@ void usb_ep_combine_input_packets(USBEndpoint *ep) if ((p->iov.size % ep->max_packet_size) != 0 || !p->short_not_ok || next == NULL || /* Work around for Linux usbfs bulk splitting + migration */ - (totalsize == (16 * KiB - 36) && p->int_req)) { + (totalsize == (16 * KiB - 36) && p->int_req) || + /* Next package may grow combined package over 1MiB */ + totalsize > 1 * MiB - ep->max_packet_size) { usb_device_handle_data(ep->dev, first); assert(first->status == USB_RET_ASYNC); if (first->combined) { diff --git a/hw/usb/desc-msos.c b/hw/usb/desc-msos.c index 3a5ad7c8d0f..c72c65b650c 100644 --- a/hw/usb/desc-msos.c +++ b/hw/usb/desc-msos.c @@ -5,12 +5,12 @@ /* * Microsoft OS Descriptors * - * Windows tries to fetch some special descriptors with informations + * Windows tries to fetch some special descriptors with information * specifically for windows. Presence is indicated using a special * string @ index 0xee. There are two kinds of descriptors: * * compatid descriptor - * Used to bind drivers, if usb class isn't specific enougth. + * Used to bind drivers, if usb class isn't specific enough. * Used for PTP/MTP for example (both share the same usb class). * * properties descriptor @@ -23,7 +23,7 @@ * HLM\SYSTEM\CurrentControlSet\Control\usbflags * HLM\SYSTEM\CurrentControlSet\Enum\USB * Windows will complain it can't delete entries on the second one. - * It has deleted everything it had permissions too, which is enouth + * It has deleted everything it had permissions too, which is enough * as this includes "Device Parameters". * * http://msdn.microsoft.com/en-us/library/windows/hardware/ff537430.aspx @@ -181,7 +181,7 @@ static int usb_desc_msos_prop(const USBDesc *desc, uint8_t *dest) if (desc->msos->Label) { /* - * Given as example in the specs. 
Havn't figured yet where + * Given as example in the specs. Haven't figured yet where * this label shows up in the windows gui. */ length += usb_desc_msos_prop_str(dest+length, MSOS_REG_SZ, @@ -192,8 +192,8 @@ static int usb_desc_msos_prop(const USBDesc *desc, uint8_t *dest) if (desc->msos->SelectiveSuspendEnabled) { /* * Signaling remote wakeup capability in the standard usb - * descriptors isn't enouth to make windows actually use it. - * This is the "Yes, we really mean it" registy entry to flip + * descriptors isn't enough to make windows actually use it. + * This is the "Yes, we really mean it" registry entry to flip * the switch in the windows drivers. */ length += usb_desc_msos_prop_dword(dest+length, diff --git a/hw/usb/desc.h b/hw/usb/desc.h index 4d81c68e0ef..3ac604ecfa1 100644 --- a/hw/usb/desc.h +++ b/hw/usb/desc.h @@ -133,7 +133,7 @@ struct USBDescConfig { const USBDescIface *ifs; }; -/* conceptually an Interface Association Descriptor, and releated interfaces */ +/* conceptually an Interface Association Descriptor, and related interfaces */ struct USBDescIfaceAssoc { uint8_t bFirstInterface; uint8_t bInterfaceCount; diff --git a/hw/usb/dev-audio.c b/hw/usb/dev-audio.c index f5cb2467929..8748c1ba040 100644 --- a/hw/usb/dev-audio.c +++ b/hw/usb/dev-audio.c @@ -168,7 +168,7 @@ static const USBDescIface desc_iface[] = { STRING_FEATURE_UNIT, /* u8 iFeature */ } },{ - /* Headphone Ouptut Terminal ID3 Descriptor */ + /* Headphone Output Terminal ID3 Descriptor */ .data = (uint8_t[]) { 0x09, /* u8 bLength */ USB_DT_CS_INTERFACE, /* u8 bDescriptorType */ @@ -332,7 +332,7 @@ static const USBDescIface desc_iface_multi[] = { STRING_FEATURE_UNIT, /* u8 iFeature */ } },{ - /* Headphone Ouptut Terminal ID3 Descriptor */ + /* Headphone Output Terminal ID3 Descriptor */ .data = (uint8_t[]) { 0x09, /* u8 bLength */ USB_DT_CS_INTERFACE, /* u8 bDescriptorType */ diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c index fc39bab79f9..1c7ae97c303 100644 --- a/hw/usb/dev-hid.c +++ b/hw/usb/dev-hid.c @@ -656,7 +656,7 @@ static void usb_hid_handle_data(USBDevice *dev, USBPacket *p) { USBHIDState *us = USB_HID(dev); HIDState *hs = &us->hid; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); int len = 0; switch (p->pid) { diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index bbb82743448..c1d1694fd0b 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -772,12 +772,9 @@ static void usb_mtp_add_str(MTPData *data, const char *str) static void usb_mtp_add_time(MTPData *data, time_t time) { - char buf[16]; - struct tm tm; - - gmtime_r(&time, &tm); - strftime(buf, sizeof(buf), "%Y%m%dT%H%M%S", &tm); - usb_mtp_add_str(data, buf); + g_autoptr(GDateTime) then = g_date_time_new_from_unix_utc(time); + g_autofree char *thenstr = g_date_time_format(then, "%Y%m%dT%H%M%S"); + usb_mtp_add_str(data, thenstr); } /* ----------------------------------------------------------------------- */ @@ -907,7 +904,8 @@ static MTPData *usb_mtp_get_object_handles(MTPState *s, MTPControl *c, MTPObject *o) { MTPData *d = usb_mtp_data_alloc(c); - uint32_t i = 0, handles[o->nchildren]; + uint32_t i = 0; + g_autofree uint32_t *handles = g_new(uint32_t, o->nchildren); MTPObject *iter; trace_usb_mtp_op_get_object_handles(s->dev.addr, o->handle, o->path); diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c index bc3d94092a2..91ffd9f8ae8 100644 --- a/hw/usb/dev-smartcard-reader.c +++ b/hw/usb/dev-smartcard-reader.c @@ -1320,8 +1320,7 @@ static void ccid_realize(USBDevice *dev, Error **errp) 
usb_desc_create_serial(dev); usb_desc_init(dev); - qbus_create_inplace(&s->bus, sizeof(s->bus), TYPE_CCID_BUS, DEVICE(dev), - NULL); + qbus_init(&s->bus, sizeof(s->bus), TYPE_CCID_BUS, DEVICE(dev), NULL); qbus_set_hotplug_handler(BUS(&s->bus), OBJECT(dev)); s->intr = usb_ep_get(dev, USB_TOKEN_IN, CCID_INT_IN_EP); s->bulk = usb_ep_get(dev, USB_TOKEN_IN, CCID_BULK_IN_EP); @@ -1365,7 +1364,7 @@ static int ccid_pre_save(void *opaque) return 0; } -static VMStateDescription bulk_in_vmstate = { +static const VMStateDescription bulk_in_vmstate = { .name = "CCID BulkIn state", .version_id = 1, .minimum_version_id = 1, @@ -1377,7 +1376,7 @@ static VMStateDescription bulk_in_vmstate = { } }; -static VMStateDescription answer_vmstate = { +static const VMStateDescription answer_vmstate = { .name = "CCID Answer state", .version_id = 1, .minimum_version_id = 1, @@ -1388,7 +1387,7 @@ static VMStateDescription answer_vmstate = { } }; -static VMStateDescription usb_device_vmstate = { +static const VMStateDescription usb_device_vmstate = { .name = "usb_device", .version_id = 1, .minimum_version_id = 1, @@ -1400,7 +1399,7 @@ static VMStateDescription usb_device_vmstate = { } }; -static VMStateDescription ccid_vmstate = { +static const VMStateDescription ccid_vmstate = { .name = "usb-ccid", .version_id = 1, .minimum_version_id = 1, diff --git a/hw/usb/dev-storage-bot.c b/hw/usb/dev-storage-bot.c index 6aad026d113..b24b3148c28 100644 --- a/hw/usb/dev-storage-bot.c +++ b/hw/usb/dev-storage-bot.c @@ -32,12 +32,12 @@ static void usb_msd_bot_realize(USBDevice *dev, Error **errp) usb_desc_create_serial(dev); usb_desc_init(dev); + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); if (d->hotplugged) { s->dev.auto_attach = 0; } - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &usb_msd_scsi_info_bot, NULL); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), &usb_msd_scsi_info_bot); usb_msd_handle_reset(dev); } diff --git a/hw/usb/dev-storage-classic.c b/hw/usb/dev-storage-classic.c index 00cb34b22f0..00f25bade28 100644 --- a/hw/usb/dev-storage-classic.c +++ b/hw/usb/dev-storage-classic.c @@ -64,8 +64,9 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) usb_desc_create_serial(dev); usb_desc_init(dev); - scsi_bus_new(&s->bus, sizeof(s->bus), DEVICE(dev), - &usb_msd_scsi_info_storage, NULL); + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(dev), + &usb_msd_scsi_info_storage); scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable, s->conf.bootindex, s->conf.share_rw, s->conf.rerror, s->conf.werror, diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index d2bd85d3f6b..599d6b52a01 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -840,6 +840,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) } break; case UAS_PIPE_ID_STATUS: + if (p->stream > UAS_MAX_STREAMS) { + goto err_stream; + } if (p->stream) { QTAILQ_FOREACH(st, &uas->results, next) { if (st->stream == p->stream) { @@ -867,6 +870,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) break; case UAS_PIPE_ID_DATA_IN: case UAS_PIPE_ID_DATA_OUT: + if (p->stream > UAS_MAX_STREAMS) { + goto err_stream; + } if (p->stream) { req = usb_uas_find_request(uas, p->stream); } else { @@ -902,6 +908,11 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) p->status = USB_RET_STALL; break; } + +err_stream: + error_report("%s: invalid stream %d", __func__, p->stream); + p->status = USB_RET_STALL; + return; } static void usb_uas_unrealize(USBDevice 
*dev) @@ -926,8 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) QTAILQ_INIT(&uas->requests); uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); - scsi_bus_new(&uas->bus, sizeof(uas->bus), DEVICE(dev), - &usb_uas_scsi_info, NULL); + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); } static const VMStateDescription vmstate_usb_uas = { diff --git a/hw/usb/dev-wacom.c b/hw/usb/dev-wacom.c index b5950486350..ed687bc9f1e 100644 --- a/hw/usb/dev-wacom.c +++ b/hw/usb/dev-wacom.c @@ -301,7 +301,7 @@ static void usb_wacom_handle_control(USBDevice *dev, USBPacket *p, static void usb_wacom_handle_data(USBDevice *dev, USBPacket *p) { USBWacomState *s = (USBWacomState *) dev; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); int len = 0; switch (p->pid) { diff --git a/hw/usb/hcd-dwc3.c b/hw/usb/hcd-dwc3.c index d547d0538dd..279263489e4 100644 --- a/hw/usb/hcd-dwc3.c +++ b/hw/usb/hcd-dwc3.c @@ -31,7 +31,6 @@ #include "hw/sysbus.h" #include "hw/register.h" #include "qemu/bitops.h" -#include "qemu/log.h" #include "qom/object.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c index 0cb02a64321..d1b5657d722 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -31,6 +31,7 @@ #include "hw/usb/uhci-regs.h" #include "migration/vmstate.h" #include "hw/pci/pci.h" +#include "hw/irq.h" #include "hw/qdev-properties.h" #include "qapi/error.h" #include "qemu/timer.h" @@ -290,7 +291,7 @@ static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t td_addr) static void uhci_update_irq(UHCIState *s) { - int level; + int level = 0; if (((s->status2 & 1) && (s->intr & (1 << 2))) || ((s->status2 & 2) && (s->intr & (1 << 3))) || ((s->status & UHCI_STS_USBERR) && (s->intr & (1 << 0))) || @@ -298,10 +299,8 @@ static void uhci_update_irq(UHCIState *s) (s->status & UHCI_STS_HSERR) || (s->status & UHCI_STS_HCPERR)) { level = 1; - } else { - level = 0; } - pci_set_irq(&s->dev, level); + qemu_set_irq(s->irq, level); } static void uhci_reset(DeviceState *dev) @@ -1170,9 +1169,9 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) pci_conf[PCI_CLASS_PROG] = 0x00; /* TODO: reset value should be 0. */ - pci_conf[USB_SBRN] = USB_RELEASE_1; // release number - + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); + s->irq = pci_allocate_irq(dev); if (s->masterbus) { USBPort *ports[NB_PORTS]; @@ -1285,6 +1284,9 @@ void uhci_data_class_init(ObjectClass *klass, void *data) } else { device_class_set_props(dc, uhci_properties_standalone); } + if (info->notuser) { + dc->user_creatable = false; + } u->info = *info; } diff --git a/hw/usb/hcd-uhci.h b/hw/usb/hcd-uhci.h index e61d8fcb192..c85ab7868ee 100644 --- a/hw/usb/hcd-uhci.h +++ b/hw/usb/hcd-uhci.h @@ -60,7 +60,7 @@ typedef struct UHCIState { uint32_t frame_bandwidth; bool completions_only; UHCIPort ports[NB_PORTS]; - + qemu_irq irq; /* Interrupts that should be raised at the end of the current frame. 
*/ uint32_t pending_int_mask; @@ -85,6 +85,7 @@ typedef struct UHCIInfo { uint8_t irq_pin; void (*realize)(PCIDevice *dev, Error **errp); bool unplug; + bool notuser; /* disallow user_creatable */ } UHCIInfo; void uhci_data_class_init(ObjectClass *klass, void *data); diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c index 9421734d0fe..e934b1a5b1f 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -57,7 +57,7 @@ static void xhci_pci_intr_update(XHCIState *xhci, int n, bool enable) } } -static void xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) +static bool xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) { XHCIPciState *s = container_of(xhci, XHCIPciState, xhci); PCIDevice *pci_dev = PCI_DEVICE(s); @@ -67,15 +67,18 @@ static void xhci_pci_intr_raise(XHCIState *xhci, int n, bool level) msi_enabled(pci_dev))) { pci_set_irq(pci_dev, level); } - if (msix_enabled(pci_dev)) { + + if (msix_enabled(pci_dev) && level) { msix_notify(pci_dev, n); - return; + return true; } - if (msi_enabled(pci_dev)) { + if (msi_enabled(pci_dev) && level) { msi_notify(pci_dev, n); - return; + return true; } + + return false; } static void xhci_pci_reset(DeviceState *dev) diff --git a/hw/usb/hcd-xhci-sysbus.c b/hw/usb/hcd-xhci-sysbus.c index 42e2574c829..a14e4381960 100644 --- a/hw/usb/hcd-xhci-sysbus.c +++ b/hw/usb/hcd-xhci-sysbus.c @@ -16,11 +16,13 @@ #include "hw/acpi/aml-build.h" #include "hw/irq.h" -static void xhci_sysbus_intr_raise(XHCIState *xhci, int n, bool level) +static bool xhci_sysbus_intr_raise(XHCIState *xhci, int n, bool level) { XHCISysbusState *s = container_of(xhci, XHCISysbusState, xhci); qemu_set_irq(s->irq[n], level); + + return false; } void xhci_sysbus_reset(DeviceState *dev) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 46212b1e695..e01700039b1 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -551,7 +551,9 @@ static void xhci_intr_update(XHCIState *xhci, int v) level = 1; } if (xhci->intr_raise) { - xhci->intr_raise(xhci, 0, level); + if (xhci->intr_raise(xhci, 0, level)) { + xhci->intr[0].iman &= ~IMAN_IP; + } } } if (xhci->intr_update) { @@ -579,7 +581,9 @@ static void xhci_intr_raise(XHCIState *xhci, int v) return; } if (xhci->intr_raise) { - xhci->intr_raise(xhci, v, true); + if (xhci->intr_raise(xhci, v, true)) { + xhci->intr[v].iman &= ~IMAN_IP; + } } } diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h index 7bba361f3bb..98f598382ad 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -194,7 +194,7 @@ typedef struct XHCIState { uint32_t flags; uint32_t max_pstreams_mask; void (*intr_update)(XHCIState *s, int n, bool enable); - void (*intr_raise)(XHCIState *s, int n, bool level); + bool (*intr_raise)(XHCIState *s, int n, bool level); DeviceState *hostOpaque; /* Operational Registers */ diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 2518306f527..d0d46dd0a4a 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -254,6 +254,29 @@ static void usb_host_del_fd(int fd, void *user_data) qemu_set_fd_handler(fd, NULL, NULL, NULL); } +#else + +static QEMUTimer *poll_timer; +static uint32_t request_count; + +static void usb_host_timer_kick(void) +{ + int64_t delay_ns; + + delay_ns = request_count + ? 
(NANOSECONDS_PER_SECOND / 100) /* 10 ms interval with active req */ + : (NANOSECONDS_PER_SECOND); /* 1 sec interval otherwise */ + timer_mod(poll_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns); +} + +static void usb_host_timer(void *opaque) +{ + struct timeval tv = { 0, 0 }; + + libusb_handle_events_timeout(ctx, &tv); + usb_host_timer_kick(); +} + #endif /* !CONFIG_WIN32 */ static int usb_host_init(void) @@ -276,7 +299,8 @@ static int usb_host_init(void) libusb_set_debug(ctx, loglevel); #endif #ifdef CONFIG_WIN32 - /* FIXME: add support for Windows. */ + poll_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, usb_host_timer, NULL); + usb_host_timer_kick(); #else libusb_set_pollfd_notifiers(ctx, usb_host_add_fd, usb_host_del_fd, @@ -364,11 +388,18 @@ static USBHostRequest *usb_host_req_alloc(USBHostDevice *s, USBPacket *p, r->buffer = g_malloc(bufsize); } QTAILQ_INSERT_TAIL(&s->requests, r, next); +#ifdef CONFIG_WIN32 + request_count++; + usb_host_timer_kick(); +#endif return r; } static void usb_host_req_free(USBHostRequest *r) { +#ifdef CONFIG_WIN32 + request_count--; +#endif QTAILQ_REMOVE(&r->host->requests, r, next); libusb_free_transfer(r->xfer); g_free(r->buffer); @@ -770,6 +801,13 @@ static void usb_host_speed_compat(USBHostDevice *s) for (i = 0; i < conf->bNumInterfaces; i++) { for (a = 0; a < conf->interface[i].num_altsetting; a++) { intf = &conf->interface[i].altsetting[a]; + + if (intf->bInterfaceClass == LIBUSB_CLASS_MASS_STORAGE && + intf->bInterfaceSubClass == 6) { /* SCSI */ + udev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + break; + } + for (e = 0; e < intf->bNumEndpoints; e++) { endp = &intf->endpoint[e]; type = endp->bmAttributes & 0x3; @@ -1668,7 +1706,7 @@ static void usb_host_free_streams(USBDevice *udev, USBEndpoint **eps, /* * This is *NOT* about restoring state. We have absolutely no idea * what state the host device is in at the moment and whenever it is - * still present in the first place. Attemping to contine where we + * still present in the first place. Attempting to continue where we * left off is impossible. 
* * What we are going to do here is emulate a surprise removal of @@ -1770,10 +1808,12 @@ static TypeInfo usb_host_dev_info = { .class_init = usb_host_class_initfn, .instance_init = usb_host_instance_init, }; +module_obj(TYPE_USB_HOST_DEVICE); static void usb_host_register_types(void) { type_register_static(&usb_host_dev_info); + monitor_register_hmp("usbhost", true, hmp_info_usbhost); } type_init(usb_host_register_types) @@ -1893,35 +1933,6 @@ static void usb_host_auto_check(void *unused) timer_mod(usb_auto_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 2000); } -/** - * Check whether USB host device has a USB mass storage SCSI interface - */ -bool usb_host_dev_is_scsi_storage(USBDevice *ud) -{ - USBHostDevice *uhd = USB_HOST_DEVICE(ud); - struct libusb_config_descriptor *conf; - const struct libusb_interface_descriptor *intf; - bool is_scsi_storage = false; - int i; - - if (!uhd || libusb_get_active_config_descriptor(uhd->dev, &conf) != 0) { - return false; - } - - for (i = 0; i < conf->bNumInterfaces; i++) { - intf = &conf->interface[i].altsetting[ud->altsetting[i]]; - if (intf->bInterfaceClass == LIBUSB_CLASS_MASS_STORAGE && - intf->bInterfaceSubClass == 6) { /* 6 means SCSI */ - is_scsi_storage = true; - break; - } - } - - libusb_free_config_descriptor(conf); - - return is_scsi_storage; -} - void hmp_info_usbhost(Monitor *mon, const QDict *qdict) { libusb_device **devs = NULL; diff --git a/hw/usb/host-stub.c b/hw/usb/host-stub.c deleted file mode 100644 index 538ed29684c..00000000000 --- a/hw/usb/host-stub.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Stub host USB redirector - * - * Copyright (c) 2005 Fabrice Bellard - * - * Copyright (c) 2008 Max Krasnyansky - * Support for host device auto connect & disconnect - * Major rewrite to support fully async operation - * - * Copyright 2008 TJ - * Added flexible support for /dev/bus/usb /sys/bus/usb/devices in addition - * to the legacy /proc/bus/usb USB device discovery and handling - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#include "qemu/osdep.h" -#include "ui/console.h" -#include "hw/usb.h" -#include "monitor/monitor.h" - -void hmp_info_usbhost(Monitor *mon, const QDict *qdict) -{ - monitor_printf(mon, "USB host devices not supported\n"); -} - -bool usb_host_dev_is_scsi_storage(USBDevice *ud) -{ - return false; -} diff --git a/hw/usb/imx-usb-phy.c b/hw/usb/imx-usb-phy.c index e705a03a1fc..5d7a549e34d 100644 --- a/hw/usb/imx-usb-phy.c +++ b/hw/usb/imx-usb-phy.c @@ -13,7 +13,6 @@ #include "qemu/osdep.h" #include "hw/usb/imx-usb-phy.h" #include "migration/vmstate.h" -#include "qemu/log.h" #include "qemu/module.h" static const VMStateDescription vmstate_imx_usbphy = { diff --git a/hw/usb/meson.build b/hw/usb/meson.build index fb7a74e73ae..de853d780dd 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -1,17 +1,14 @@ hw_usb_modules = {} # usb subsystem core -softmmu_ss.add(files( +softmmu_ss.add(when: 'CONFIG_USB', if_true: files( 'bus.c', 'combined-packet.c', 'core.c', - 'pcap.c', - 'libhw.c' -)) - -softmmu_ss.add(when: 'CONFIG_USB', if_true: files( 'desc.c', 'desc-msos.c', + 'libhw.c', + 'pcap.c', )) # usb host adapters @@ -52,7 +49,7 @@ softmmu_ss.add(when: ['CONFIG_POSIX', 'CONFIG_USB_STORAGE_MTP'], if_true: files( # smartcard softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader.c')) -if config_host.has_key('CONFIG_SMARTCARD') +if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) @@ -67,7 +64,7 @@ if u2f.found() endif # usb redirect -if config_host.has_key('CONFIG_USB_REDIR') +if usbredir.found() usbredir_ss = ss.source_set() usbredir_ss.add(when: 'CONFIG_USB', if_true: [usbredir, files('redirect.c', 'quirks.c')]) @@ -75,10 +72,12 @@ if config_host.has_key('CONFIG_USB_REDIR') endif # usb pass-through -softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_USB_LIBUSB', libusb], - if_true: files('host-libusb.c'), - if_false: files('host-stub.c')) -softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('host-stub.c')) +if libusb.found() + usbhost_ss = ss.source_set() + usbhost_ss.add(when: ['CONFIG_USB', libusb], + if_true: files('host-libusb.c')) + hw_usb_modules += {'host': usbhost_ss} +endif softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c')) diff --git a/hw/usb/quirks-ftdi-ids.h b/hw/usb/quirks-ftdi-ids.h index 57c12ef6625..f3cb157d6fa 100644 --- a/hw/usb/quirks-ftdi-ids.h +++ b/hw/usb/quirks-ftdi-ids.h @@ -625,9 +625,9 @@ * Definitions for Icom Inc. 
devices */ #define ICOM_VID 0x0C26 /* Icom vendor ID */ -/* Note: ID-1 is a communications tranceiver for HAM-radio operators */ +/* Note: ID-1 is a communications transceiver for HAM-radio operators */ #define ICOM_ID_1_PID 0x0004 /* ID-1 USB to RS-232 */ -/* Note: OPC is an Optional cable to connect an Icom Tranceiver */ +/* Note: OPC is an Optional cable to connect an Icom Transceiver */ #define ICOM_OPC_U_UC_PID 0x0018 /* OPC-478UC, OPC-1122U cloning cable */ /* Note: ID-RP* devices are Icom Repeater Devices for HAM-radio */ #define ICOM_ID_RP2C1_PID 0x0009 /* ID-RP2C Asset 1 to RS-232 */ @@ -1221,12 +1221,6 @@ #define FTDI_SCIENCESCOPE_LS_LOGBOOK_PID 0xFF1C #define FTDI_SCIENCESCOPE_HS_LOGBOOK_PID 0xFF1D -/* - * Milkymist One JTAG/Serial - */ -#define QIHARDWARE_VID 0x20B7 -#define MILKYMISTONE_JTAGSERIAL_PID 0x0713 - /* * CTI GmbH RS485 Converter http://www.cti-lean.com/ */ diff --git a/hw/usb/quirks.h b/hw/usb/quirks.h index 50ef2f9c2eb..c3e595f40b7 100644 --- a/hw/usb/quirks.h +++ b/hw/usb/quirks.h @@ -904,7 +904,6 @@ static const struct usb_device_id usbredir_ftdi_serial_ids[] = { { USB_DEVICE(FTDI_VID, FTDI_SCIENCESCOPE_HS_LOGBOOK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CINTERION_MC55I_PID) }, { USB_DEVICE(FTDI_VID, FTDI_DOTEC_PID) }, - { USB_DEVICE(QIHARDWARE_VID, MILKYMISTONE_JTAGSERIAL_PID) }, { USB_DEVICE(ST_VID, ST_STMCLT1030_PID) }, { USB_DEVICE(FTDI_VID, FTDI_RF_R106) }, { USB_DEVICE(FTDI_VID, FTDI_DISTORTEC_JTAG_LOCK_PICK_PID) }, diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 17f06f34179..5f0ef9cb3b0 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -270,7 +270,7 @@ static int usbredir_read(void *priv, uint8_t *data, int count) return count; } -static gboolean usbredir_write_unblocked(GIOChannel *chan, GIOCondition cond, +static gboolean usbredir_write_unblocked(void *do_not_use, GIOCondition cond, void *opaque) { USBRedirDevice *dev = opaque; @@ -476,7 +476,7 @@ static int bufp_alloc(USBRedirDevice *dev, uint8_t *data, uint16_t len, if (dev->endpoint[EP2I(ep)].bufpq_dropping_packets) { if (dev->endpoint[EP2I(ep)].bufpq_size > dev->endpoint[EP2I(ep)].bufpq_target_size) { - free(data); + free(free_on_destroy); return -1; } dev->endpoint[EP2I(ep)].bufpq_dropping_packets = 0; @@ -620,7 +620,7 @@ static void usbredir_handle_iso_data(USBRedirDevice *dev, USBPacket *p, .endpoint = ep, .length = p->iov.size }; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); /* No id, we look at the ep when receiving a status back */ usb_packet_copy(p, buf, p->iov.size); usbredirparser_send_iso_packet(dev->parser, 0, &iso_packet, @@ -818,7 +818,7 @@ static void usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p, usbredirparser_send_bulk_packet(dev->parser, p->id, &bulk_packet, NULL, 0); } else { - uint8_t buf[size]; + g_autofree uint8_t *buf = g_malloc(size); usb_packet_copy(p, buf, size); usbredir_log_data(dev, "bulk data out:", buf, size); usbredirparser_send_bulk_packet(dev->parser, p->id, @@ -923,7 +923,7 @@ static void usbredir_handle_interrupt_out_data(USBRedirDevice *dev, USBPacket *p, uint8_t ep) { struct usb_redir_interrupt_packet_header interrupt_packet; - uint8_t buf[p->iov.size]; + g_autofree uint8_t *buf = g_malloc(p->iov.size); DPRINTF("interrupt-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id); @@ -2608,6 +2608,7 @@ static const TypeInfo usbredir_dev_info = { .class_init = usbredir_class_initfn, .instance_init = usbredir_instance_init, }; +module_obj(TYPE_USB_REDIR); static void usbredir_register_types(void) { diff 
--git a/hw/usb/trace-events b/hw/usb/trace-events index 38e05fc7f4d..b8287b63f15 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # core.c usb_packet_state_change(int bus, const char *port, int ep, void *p, const char *o, const char *n) "bus %d, port %s, ep %d, packet %p, state %s -> %s" diff --git a/hw/usb/u2f-emulated.c b/hw/usb/u2f-emulated.c index 9151feb63d4..63cceaa5fc8 100644 --- a/hw/usb/u2f-emulated.c +++ b/hw/usb/u2f-emulated.c @@ -307,7 +307,7 @@ static void u2f_emulated_realize(U2FKeyState *base, Error **errp) rc = u2f_emulated_setup_vdev_manualy(key); } else { error_setg(errp, "%s: cert, priv, entropy and counter " - "parameters must be provided to manualy configure " + "parameters must be provided to manually configure " "the emulated device", TYPE_U2F_EMULATED); return; } diff --git a/hw/usb/vt82c686-uhci-pci.c b/hw/usb/vt82c686-uhci-pci.c index b109c216033..0bf2b72ff08 100644 --- a/hw/usb/vt82c686-uhci-pci.c +++ b/hw/usb/vt82c686-uhci-pci.c @@ -1,6 +1,17 @@ #include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/isa/vt82c686.h" #include "hcd-uhci.h" +static void uhci_isa_set_irq(void *opaque, int irq_num, int level) +{ + UHCIState *s = opaque; + uint8_t irq = pci_get_byte(s->dev.config + PCI_INTERRUPT_LINE); + if (irq > 0 && irq < 15) { + via_isa_set_irq(pci_get_function_0(&s->dev), irq, level); + } +} + static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) { UHCIState *s = UHCI(dev); @@ -14,6 +25,8 @@ static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) pci_set_long(pci_conf + 0xc0, 0x00002000); usb_uhci_common_realize(dev, errp); + object_unref(s->irq); + s->irq = qemu_allocate_irq(uhci_isa_set_irq, s, 0); } static UHCIInfo uhci_info[] = { @@ -25,6 +38,8 @@ static UHCIInfo uhci_info[] = { .irq_pin = 3, .realize = usb_uhci_vt82c686b_realize, .unplug = true, + /* Reason: only works as USB function of VT82xx superio chips */ + .notuser = true, } }; diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 4d266d7bb45..0f7369e7ed6 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -26,7 +26,6 @@ #include "qemu/config-file.h" #include "qemu/main-loop.h" #include "qemu/option.h" -#include "hw/sysbus.h" #include "hw/usb.h" #include "hw/xen/xen-legacy-backend.h" #include "monitor/qdev.h" diff --git a/hw/usb/xlnx-usb-subsystem.c b/hw/usb/xlnx-usb-subsystem.c index 568257370cb..d8deeb6ced5 100644 --- a/hw/usb/xlnx-usb-subsystem.c +++ b/hw/usb/xlnx-usb-subsystem.c @@ -24,10 +24,8 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" -#include "hw/irq.h" #include "hw/register.h" #include "qemu/bitops.h" -#include "qemu/log.h" #include "qom/object.h" #include "qapi/error.h" #include "hw/qdev-properties.h" diff --git a/hw/usb/xlnx-versal-usb2-ctrl-regs.c b/hw/usb/xlnx-versal-usb2-ctrl-regs.c index 9eaa59ebb8b..1c094aa1a63 100644 --- a/hw/usb/xlnx-versal-usb2-ctrl-regs.c +++ b/hw/usb/xlnx-versal-usb2-ctrl-regs.c @@ -32,7 +32,6 @@ #include "hw/irq.h" #include "hw/register.h" #include "qemu/bitops.h" -#include "qemu/log.h" #include "qom/object.h" #include "migration/vmstate.h" #include "hw/usb/xlnx-versal-usb2-ctrl-regs.h" diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 9571c2f91fd..e0dd561e85a 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -14,7 +14,6 @@ #include #include #include "qapi/error.h" -#include "hw/sysbus.h" #include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/ap-device.h" @@ -22,8 
+21,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/config-file.h" -#include "cpu.h" -#include "kvm_s390x.h" +#include "kvm/kvm_s390x.h" #include "migration/vmstate.h" #include "hw/qdev-properties.h" #include "hw/s390x/ap-bridge.h" diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index b2df708e4b0..0354737666a 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -20,7 +20,6 @@ #include #include "qapi/error.h" -#include "hw/sysbus.h" #include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" @@ -200,7 +199,7 @@ static int vfio_ccw_handle_clear(SubchDev *sch) case 0: case -ENODEV: case -EACCES: - return 0; + return ret; case -EFAULT: default: sch_gen_unit_exception(sch); @@ -241,7 +240,7 @@ static int vfio_ccw_handle_halt(SubchDev *sch) case -EBUSY: case -ENODEV: case -EACCES: - return 0; + return ret; case -EFAULT: default: sch_gen_unit_exception(sch); @@ -322,6 +321,7 @@ static void vfio_ccw_io_notifier_handler(void *opaque) SCHIB *schib = &sch->curr_status; SCSW s; IRB irb; + ESW esw; int size; if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { @@ -372,6 +372,9 @@ static void vfio_ccw_io_notifier_handler(void *opaque) copy_scsw_to_guest(&s, &irb.scsw); schib->scsw = s; + copy_esw_to_guest(&esw, &irb.esw); + sch->esw = esw; + /* If a uint check is pending, copy sense data. */ if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { @@ -412,8 +415,8 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, } if (vdev->num_irqs < irq + 1) { - error_setg(errp, "vfio: unexpected number of irqs %u", - vdev->num_irqs); + error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", + irq, vdev->num_irqs); return; } @@ -470,7 +473,7 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); + warn_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); } qemu_set_fd_handler(event_notifier_get_fd(notifier), @@ -690,20 +693,24 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) if (vcdev->crw_region) { vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err); if (err) { - goto out_crw_notifier_err; + goto out_irq_notifier_err; } } vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, &err); if (err) { - goto out_req_notifier_err; + /* + * Report this error, but do not make it a failing condition. + * Lack of this IRQ in the host does not prevent normal operation. 
+ */ + error_report_err(err); } return; -out_req_notifier_err: +out_irq_notifier_err: + vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX); vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); -out_crw_notifier_err: vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); out_io_notifier_err: vfio_ccw_put_region(vcdev); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index ae5654fcdb8..080046e3f51 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -36,6 +36,7 @@ #include "qemu/range.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" +#include "sysemu/runstate.h" #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" @@ -134,6 +135,29 @@ static const char *index_to_str(VFIODevice *vbasedev, int index) } } +static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) +{ + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + /* + * We support coordinated discarding of RAM via the RamDiscardManager. + */ + return ram_block_uncoordinated_discard_disable(state); + default: + /* + * VFIO_SPAPR_TCE_IOMMU most probably works just fine with + * RamDiscardManager, however, it is completely untested. + * + * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does + * completely the opposite of managing mapping/pinning dynamically as + * required by RamDiscardManager. We would have to special-case sections + * with a RamDiscardManager. + */ + return ram_block_discard_disable(state); + } +} + int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, int action, int fd, Error **errp) { @@ -527,6 +551,7 @@ static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); return 0; } } @@ -538,6 +563,7 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) { return (!memory_region_is_ram(section->mr) && !memory_region_is_iommu(section->mr)) || + memory_region_is_protected(section->mr) || /* * Sizing an enabled 64-bit BAR can cause spurious mappings to * addresses in the upper part of the 64-bit address space. These @@ -569,6 +595,44 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, error_report("iommu map to non memory area %"HWADDR_PRIx"", xlat); return false; + } else if (memory_region_has_ram_discard_manager(mr)) { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); + MemoryRegionSection tmp = { + .mr = mr, + .offset_within_region = xlat, + .size = int128_make64(len), + }; + + /* + * Malicious VMs can map memory into the IOMMU, which is expected + * to remain discarded. vfio will pin all pages, populating memory. + * Disallow that. vmstate priorities make sure any RamDiscardManager + * were already restored before IOMMUs are restored. + */ + if (!ram_discard_manager_is_populated(rdm, &tmp)) { + error_report("iommu map to discarded memory (e.g., unplugged via" + " virtio-mem): %"HWADDR_PRIx"", + iotlb->translated_addr); + return false; + } + + /* + * Malicious VMs might trigger discarding of IOMMU-mapped memory. The + * pages will remain pinned inside vfio until unmapped, resulting in a + * higher memory consumption than expected. If memory would get + * populated again later, there would be an inconsistency between pages + * pinned by vfio and pages seen by QEMU. 
This is the case until + * unmapped from the IOMMU (e.g., during device reset). + * + * With malicious guests, we really only care about pinning more memory + * than expected. RLIMIT_MEMLOCK set for the user/process can never be + * exceeded and can be used to mitigate this problem. + */ + warn_report_once("Using vfio with vIOMMUs and coordinated discarding of" + " RAM (e.g., virtio-mem) works, however, malicious" + " guests can trigger pinning of more memory than" + " intended via an IOMMU. It's possible to mitigate " + " by setting/adjusting RLIMIT_MEMLOCK."); } /* @@ -649,6 +713,154 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) rcu_read_unlock(); } +static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + int ret; + + /* Unmap with a single call. */ + ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); + if (ret) { + error_report("%s: vfio_dma_unmap() failed: %s", __func__, + strerror(-ret)); + } +} + +static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); + const hwaddr end = section->offset_within_region + + int128_get64(section->size); + hwaddr start, next, iova; + void *vaddr; + int ret; + + /* + * Map in (aligned within memory region) minimum granularity, so we can + * unmap in minimum granularity later. + */ + for (start = section->offset_within_region; start < end; start = next) { + next = ROUND_UP(start + 1, vrdl->granularity); + next = MIN(next, end); + + iova = start - section->offset_within_region + + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + + ret = vfio_dma_map(vrdl->container, iova, next - start, + vaddr, section->readonly); + if (ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); + return ret; + } + } + return 0; +} + +static void vfio_register_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) +{ + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + + /* Ignore some corner cases not relevant in practice. 
*/ + g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE)); + g_assert(QEMU_IS_ALIGNED(section->offset_within_address_space, + TARGET_PAGE_SIZE)); + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); + + vrdl = g_new0(VFIORamDiscardListener, 1); + vrdl->container = container; + vrdl->mr = section->mr; + vrdl->offset_within_address_space = section->offset_within_address_space; + vrdl->size = int128_get64(section->size); + vrdl->granularity = ram_discard_manager_get_min_granularity(rdm, + section->mr); + + g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); + g_assert(container->pgsizes && + vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); + + ram_discard_listener_init(&vrdl->listener, + vfio_ram_discard_notify_populate, + vfio_ram_discard_notify_discard, true); + ram_discard_manager_register_listener(rdm, &vrdl->listener, section); + QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); + + /* + * Sanity-check if we have a theoretically problematic setup where we could + * exceed the maximum number of possible DMA mappings over time. We assume + * that each mapped section in the same address space as a RamDiscardManager + * section consumes exactly one DMA mapping, with the exception of + * RamDiscardManager sections; i.e., we don't expect to have gIOMMU sections + * in the same address space as RamDiscardManager sections. + * + * We assume that each section in the address space consumes one memslot. + * We take the number of KVM memory slots as a best guess for the maximum + * number of sections in the address space we could have over time, + * also consuming DMA mappings. + */ + if (container->dma_max_mappings) { + unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; + +#ifdef CONFIG_KVM + if (kvm_enabled()) { + max_memslots = kvm_get_max_memslots(); + } +#endif + + QLIST_FOREACH(vrdl, &container->vrdl_list, next) { + hwaddr start, end; + + start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, + vrdl->granularity); + end = ROUND_UP(vrdl->offset_within_address_space + vrdl->size, + vrdl->granularity); + vrdl_mappings += (end - start) / vrdl->granularity; + vrdl_count++; + } + + if (vrdl_mappings + max_memslots - vrdl_count > + container->dma_max_mappings) { + warn_report("%s: possibly running out of DMA mappings. E.g., try" + " increasing the 'block-size' of virtio-mem devies." 
+ " Maximum possible DMA mappings: %d, Maximum possible" + " memslots: %d", __func__, container->dma_max_mappings, + max_memslots); + } + } +} + +static void vfio_unregister_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) +{ + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &container->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to unregister missing RAM discard listener"); + } + + ram_discard_manager_unregister_listener(rdm, &vrdl->listener); + QLIST_REMOVE(vrdl, next); + g_free(vrdl); +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -682,6 +894,13 @@ static void vfio_listener_region_add(MemoryListener *listener, llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); if (int128_ge(int128_make64(iova), llend)) { + if (memory_region_is_ram_device(section->mr)) { + trace_vfio_listener_region_add_no_dma_map( + memory_region_name(section->mr), + section->offset_within_address_space, + int128_getlo(section->size), + qemu_real_host_page_size); + } return; } end = int128_get64(int128_sub(llend, int128_one())); @@ -810,6 +1029,16 @@ static void vfio_listener_region_add(MemoryListener *listener, /* Here we assume that memory_region_is_ram(section->mr)==true */ + /* + * For RAM memory regions with a RamDiscardManager, we only want to map the + * actually populated parts - and update the mapping whenever we're notified + * about changes. + */ + if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_register_ram_discard_listener(container, section); + return; + } + vaddr = memory_region_get_ram_ptr(section->mr) + section->offset_within_region + (iova - section->offset_within_address_space); @@ -947,6 +1176,10 @@ static void vfio_listener_region_del(MemoryListener *listener, pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_unregister_ram_discard_listener(container, section); + /* Unregistering will trigger an unmap. */ + try_unmap = false; } if (try_unmap) { @@ -1108,6 +1341,49 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) rcu_read_unlock(); } +static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + void *opaque) +{ + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + VFIORamDiscardListener *vrdl = opaque; + + /* + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. 
+ */ + return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); +} + +static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) +{ + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &container->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to sync missing RAM discard listener"); + } + + /* + * We only want/can synchronize the bitmap for actually mapped parts - + * which correspond to populated parts. Replay all populated parts. + */ + return ram_discard_manager_replay_populated(rdm, section, + vfio_ram_discard_get_dirty_bitmap, + &vrdl); +} + static int vfio_sync_dirty_bitmap(VFIOContainer *container, MemoryRegionSection *section) { @@ -1139,6 +1415,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, } } return 0; + } else if (memory_region_has_ram_discard_manager(section->mr)) { + return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); } ram_addr = memory_region_get_ram_addr(section->mr) + @@ -1165,6 +1443,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, } static const MemoryListener vfio_memory_listener = { + .name = "vfio", .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, .log_global_start = vfio_listener_log_global_start, @@ -1732,15 +2011,25 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, * new memory, it will not yet set ram_block_discard_set_required() and * therefore, neither stops us here or deals with the sudden memory * consumption of inflated memory. + * + * We do support discarding of memory coordinated via the RamDiscardManager + * with some IOMMU types. vfio_ram_block_discard_disable() handles the + * details once we know which type of IOMMU we are using. 
*/ - ret = ram_block_discard_disable(true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - return ret; - } QLIST_FOREACH(container, &space->containers, next) { if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, + "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, + &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); + } + return ret; + } group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); vfio_kvm_device_add_group(group); @@ -1768,14 +2057,22 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->fd = fd; container->error = NULL; container->dirty_pages_supported = false; + container->dma_max_mappings = 0; QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); + QLIST_INIT(&container->vrdl_list); ret = vfio_init_container(container, group->fd, errp); if (ret) { goto free_container_exit; } + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + goto free_container_exit; + } + switch (container->iommu_type) { case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_IOMMU: @@ -1798,7 +2095,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); container->pgsizes = info->iova_pgsizes; + /* The default in the kernel ("dma_entry_limit") is 65535. */ + container->dma_max_mappings = 65535; if (!ret) { + vfio_get_info_dma_avail(info, &container->dma_max_mappings); vfio_get_iommu_info_migration(container, info); } g_free(info); @@ -1820,7 +2120,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (ret) { error_setg_errno(errp, errno, "failed to enable container"); ret = -errno; - goto free_container_exit; + goto enable_discards_exit; } } else { container->prereg_listener = vfio_prereg_listener; @@ -1832,7 +2132,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, ret = -1; error_propagate_prepend(errp, container->error, "RAM memory listener initialization failed: "); - goto free_container_exit; + goto enable_discards_exit; } } @@ -1845,7 +2145,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (v2) { memory_listener_unregister(&container->prereg_listener); } - goto free_container_exit; + goto enable_discards_exit; } if (v2) { @@ -1860,7 +2160,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, if (ret) { error_setg_errno(errp, -ret, "failed to remove existing window"); - goto free_container_exit; + goto enable_discards_exit; } } else { /* The default table uses 4K pages */ @@ -1901,6 +2201,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, vfio_kvm_device_del_group(group); vfio_listener_release(container); +enable_discards_exit: + vfio_ram_block_discard_disable(container, false); + free_container_exit: g_free(container); @@ -1908,7 +2211,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, close(fd); put_space_exit: - ram_block_discard_disable(false); vfio_put_address_space(space); return ret; @@ -1938,6 +2240,7 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; 
VFIOGuestIOMMU *giommu, *tmp; + VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -1948,6 +2251,12 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); @@ -2030,7 +2339,7 @@ void vfio_put_group(VFIOGroup *group) } if (!group->ram_block_discard_allowed) { - ram_block_discard_disable(false); + vfio_ram_block_discard_disable(group->container, false); } vfio_kvm_device_del_group(group); vfio_disconnect_container(group); @@ -2084,7 +2393,7 @@ int vfio_get_device(VFIOGroup *group, const char *name, if (!group->ram_block_discard_allowed) { group->ram_block_discard_allowed = true; - ram_block_discard_disable(false); + vfio_ram_block_discard_disable(group->container, false); } } diff --git a/hw/vfio/display.c b/hw/vfio/display.c index f04473e3cec..89bc90508fb 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -14,7 +14,6 @@ #include #include -#include "sysemu/sysemu.h" #include "hw/display/edid.h" #include "ui/console.h" #include "qapi/error.h" diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 470205f487e..d4685709a3b 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -557,7 +557,7 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * must allocate a 1MB aligned reserved memory region below 4GB with * the requested size (in bytes) for use by the Intel PCI class VGA * device at VM address 00:02.0. The base address of this reserved - * memory region must be written to the device BDSM regsiter at PCI + * memory region must be written to the device BDSM register at PCI * config offset 0x5C. */ bdsm_size = g_malloc(sizeof(*bdsm_size)); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 384576cfc05..ff6b45de6b5 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -15,7 +15,6 @@ #include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" -#include "cpu.h" #include "migration/migration.h" #include "migration/vmstate.h" #include "migration/qemu-file.h" @@ -725,7 +724,16 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) * _RUNNING bit */ mask = ~VFIO_DEVICE_STATE_RUNNING; - value = 0; + + /* + * When VM state transition to stop for savevm command, device should + * start saving data. 
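A minimal sketch of the state selection this hunk adds to vfio_vmstate_change(): on a stop transition the RUNNING bit is always cleared, and when the stop was triggered by savevm the SAVING bit is requested as well. The bit values here are invented for the example; the real ones come from the VFIO UAPI headers.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEV_STATE_RUNNING (1u << 0) /* placeholder value */
#define DEV_STATE_SAVING  (1u << 1) /* placeholder value */

/* Clear RUNNING on stop; additionally request SAVING for a savevm stop. */
static uint32_t next_device_state(uint32_t cur, int stopping_for_savevm)
{
    uint32_t mask  = ~DEV_STATE_RUNNING;
    uint32_t value = stopping_for_savevm ? DEV_STATE_SAVING : 0;

    return (cur & mask) | value;
}

int main(void)
{
    /* RUNNING device stopped by savevm ends up with only SAVING set. */
    printf("0x%" PRIx32 "\n", next_device_state(DEV_STATE_RUNNING, 1));
    return 0;
}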
+ */ + if (state == RUN_STATE_SAVE_VM) { + value = VFIO_DEVICE_STATE_SAVING; + } else { + value = 0; + } } ret = vfio_migration_set_state(vbasedev, mask, value); @@ -850,7 +858,6 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) { VFIOContainer *container = vbasedev->group->container; struct vfio_region_info *info = NULL; - Error *local_err = NULL; int ret = -ENOTSUP; if (!vbasedev->enable_migration || !container->dirty_pages_supported) { @@ -877,9 +884,8 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) "VFIO device doesn't support migration"); g_free(info); - ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); - if (local_err) { - error_propagate(errp, local_err); + ret = migrate_add_blocker(vbasedev->migration_blocker, errp); + if (ret < 0) { error_free(vbasedev->migration_blocker); vbasedev->migration_blocker = NULL; } @@ -893,6 +899,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) remove_migration_state_change_notifier(&migration->migration_state); qemu_del_vm_change_state_handler(migration->vm_state); + unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_exit(vbasedev); } diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index b90cf3d37c3..0cf69a8c6d6 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -22,7 +22,6 @@ #include "qapi/error.h" #include "qapi/visitor.h" #include -#include "hw/hw.h" #include "hw/nvram/fw_cfg.h" #include "hw/qdev-properties.h" #include "pci.h" @@ -1357,7 +1356,7 @@ static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev) /* * The scope of a config reset is controlled by a mode bit in the misc register * and a fuse, exposed as a bit in another register. The fuse is the default - * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the forumula + * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula * scope = !(misc ^ fuse), where the resulting scope is defined the same as * the fuse. A truth table therefore tells us that if misc == fuse, we need * to flip the value of the bit in the misc register. diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5c65aa0a98e..7b45353ce27 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -29,15 +29,14 @@ #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "migration/vmstate.h" +#include "qapi/qmp/qdict.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "qemu/option.h" #include "qemu/range.h" #include "qemu/units.h" #include "sysemu/kvm.h" #include "sysemu/runstate.h" -#include "sysemu/sysemu.h" #include "pci.h" #include "trace.h" #include "qapi/error.h" @@ -942,7 +941,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev) } if (vfio_opt_rom_in_denylist(vdev)) { - if (dev->opts && qemu_opt_get(dev->opts, "rombar")) { + if (dev->opts && qdict_haskey(dev->opts, "rombar")) { warn_report("Device at %s is known to cause system instability" " issues during option rom execution", vdev->vbasedev.name); @@ -1365,7 +1364,7 @@ static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp) * TODO: Lookup table for known devices. * * Logically we might use an algorithm here to select the BAR adding - * the least additional MMIO space, but we cannot programatically + * the least additional MMIO space, but we cannot programmatically * predict the driver dependency on BAR ordering or sizing, therefore * 'auto' becomes a lookup for combinations reported to work. 
*/ @@ -1500,6 +1499,14 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) if (vdev->vendor_id == PCI_VENDOR_ID_CHELSIO && (vdev->device_id & 0xff00) == 0x5800) { msix->pba_offset = 0x1000; + /* + * BAIDU KUNLUN Virtual Function devices for KUNLUN AI processor + * return an incorrect value of 0x460000 for the VF PBA offset while + * the BAR itself is only 0x10000. The correct value is 0xb400. + */ + } else if (vfio_pci_is(vdev, PCI_VENDOR_ID_BAIDU, + PCI_DEVICE_ID_KUNLUN_VF)) { + msix->pba_offset = 0xb400; } else if (vdev->msix_relo == OFF_AUTOPCIBAR_OFF) { error_setg(errp, "hardware reports invalid configuration, " "MSIX PBA outside of specified BAR"); @@ -2151,7 +2158,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) } /* - * Stop any ongoing DMA by disconecting I/O, MMIO, and bus master. + * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. * Also put INTx Disable in known state. */ cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); @@ -2377,7 +2384,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) } /* - * We want to differentiate hot reset of mulitple in-use devices vs hot reset + * We want to differentiate hot reset of multiple in-use devices vs hot reset * of a single in-use device. VFIO_DEVICE_RESET will already handle the case * of doing hot resets when there is only a single device per bus. The in-use * here refers to how many VFIODevices are affected. A hot reset that affects @@ -2446,7 +2453,12 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); PCIDevice *pdev = &vdev->pdev; - int ret; + pcibus_t old_addr[PCI_NUM_REGIONS - 1]; + int bar, ret; + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + old_addr[bar] = pdev->io_regions[bar].addr; + } ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1); if (ret) { @@ -2456,6 +2468,18 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) vfio_pci_write_config(pdev, PCI_COMMAND, pci_get_word(pdev->config + PCI_COMMAND), 2); + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + /* + * The address may not be changed in some scenarios + * (e.g. the VF driver isn't loaded in VM). 
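To make the post-load loop in vfio_pci_load_config() easier to follow, here is a simplified, self-contained version of the same check (made-up data, plain arrays instead of the PCI/VFIO structures): only BARs that actually moved and are smaller than a host page need their sub-page mapping refreshed.

#include <stdint.h>
#include <stdio.h>

#define NUM_BARS  6
#define PAGE_SIZE 4096u /* stand-in for qemu_real_host_page_size */

/* Re-map only BARs that both changed address and are sub-page sized. */
static void remap_sub_page_bars(const uint64_t *old_addr,
                                const uint64_t *new_addr,
                                const uint64_t *size)
{
    for (int bar = 0; bar < NUM_BARS; bar++) {
        if (old_addr[bar] != new_addr[bar] &&
            size[bar] > 0 && size[bar] < PAGE_SIZE) {
            printf("BAR%d moved, sub-page mapping needs refresh\n", bar);
        }
    }
}

int main(void)
{
    uint64_t old_addr[NUM_BARS] = { 0xfe000000 };
    uint64_t new_addr[NUM_BARS] = { 0xfe001000 };
    uint64_t size[NUM_BARS]     = { 0x800 };       /* 2 KiB BAR0 */

    remap_sub_page_bars(old_addr, new_addr, size);
    return 0;
}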
+ */ + if (old_addr[bar] != pdev->io_regions[bar].addr && + vdev->bars[bar].region.size > 0 && + vdev->bars[bar].region.size < qemu_real_host_page_size) { + vfio_sub_page_bar_update_mapping(pdev, bar); + } + } + if (msi_enabled(pdev)) { vfio_msi_enable(vdev); } else if (msix_enabled(pdev)) { @@ -3059,14 +3083,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) { + if (vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) { ret = vfio_pci_nvidia_v100_ram_init(vdev, errp); if (ret && ret != -ENODEV) { error_report("Failed to setup NVIDIA V100 GPU RAM"); } } - if (vdev->vendor_id == PCI_VENDOR_ID_IBM) { + if (vfio_pci_is(vdev, PCI_VENDOR_ID_IBM, PCI_ANY_ID)) { ret = vfio_pci_nvlink2_init(vdev, errp); if (ret && ret != -ENODEV) { error_report("Failed to setup NVlink2 bridge"); diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index cc3f66f7e44..f8f08a0f362 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -156,7 +156,7 @@ static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled) * if there is no more active IRQ * @opaque: actually points to the VFIO platform device * - * Called on mmap timer timout, this function checks whether the + * Called on mmap timer timeout, this function checks whether the * IRQ is still active and if not, restores the fast path. * by construction a single eventfd is handled at a time. * if the IRQ is still active, the timer is re-programmed. diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 2900bd19417..04c6e67f8fb 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -9,7 +9,6 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include #include @@ -137,6 +136,7 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener, } const MemoryListener vfio_prereg_listener = { + .name = "vfio-pre-reg", .region_add = vfio_prereg_listener_region_add, .region_del = vfio_prereg_listener_region_del, }; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 079f53acf28..0ef1b5f4a65 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # pci.c vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c" diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 0eda25c4e1b..c144d42f9bd 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -58,3 +58,13 @@ config VIRTIO_MEM depends on LINUX depends on VIRTIO_MEM_SUPPORTED select MEM_DEVICE + +config VHOST_USER_I2C + bool + default y + depends on VIRTIO && VHOST_USER + +config VHOST_USER_RNG + bool + default y + depends on VIRTIO && VHOST_USER diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index fbff9bc9d4d..521f7d64a86 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -25,6 +25,10 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock. 
virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) +virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index c62727f8793..650e521e351 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # vhost.c vhost_commit(bool started, bool changed) "Started: %d Changed: %d" @@ -52,6 +52,7 @@ vhost_vdpa_set_vring_call(void *dev, unsigned int index, int fd) "dev: %p index: vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64 vhost_vdpa_set_owner(void *dev) "dev: %p" vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 +vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 # virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 31b33bde37b..b65f8f7e97b 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -24,13 +24,15 @@ static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, void *arg) { int fd = (uintptr_t) dev->opaque; + int ret; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); - return ioctl(fd, request, arg); + ret = ioctl(fd, request, arg); + return ret < 0 ? 
-errno : ret; } -static int vhost_kernel_init(struct vhost_dev *dev, void *opaque) +static int vhost_kernel_init(struct vhost_dev *dev, void *opaque, Error **errp) { assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL); @@ -291,7 +293,7 @@ static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev, qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL); } -static const VhostOps kernel_ops = { +const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, .vhost_backend_cleanup = vhost_kernel_cleanup, @@ -326,34 +328,6 @@ static const VhostOps kernel_ops = { }; #endif -int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) -{ - int r = 0; - - switch (backend_type) { -#ifdef CONFIG_VHOST_KERNEL - case VHOST_BACKEND_TYPE_KERNEL: - dev->vhost_ops = &kernel_ops; - break; -#endif -#ifdef CONFIG_VHOST_USER - case VHOST_BACKEND_TYPE_USER: - dev->vhost_ops = &user_ops; - break; -#endif -#ifdef CONFIG_VHOST_VDPA - case VHOST_BACKEND_TYPE_VDPA: - dev->vhost_ops = &vdpa_ops; - break; -#endif - default: - error_report("Unknown vhost backend type"); - r = -1; - } - - return r; -} - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, uint64_t iova, uint64_t uaddr, uint64_t len, diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index 6f7f91533d2..c5959579839 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -235,9 +235,8 @@ static void vuf_device_realize(DeviceState *dev, Error **errp) fs->vhost_dev.nvqs = 1 + fs->conf.num_request_queues; fs->vhost_dev.vqs = g_new0(struct vhost_virtqueue, fs->vhost_dev.nvqs); ret = vhost_dev_init(&fs->vhost_dev, &fs->vhost_user, - VHOST_BACKEND_TYPE_USER, 0); + VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost_dev_init failed"); goto err_virtio; } diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c new file mode 100644 index 00000000000..70b7b65fd97 --- /dev/null +++ b/hw/virtio/vhost-user-i2c-pci.c @@ -0,0 +1,69 @@ +/* + * Vhost-user i2c virtio device PCI glue + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-i2c.h" +#include "virtio-pci.h" + +struct VHostUserI2CPCI { + VirtIOPCIProxy parent_obj; + VHostUserI2C vdev; +}; + +typedef struct VHostUserI2CPCI VHostUserI2CPCI; + +#define TYPE_VHOST_USER_I2C_PCI "vhost-user-i2c-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserI2CPCI, VHOST_USER_I2C_PCI, + TYPE_VHOST_USER_I2C_PCI) + +static void vhost_user_i2c_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserI2CPCI *dev = VHOST_USER_I2C_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_i2c_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_i2c_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_i2c_pci_instance_init(Object *obj) +{ + VHostUserI2CPCI *dev = VHOST_USER_I2C_PCI(obj); + + 
virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_I2C); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_i2c_pci_info = { + .base_name = TYPE_VHOST_USER_I2C_PCI, + .non_transitional_name = "vhost-user-i2c-pci", + .instance_size = sizeof(VHostUserI2CPCI), + .instance_init = vhost_user_i2c_pci_instance_init, + .class_init = vhost_user_i2c_pci_class_init, +}; + +static void vhost_user_i2c_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_i2c_pci_info); +} + +type_init(vhost_user_i2c_pci_register); diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c new file mode 100644 index 00000000000..d172632bb0c --- /dev/null +++ b/hw/virtio/vhost-user-i2c.c @@ -0,0 +1,288 @@ +/* + * Vhost-user i2c virtio device + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-i2c.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" + +/* Remove this once the header is updated in Linux kernel */ +#ifndef VIRTIO_ID_I2C_ADAPTER +#define VIRTIO_ID_I2C_ADAPTER 34 +#endif + +static void vu_i2c_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&i2c->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + i2c->vhost_dev.acked_features = vdev->guest_features; + + ret = vhost_dev_start(&i2c->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-i2c: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
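The new vu_i2c_start()/vu_i2c_stop() pair follows the usual vhost start ordering: enable host notifiers, bind guest notifiers, start the backend, then unmask the virtqueues, unwinding in reverse on failure. The stub program below shows only that ordering and the goto-based unwinding; the helper names are invented and do not correspond to real QEMU APIs.

#include <stdio.h>

/* Stubs standing in for the vhost helpers; ordering is the point here. */
static int  enable_host_notifiers(void)   { return 0; }
static int  set_guest_notifiers(int on)   { (void)on; return 0; }
static int  start_backend(void)           { return 0; }
static void disable_host_notifiers(void)  { }

static int device_start(void)
{
    int ret = enable_host_notifiers();
    if (ret < 0) {
        return ret;
    }
    ret = set_guest_notifiers(1);
    if (ret < 0) {
        goto err_host_notifiers;
    }
    ret = start_backend();
    if (ret < 0) {
        goto err_guest_notifiers;
    }
    return 0; /* success: the daemon now services the virtqueues */

err_guest_notifiers:
    set_guest_notifiers(0);
err_host_notifiers:
    disable_host_notifiers();
    return ret;
}

int main(void)
{
    printf("start: %d\n", device_start());
    return 0;
}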
+ */ + for (i = 0; i < i2c->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); +} + +static void vu_i2c_stop(VirtIODevice *vdev) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&i2c->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); +} + +static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (i2c->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vu_i2c_start(vdev); + } else { + vu_i2c_stop(vdev); + } +} + +static uint64_t vu_i2c_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask); +} + +static bool vu_i2c_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + return vhost_virtqueue_pending(&i2c->vhost_dev, idx); +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c) +{ + vhost_user_cleanup(&i2c->vhost_user); + virtio_delete_queue(i2c->vq); + virtio_cleanup(vdev); + g_free(i2c->vhost_dev.vqs); + i2c->vhost_dev.vqs = NULL; +} + +static int vu_i2c_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + if (i2c->connected) { + return 0; + } + i2c->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_i2c_start(vdev); + } + + return 0; +} + +static void vu_i2c_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + if (!i2c->connected) { + return; + } + i2c->connected = false; + + if (i2c->vhost_dev.started) { + vu_i2c_stop(vdev); + } +} + +static void vu_i2c_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + switch (event) { + case CHR_EVENT_OPENED: + if (vu_i2c_connect(dev) < 0) { + qemu_chr_fe_disconnect(&i2c->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vu_i2c_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vu_i2c_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(dev); + int ret; + + if (!i2c->chardev.chr) { + error_setg(errp, "vhost-user-i2c: missing chardev"); + return; + } + + 
if (!vhost_user_init(&i2c->vhost_user, &i2c->chardev, errp)) { + return; + } + + virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0); + + i2c->vhost_dev.nvqs = 1; + i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output); + i2c->vhost_dev.vqs = g_new0(struct vhost_virtqueue, i2c->vhost_dev.nvqs); + + ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + do_vhost_user_cleanup(vdev, i2c); + } + + qemu_chr_fe_set_handlers(&i2c->chardev, NULL, NULL, vu_i2c_event, NULL, + dev, NULL, true); +} + +static void vu_i2c_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(dev); + + /* This will stop vhost backend if appropriate. */ + vu_i2c_set_status(vdev, 0); + vhost_dev_cleanup(&i2c->vhost_dev); + do_vhost_user_cleanup(vdev, i2c); +} + +static const VMStateDescription vu_i2c_vmstate = { + .name = "vhost-user-i2c", + .unmigratable = 1, +}; + +static Property vu_i2c_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserI2C, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_i2c_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_i2c_properties); + dc->vmsd = &vu_i2c_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + vdc->realize = vu_i2c_device_realize; + vdc->unrealize = vu_i2c_device_unrealize; + vdc->get_features = vu_i2c_get_features; + vdc->set_status = vu_i2c_set_status; + vdc->guest_notifier_mask = vu_i2c_guest_notifier_mask; + vdc->guest_notifier_pending = vu_i2c_guest_notifier_pending; +} + +static const TypeInfo vu_i2c_info = { + .name = TYPE_VHOST_USER_I2C, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserI2C), + .class_init = vu_i2c_class_init, +}; + +static void vu_i2c_register_types(void) +{ + type_register_static(&vu_i2c_info); +} + +type_init(vu_i2c_register_types) diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c new file mode 100644 index 00000000000..c83dc868138 --- /dev/null +++ b/hw/virtio/vhost-user-rng-pci.c @@ -0,0 +1,79 @@ +/* + * Vhost-user RNG virtio device PCI glue + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-rng.h" +#include "virtio-pci.h" + +struct VHostUserRNGPCI { + VirtIOPCIProxy parent_obj; + VHostUserRNG vdev; +}; + +typedef struct VHostUserRNGPCI VHostUserRNGPCI; + +#define TYPE_VHOST_USER_RNG_PCI "vhost-user-rng-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserRNGPCI, VHOST_USER_RNG_PCI, + TYPE_VHOST_USER_RNG_PCI) + +static Property vhost_user_rng_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserRNGPCI *dev = VHOST_USER_RNG_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = 1; + } + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_rng_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_rng_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, 
dc->categories); + device_class_set_props(dc, vhost_user_rng_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_OTHERS; +} + +static void vhost_user_rng_pci_instance_init(Object *obj) +{ + VHostUserRNGPCI *dev = VHOST_USER_RNG_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_RNG); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_rng_pci_info = { + .base_name = TYPE_VHOST_USER_RNG_PCI, + .non_transitional_name = "vhost-user-rng-pci", + .instance_size = sizeof(VHostUserRNGPCI), + .instance_init = vhost_user_rng_pci_instance_init, + .class_init = vhost_user_rng_pci_class_init, +}; + +static void vhost_user_rng_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_rng_pci_info); +} + +type_init(vhost_user_rng_pci_register); diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c new file mode 100644 index 00000000000..209ee5bf9ac --- /dev/null +++ b/hw/virtio/vhost-user-rng.c @@ -0,0 +1,289 @@ +/* + * Vhost-user RNG virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * Implementation seriously tailored on vhost-user-i2c.c + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-rng.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" + +static void vu_rng_start(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&rng->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + rng->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&rng->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-rng: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
+ */ + for (i = 0; i < rng->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&rng->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); +} + +static void vu_rng_stop(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&rng->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); +} + +static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (rng->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vu_rng_start(vdev); + } else { + vu_rng_stop(vdev); + } +} + +static uint64_t vu_rng_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); +} + +static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + return vhost_virtqueue_pending(&rng->vhost_dev, idx); +} + +static void vu_rng_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + if (rng->connected) { + return; + } + + rng->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_rng_start(vdev); + } +} + +static void vu_rng_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + if (!rng->connected) { + return; + } + + rng->connected = false; + + if (rng->vhost_dev.started) { + vu_rng_stop(vdev); + } +} + +static void vu_rng_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + vu_rng_connect(dev); + break; + case CHR_EVENT_CLOSED: + vu_rng_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vu_rng_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(dev); + int ret; + + if (!rng->chardev.chr) { + error_setg(errp, "missing chardev"); + return; + } + + if (!vhost_user_init(&rng->vhost_user, &rng->chardev, errp)) { + return; + } + + virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0); + + rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output); + if (!rng->req_vq) { + error_setg_errno(errp, -1, "virtio_add_queue() failed"); + goto virtio_add_queue_failed; + } + + rng->vhost_dev.nvqs = 1; + rng->vhost_dev.vqs = g_new0(struct vhost_virtqueue, 
rng->vhost_dev.nvqs); + ret = vhost_dev_init(&rng->vhost_dev, &rng->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_dev_init() failed"); + goto vhost_dev_init_failed; + } + + qemu_chr_fe_set_handlers(&rng->chardev, NULL, NULL, vu_rng_event, NULL, + dev, NULL, true); + + return; + +vhost_dev_init_failed: + virtio_delete_queue(rng->req_vq); +virtio_add_queue_failed: + virtio_cleanup(vdev); + vhost_user_cleanup(&rng->vhost_user); +} + +static void vu_rng_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(dev); + + vu_rng_set_status(vdev, 0); + + vhost_dev_cleanup(&rng->vhost_dev); + g_free(rng->vhost_dev.vqs); + rng->vhost_dev.vqs = NULL; + virtio_delete_queue(rng->req_vq); + virtio_cleanup(vdev); + vhost_user_cleanup(&rng->vhost_user); +} + +static const VMStateDescription vu_rng_vmstate = { + .name = "vhost-user-rng", + .unmigratable = 1, +}; + +static Property vu_rng_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserRNG, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_rng_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_rng_properties); + dc->vmsd = &vu_rng_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + + vdc->realize = vu_rng_device_realize; + vdc->unrealize = vu_rng_device_unrealize; + vdc->get_features = vu_rng_get_features; + vdc->set_status = vu_rng_set_status; + vdc->guest_notifier_mask = vu_rng_guest_notifier_mask; + vdc->guest_notifier_pending = vu_rng_guest_notifier_pending; +} + +static const TypeInfo vu_rng_info = { + .name = TYPE_VHOST_USER_RNG, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserRNG), + .class_init = vu_rng_class_init, +}; + +static void vu_rng_register_types(void) +{ + type_register_static(&vu_rng_info); +} + +type_init(vu_rng_register_types) diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index a6f08c26b9a..52bd682c34d 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -34,10 +34,12 @@ static void vuv_get_config(VirtIODevice *vdev, uint8_t *config) static int vuv_handle_config_change(struct vhost_dev *dev) { VHostUserVSock *vsock = VHOST_USER_VSOCK(dev->vdev); + Error *local_err = NULL; int ret = vhost_dev_get_config(dev, (uint8_t *)&vsock->vsockcfg, - sizeof(struct virtio_vsock_config)); + sizeof(struct virtio_vsock_config), + &local_err); if (ret < 0) { - error_report("get config space failed"); + error_report_err(local_err); return -1; } @@ -79,7 +81,9 @@ static uint64_t vuv_get_features(VirtIODevice *vdev, { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - return vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); + features = vhost_get_features(&vvc->vhost_dev, user_feature_bits, features); + + return vhost_vsock_common_get_features(vdev, features, errp); } static const VMStateDescription vuv_vmstate = { @@ -108,16 +112,14 @@ static void vuv_device_realize(DeviceState *dev, Error **errp) vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops); ret = vhost_dev_init(&vvc->vhost_dev, &vsock->vhost_user, - VHOST_BACKEND_TYPE_USER, 0); + VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost_dev_init failed"); goto err_virtio; } ret = vhost_dev_get_config(&vvc->vhost_dev, (uint8_t *)&vsock->vsockcfg, - sizeof(struct virtio_vsock_config)); + sizeof(struct 
virtio_vsock_config), errp); if (ret < 0) { - error_setg_errno(errp, -ret, "get config space failed"); goto err_vhost_dev; } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ded0c104530..bf6e50223cb 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -303,7 +303,7 @@ struct vhost_user_read_cb_data { int ret; }; -static gboolean vhost_user_read_cb(GIOChannel *source, GIOCondition condition, +static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, gpointer opaque) { struct vhost_user_read_cb_data *data = opaque; @@ -429,7 +429,7 @@ static int process_message_reply(struct vhost_dev *dev, } if (msg_reply.hdr.request != msg->hdr.request) { - error_report("Received unexpected msg type." + error_report("Received unexpected msg type. " "Expected %d received %d", msg->hdr.request, msg_reply.hdr.request); return -1; @@ -1095,23 +1095,6 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, return 0; } -static int vhost_user_set_vring_addr(struct vhost_dev *dev, - struct vhost_vring_addr *addr) -{ - VhostUserMsg msg = { - .hdr.request = VHOST_USER_SET_VRING_ADDR, - .hdr.flags = VHOST_USER_VERSION, - .payload.addr = *addr, - .hdr.size = sizeof(msg.payload.addr), - }; - - if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; - } - - return 0; -} - static int vhost_user_set_vring_endian(struct vhost_dev *dev, struct vhost_vring_state *ring) { @@ -1288,72 +1271,150 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev, return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); } -static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) + +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) { VhostUserMsg msg = { .hdr.request = request, .hdr.flags = VHOST_USER_VERSION, - .payload.u64 = u64, - .hdr.size = sizeof(msg.payload.u64), }; + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; + } + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } + if (vhost_user_read(dev, &msg) < 0) { + return -1; + } + + if (msg.hdr.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.hdr.request); + return -1; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_report("Received bad msg size."); + return -1; + } + + *u64 = msg.payload.u64; + return 0; } -static int vhost_user_set_features(struct vhost_dev *dev, - uint64_t features) +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) { - return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); + if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { + return -EPROTO; + } + + return 0; } -static int vhost_user_set_protocol_features(struct vhost_dev *dev, - uint64_t features) +static int enforce_reply(struct vhost_dev *dev, + const VhostUserMsg *msg) { - return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); + uint64_t dummy; + + if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(dev, msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. 
+ */ + return vhost_user_get_features(dev, &dummy); } -static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +static int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) { VhostUserMsg msg = { - .hdr.request = request, + .hdr.request = VHOST_USER_SET_VRING_ADDR, .hdr.flags = VHOST_USER_VERSION, + .payload.addr = *addr, + .hdr.size = sizeof(msg.payload.addr), }; - if (vhost_user_one_time_request(request) && dev->vq_index != 0) { - return 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); + + if (reply_supported && wait_for_reply) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } - if (vhost_user_read(dev, &msg) < 0) { - return -1; + if (wait_for_reply) { + return enforce_reply(dev, &msg); } - if (msg.hdr.request != request) { - error_report("Received unexpected msg type. Expected %d received %d", - request, msg.hdr.request); - return -1; + return 0; +} + +static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, + bool wait_for_reply) +{ + VhostUserMsg msg = { + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, + .payload.u64 = u64, + .hdr.size = sizeof(msg.payload.u64), + }; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } } - if (msg.hdr.size != sizeof(msg.payload.u64)) { - error_report("Received bad msg size."); + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { return -1; } - *u64 = msg.payload.u64; + if (wait_for_reply) { + return enforce_reply(dev, &msg); + } return 0; } -static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +static int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); + + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features, + log_enabled); +} + +static int vhost_user_set_protocol_features(struct vhost_dev *dev, + uint64_t features) { - return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); + return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features, + false); } static int vhost_user_set_owner(struct vhost_dev *dev) @@ -1364,7 +1425,7 @@ static int vhost_user_set_owner(struct vhost_dev *dev) }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + return -EPROTO; } return 0; @@ -1465,11 +1526,13 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]", user, queue_idx); - memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, - page_size, addr); + if (!n->mr.ram) /* Don't init again after suspend. 
*/ + memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, + page_size, addr); g_free(name); if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) { + object_unparent(OBJECT(&n->mr)); munmap(addr, page_size); return -1; } @@ -1856,7 +1919,8 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, return 0; } -static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) +static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + Error **errp) { uint64_t features, protocol_features, ram_slots; struct vhost_user *u; @@ -1871,6 +1935,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) err = vhost_user_get_features(dev, &features); if (err < 0) { + error_setg_errno(errp, -err, "vhost_backend_init failed"); return err; } @@ -1880,7 +1945,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } dev->protocol_features = @@ -1891,14 +1957,15 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); } else if (!(protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) { - error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG " - "but backend does not support it."); - return -1; + error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG " + "but backend does not support it."); + return -EINVAL; } err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } /* query the max queues we support if backend supports Multiple Queue */ @@ -1906,8 +1973,17 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } + } else { + dev->max_queues = 1; + } + + if (dev->num_queues && dev->max_queues < dev->num_queues) { + error_setg(errp, "The maximum number of queues supported by the " + "backend is %" PRIu64, dev->max_queues); + return -EINVAL; } if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && @@ -1915,9 +1991,9 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) VHOST_USER_PROTOCOL_F_SLAVE_REQ) && virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { - error_report("IOMMU support requires reply-ack and " - "slave-req protocol features."); - return -1; + error_setg(errp, "IOMMU support requires reply-ack and " + "slave-req protocol features."); + return -EINVAL; } /* get max memory regions if backend supports configurable RAM slots */ @@ -1927,15 +2003,16 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) } else { err = vhost_user_get_max_memslots(dev, &ram_slots); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } if (ram_slots < u->user->memory_slots) { - error_report("The backend specified a max ram slots limit " - "of %" PRIu64", when the prior validated limit was %d. 
" - "This limit should never decrease.", ram_slots, - u->user->memory_slots); - return -1; + error_setg(errp, "The backend specified a max ram slots limit " + "of %" PRIu64", when the prior validated limit was " + "%d. This limit should never decrease.", ram_slots, + u->user->memory_slots); + return -EINVAL; } u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); @@ -1953,7 +2030,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) if (dev->vq_index == 0) { err = vhost_setup_slave_channel(dev); if (err < 0) { - return err; + error_setg_errno(errp, EPROTO, "vhost_backend_init failed"); + return -EPROTO; } } @@ -2107,7 +2185,7 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) } static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len) + uint32_t config_len, Error **errp) { VhostUserMsg msg = { .hdr.request = VHOST_USER_GET_CONFIG, @@ -2117,32 +2195,34 @@ static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CONFIG)) { - return -1; + error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported"); + return -EINVAL; } - if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { - return -1; - } + assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE); msg.payload.config.offset = 0; msg.payload.config.size = config_len; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { - return -1; + error_setg_errno(errp, EPROTO, "vhost_get_config failed"); + return -EPROTO; } if (vhost_user_read(dev, &msg) < 0) { - return -1; + error_setg_errno(errp, EPROTO, "vhost_get_config failed"); + return -EPROTO; } if (msg.hdr.request != VHOST_USER_GET_CONFIG) { - error_report("Received unexpected msg type. Expected %d received %d", - VHOST_USER_GET_CONFIG, msg.hdr.request); - return -1; + error_setg(errp, + "Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_CONFIG, msg.hdr.request); + return -EINVAL; } if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { - error_report("Received bad msg size."); - return -1; + error_setg(errp, "Received bad msg size."); + return -EINVAL; } memcpy(config, msg.payload.config.region, config_len); @@ -2401,7 +2481,7 @@ void vhost_user_cleanup(VhostUserState *user) if (!user->chr) { return; } - + memory_region_transaction_begin(); for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { if (user->notifier[i].addr) { object_unparent(OBJECT(&user->notifier[i].mr)); @@ -2409,6 +2489,7 @@ void vhost_user_cleanup(VhostUserState *user) user->notifier[i].addr = NULL; } } + memory_region_transaction_commit(); user->chr = NULL; } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 01d2101d097..bcaf00e09f3 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -18,22 +18,55 @@ #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio-net.h" #include "hw/virtio/vhost-vdpa.h" +#include "exec/address-spaces.h" #include "qemu/main-loop.h" #include "cpu.h" #include "trace.h" #include "qemu-common.h" -static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section) +/* + * Return one past the end of the end of section. Be careful with uint64_t + * conversions! + */ +static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) { - return (!memory_region_is_ram(section->mr) && - !memory_region_is_iommu(section->mr)) || - /* - * Sizing an enabled 64-bit BAR can cause spurious mappings to - * addresses in the upper part of the 64-bit address space. 
These - * are never accessed by the CPU and beyond the address width of - * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width. - */ - section->offset_within_address_space & (1ULL << 63); + Int128 llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); + llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + + return llend; +} + +static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + uint64_t iova_min, + uint64_t iova_max) +{ + Int128 llend; + + if ((!memory_region_is_ram(section->mr) && + !memory_region_is_iommu(section->mr)) || + memory_region_is_protected(section->mr) || + /* vhost-vDPA doesn't allow MMIO to be mapped */ + memory_region_is_ram_device(section->mr)) { + return true; + } + + if (section->offset_within_address_space < iova_min) { + error_report("RAM section out of device range (min=0x%" PRIx64 + ", addr=0x%" HWADDR_PRIx ")", + iova_min, section->offset_within_address_space); + return true; + } + + llend = vhost_vdpa_section_end(section); + if (int128_gt(llend, int128_make64(iova_max))) { + error_report("RAM section out of device range (max=0x%" PRIx64 + ", end addr=0x%" PRIx64 ")", + iova_max, int128_get64(llend)); + return true; + } + + return false; } static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, @@ -86,19 +119,13 @@ static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, return ret; } -static void vhost_vdpa_listener_begin(MemoryListener *listener) +static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - struct vhost_dev *dev = v->dev; - struct vhost_msg_v2 msg = {}; int fd = v->device_fd; - - if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { - return; - } - - msg.type = v->msg_type; - msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN; + struct vhost_msg_v2 msg = { + .type = v->msg_type, + .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, + }; if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", @@ -106,6 +133,16 @@ static void vhost_vdpa_listener_begin(MemoryListener *listener) } } +static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) +{ + if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && + !v->iotlb_batch_begin_sent) { + vhost_vdpa_listener_begin_batch(v); + } + + v->iotlb_batch_begin_sent = true; +} + static void vhost_vdpa_listener_commit(MemoryListener *listener) { struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); @@ -117,6 +154,10 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) return; } + if (!v->iotlb_batch_begin_sent) { + return; + } + msg.type = v->msg_type; msg.iotlb.type = VHOST_IOTLB_BATCH_END; @@ -124,6 +165,8 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); } + + v->iotlb_batch_begin_sent = false; } static void vhost_vdpa_listener_region_add(MemoryListener *listener, @@ -135,7 +178,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, void *vaddr; int ret; - if (vhost_vdpa_listener_skipped_section(section)) { + if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, + v->iova_range.last)) { return; } @@ -146,10 +190,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, } iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = 
int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); - + llend = vhost_vdpa_section_end(section); if (int128_ge(int128_make64(iova), llend)) { return; } @@ -167,26 +208,17 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); + vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), vaddr, section->readonly); if (ret) { error_report("vhost vdpa map fail!"); - if (memory_region_is_ram_device(section->mr)) { - /* Allow unexpected mappings not to be fatal for RAM devices */ - error_report("map ram fail!"); - return ; - } goto fail; } return; fail: - if (memory_region_is_ram_device(section->mr)) { - error_report("failed to vdpa_dma_map. pci p2p may not work"); - return; - - } /* * On the initfn path, store the first error in the container so we * can gracefully fail. Runtime, there's not much we can do other @@ -205,7 +237,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, Int128 llend, llsize; int ret; - if (vhost_vdpa_listener_skipped_section(section)) { + if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, + v->iova_range.last)) { return; } @@ -216,9 +249,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, } iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); - llend = int128_make64(section->offset_within_address_space); - llend = int128_add(llend, section->size); - llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); + llend = vhost_vdpa_section_end(section); trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); @@ -228,6 +259,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); + vhost_vdpa_iotlb_batch_begin_once(v); ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); if (ret) { error_report("vhost_vdpa dma unmap error!"); @@ -241,7 +273,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, * depends on the addnop(). */ static const MemoryListener vhost_vdpa_memory_listener = { - .begin = vhost_vdpa_listener_begin, + .name = "vhost-vdpa", .commit = vhost_vdpa_listener_commit, .region_add = vhost_vdpa_listener_region_add, .region_del = vhost_vdpa_listener_region_del, @@ -252,10 +284,12 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, { struct vhost_vdpa *v = dev->opaque; int fd = v->device_fd; + int ret; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); - return ioctl(fd, request, arg); + ret = ioctl(fd, request, arg); + return ret < 0 ? 
-errno : ret; } static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) @@ -272,36 +306,155 @@ static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); } -static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque) +static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) +{ + int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, + &v->iova_range); + if (ret != 0) { + v->iova_range.first = 0; + v->iova_range.last = UINT64_MAX; + } + + trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, + v->iova_range.last); +} + +static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + return v->index != 0; +} + +static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v; - uint64_t features; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); trace_vhost_vdpa_init(dev, opaque); + int ret; + + /* + * Similar to VFIO, we end up pinning all guest memory and have to + * disable discarding of RAM. + */ + ret = ram_block_discard_disable(true); + if (ret) { + error_report("Cannot set discarding of RAM broken"); + return ret; + } v = opaque; v->dev = dev; dev->opaque = opaque ; - vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features); - dev->backend_features = features; v->listener = vhost_vdpa_memory_listener; v->msg_type = VHOST_IOTLB_MSG_V2; + vhost_vdpa_get_iova_range(v); + + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); return 0; } +static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, + int queue_index) +{ + size_t page_size = qemu_real_host_page_size; + struct vhost_vdpa *v = dev->opaque; + VirtIODevice *vdev = dev->vdev; + VhostVDPAHostNotifier *n; + + n = &v->notifier[queue_index]; + + if (n->addr) { + virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false); + object_unparent(OBJECT(&n->mr)); + munmap(n->addr, page_size); + n->addr = NULL; + } +} + +static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) +{ + int i; + + for (i = 0; i < n; i++) { + vhost_vdpa_host_notifier_uninit(dev, i); + } +} + +static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) +{ + size_t page_size = qemu_real_host_page_size; + struct vhost_vdpa *v = dev->opaque; + VirtIODevice *vdev = dev->vdev; + VhostVDPAHostNotifier *n; + int fd = v->device_fd; + void *addr; + char *name; + + vhost_vdpa_host_notifier_uninit(dev, queue_index); + + n = &v->notifier[queue_index]; + + addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, + queue_index * page_size); + if (addr == MAP_FAILED) { + goto err; + } + + name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]", + v, queue_index); + memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, + page_size, addr); + g_free(name); + + if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { + munmap(addr, page_size); + goto err; + } + n->addr = addr; + + return 0; + +err: + return -1; +} + +static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) +{ + int i; + + for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { + if (vhost_vdpa_host_notifier_init(dev, i)) { + goto err; + } + } + + return; + +err: + vhost_vdpa_host_notifiers_uninit(dev, i); + return; +} + static int vhost_vdpa_cleanup(struct vhost_dev *dev) { struct vhost_vdpa *v; assert(dev->vhost_ops->backend_type == 
VHOST_BACKEND_TYPE_VDPA); v = dev->opaque; trace_vhost_vdpa_cleanup(dev, v); + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); memory_listener_unregister(&v->listener); dev->opaque = NULL; + ram_block_discard_disable(false); + return 0; } @@ -314,6 +467,10 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { @@ -337,6 +494,11 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, uint64_t features) { int ret; + + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_features(dev, features); ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features); uint8_t status = 0; @@ -357,13 +519,16 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) int r; if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return 0; + return -EFAULT; } features &= f; - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return 0; + + if (vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } } dev->backend_cap = features; @@ -371,8 +536,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) return 0; } -int vhost_vdpa_get_device_id(struct vhost_dev *dev, - uint32_t *device_id) +static int vhost_vdpa_get_device_id(struct vhost_dev *dev, + uint32_t *device_id) { int ret; ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); @@ -394,8 +559,8 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) { assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); - trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index); - return idx - dev->vq_index; + trace_vhost_vdpa_get_vq_index(dev, idx, idx); + return idx; } static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) @@ -448,7 +613,7 @@ static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, } static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len) + uint32_t config_len, Error **errp) { struct vhost_vdpa_config *v_config; unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); @@ -472,10 +637,21 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) { struct vhost_vdpa *v = dev->opaque; trace_vhost_vdpa_dev_start(dev, started); + + if (started) { + vhost_vdpa_host_notifiers_init(dev); + vhost_vdpa_set_vring_ready(dev); + } else { + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); + } + + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return 0; + } + if (started) { uint8_t status = 0; memory_listener_register(&v->listener, &address_space_memory); - vhost_vdpa_set_vring_ready(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status); @@ -493,6 +669,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, log->log); return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); @@ -558,6 +738,10 @@ static int 
vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + if (vhost_vdpa_one_time_request(dev)) { + return 0; + } + trace_vhost_vdpa_set_owner(dev); return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); } diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 4ad6e234adf..3f3771274e7 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -18,6 +18,30 @@ #include "qemu/iov.h" #include "monitor/monitor.h" +const int feature_bits[] = { + VIRTIO_VSOCK_F_SEQPACKET, + VHOST_INVALID_FEATURE_BIT +}; + +uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + if (vvc->seqpacket != ON_OFF_AUTO_OFF) { + virtio_add_feature(&features, VIRTIO_VSOCK_F_SEQPACKET); + } + + features = vhost_get_features(&vvc->vhost_dev, feature_bits, features); + + if (vvc->seqpacket == ON_OFF_AUTO_ON && + !virtio_has_feature(features, VIRTIO_VSOCK_F_SEQPACKET)) { + error_setg(errp, "vhost-vsock backend doesn't support seqpacket"); + } + + return features; +} + int vhost_vsock_common_start(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); @@ -231,11 +255,18 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev) virtio_cleanup(vdev); } +static Property vhost_vsock_common_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_END_OF_LIST(), +}; + static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + device_class_set_props(dc, vhost_vsock_common_properties); set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->guest_notifier_mask = vhost_vsock_common_guest_notifier_mask; vdc->guest_notifier_pending = vhost_vsock_common_guest_notifier_pending; diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 8ddfb9abfe1..478c0c9a878 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -108,8 +108,7 @@ static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, uint64_t requested_features, Error **errp) { - /* No feature bits used yet */ - return requested_features; + return vhost_vsock_common_get_features(vdev, requested_features, errp); } static const VMStateDescription vmstate_virtio_vhost_vsock = { @@ -170,9 +169,8 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) vhost_vsock_common_realize(vdev, "vhost-vsock"); ret = vhost_dev_init(&vvc->vhost_dev, (void *)(uintptr_t)vhostfd, - VHOST_BACKEND_TYPE_KERNEL, 0); + VHOST_BACKEND_TYPE_KERNEL, 0, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); goto err_virtio; } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e2163a0d63e..437347ad01c 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -21,7 +21,6 @@ #include "qemu/error-report.h" #include "qemu/memfd.h" #include "standard-headers/linux/vhost_types.h" -#include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "migration/blocker.h" @@ -175,6 +174,35 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) return log_size; } +static int vhost_set_backend_type(struct vhost_dev *dev, + VhostBackendType backend_type) +{ + int r = 0; + + switch (backend_type) { +#ifdef CONFIG_VHOST_KERNEL + case VHOST_BACKEND_TYPE_KERNEL: + dev->vhost_ops = &kernel_ops; + break; +#endif +#ifdef 
CONFIG_VHOST_USER + case VHOST_BACKEND_TYPE_USER: + dev->vhost_ops = &user_ops; + break; +#endif +#ifdef CONFIG_VHOST_VDPA + case VHOST_BACKEND_TYPE_VDPA: + dev->vhost_ops = &vdpa_ops; + break; +#endif + default: + error_report("Unknown vhost backend type"); + r = -1; + } + + return r; +} + static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) { Error *err = NULL; @@ -287,7 +315,7 @@ static int vhost_dev_has_iommu(struct vhost_dev *dev) * does not have IOMMU, there's no need to enable this feature * which may cause unnecessary IOTLB miss/update trnasactions. */ - return vdev->dma_as != &address_space_memory && + return virtio_bus_device_iommu_enabled(vdev) && virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); } @@ -1287,11 +1315,11 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) } int vhost_dev_init(struct vhost_dev *hdev, void *opaque, - VhostBackendType backend_type, uint32_t busyloop_timeout) + VhostBackendType backend_type, uint32_t busyloop_timeout, + Error **errp) { uint64_t features; int i, r, n_initialized_vqs = 0; - Error *local_err = NULL; hdev->vdev = NULL; hdev->migration_blocker = NULL; @@ -1299,26 +1327,27 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, r = vhost_set_backend_type(hdev, backend_type); assert(r >= 0); - r = hdev->vhost_ops->vhost_backend_init(hdev, opaque); + r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp); if (r < 0) { goto fail; } r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { - VHOST_OPS_DEBUG("vhost_set_owner failed"); + error_setg_errno(errp, -r, "vhost_set_owner failed"); goto fail; } r = hdev->vhost_ops->vhost_get_features(hdev, &features); if (r < 0) { - VHOST_OPS_DEBUG("vhost_get_features failed"); + error_setg_errno(errp, -r, "vhost_get_features failed"); goto fail; } for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) { r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i); if (r < 0) { + error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i); goto fail; } } @@ -1328,6 +1357,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, busyloop_timeout); if (r < 0) { + error_setg_errno(errp, -r, "Failed to set busyloop timeout"); goto fail_busyloop; } } @@ -1336,6 +1366,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->features = features; hdev->memory_listener = (MemoryListener) { + .name = "vhost", .begin = vhost_begin, .commit = vhost_commit, .region_add = vhost_region_addnop, @@ -1351,6 +1382,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, }; hdev->iommu_listener = (MemoryListener) { + .name = "vhost-iommu", .region_add = vhost_iommu_region_add, .region_del = vhost_iommu_region_del, }; @@ -1366,9 +1398,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } if (hdev->migration_blocker != NULL) { - r = migrate_add_blocker(hdev->migration_blocker, &local_err); - if (local_err) { - error_report_err(local_err); + r = migrate_add_blocker(hdev->migration_blocker, errp); + if (r < 0) { error_free(hdev->migration_blocker); goto fail_busyloop; } @@ -1385,9 +1416,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { - error_report("vhost backend memory slots limit is less" - " than current number of present memory slots"); - r = -1; + error_setg(errp, "vhost backend memory slots limit is less" + " than current number of 
present memory slots"); + r = -EINVAL; goto fail_busyloop; } @@ -1558,15 +1589,17 @@ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, } int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, - uint32_t config_len) + uint32_t config_len, Error **errp) { assert(hdev->vhost_ops); if (hdev->vhost_ops->vhost_get_config) { - return hdev->vhost_ops->vhost_get_config(hdev, config, config_len); + return hdev->vhost_ops->vhost_get_config(hdev, config, config_len, + errp); } - return -1; + error_setg(errp, "vhost_get_config not implemented"); + return -ENOTSUP; } int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data, diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index d120bf8f438..9a4f491b54d 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -30,6 +30,7 @@ #include "trace.h" #include "qemu/error-report.h" #include "migration/misc.h" +#include "migration/migration.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" @@ -230,7 +231,7 @@ static void balloon_stats_poll_cb(void *opaque) return; } - virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset); + virtqueue_push(s->svq, s->stats_vq_elem, 0); virtio_notify(vdev, s->svq); g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; @@ -437,7 +438,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) memory_region_unref(section.mr); } - virtqueue_push(vq, elem, offset); + virtqueue_push(vq, elem, 0); virtio_notify(vdev, vq); g_free(elem); virtio_balloon_pbp_free(&pbp); @@ -509,6 +510,7 @@ static bool get_free_page_hints(VirtIOBalloon *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtQueue *vq = dev->free_page_vq; bool ret = true; + int i; while (dev->block_iothread) { qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); @@ -533,26 +535,24 @@ static bool get_free_page_hints(VirtIOBalloon *dev) if (dev->free_page_hint_status == FREE_PAGE_HINT_S_REQUESTED && id == dev->free_page_hint_cmd_id) { dev->free_page_hint_status = FREE_PAGE_HINT_S_START; - } else { + } else if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { /* * Stop the optimization only when it has started. This * avoids a stale stop sign for the previous command. 
*/ - if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { - dev->free_page_hint_status = FREE_PAGE_HINT_S_STOP; - } + dev->free_page_hint_status = FREE_PAGE_HINT_S_STOP; } } - if (elem->in_num) { - if (dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { - qemu_guest_free_page_hint(elem->in_sg[0].iov_base, - elem->in_sg[0].iov_len); + if (elem->in_num && dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { + for (i = 0; i < elem->in_num; i++) { + qemu_guest_free_page_hint(elem->in_sg[i].iov_base, + elem->in_sg[i].iov_len); } } out: - virtqueue_push(vq, elem, 1); + virtqueue_push(vq, elem, 0); g_free(elem); return ret; } @@ -591,16 +591,10 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s) { VirtIODevice *vdev = VIRTIO_DEVICE(s); - /* For the stop and copy phase, we don't need to start the optimization */ - if (!vdev->vm_running) { - return; - } - qemu_mutex_lock(&s->free_page_lock); if (s->free_page_hint_cmd_id == UINT_MAX) { - s->free_page_hint_cmd_id = - VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN; + s->free_page_hint_cmd_id = VIRTIO_BALLOON_FREE_PAGE_HINT_CMD_ID_MIN; } else { s->free_page_hint_cmd_id++; } @@ -648,8 +642,7 @@ static void virtio_balloon_free_page_done(VirtIOBalloon *s) static int virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) { - VirtIOBalloon *dev = container_of(n, VirtIOBalloon, - free_page_hint_notify); + VirtIOBalloon *dev = container_of(n, VirtIOBalloon, free_page_hint_notify); VirtIODevice *vdev = VIRTIO_DEVICE(dev); PrecopyNotifyData *pnd = data; @@ -662,10 +655,19 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) return 0; } + /* + * Pages hinted via qemu_guest_free_page_hint() are cleared from the dirty + * bitmap and will not get migrated, especially also not when the postcopy + * destination starts using them and requests migration from the source; the + * faulting thread will stall until postcopy migration finishes and + * all threads are woken up. Let's not start free page hinting if postcopy + * is possible. 
+ */ + if (migrate_postcopy_ram()) { + return 0; + } + switch (pnd->reason) { - case PRECOPY_NOTIFY_SETUP: - precopy_enable_free_page_optimization(); - break; case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC: virtio_balloon_free_page_stop(dev); break; @@ -685,6 +687,7 @@ virtio_balloon_free_page_hint_notify(NotifierWithReturn *n, void *data) */ virtio_balloon_free_page_done(dev); break; + case PRECOPY_NOTIFY_SETUP: case PRECOPY_NOTIFY_COMPLETE: break; default: @@ -852,7 +855,7 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_hint = { }; static const VMStateDescription vmstate_virtio_balloon_page_poison = { - .name = "vitio-balloon-device/page-poison", + .name = "virtio-balloon-device/page-poison", .version_id = 1, .minimum_version_id = 1, .needed = virtio_balloon_page_poison_support, @@ -908,8 +911,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output); s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats); - if (virtio_has_feature(s->host_features, - VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, virtio_balloon_handle_free_page_vq); precopy_add_notifier(&s->free_page_hint_notify); diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index d6332d45c3b..d23db98c568 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -69,6 +69,11 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) return; } + if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + error_setg(errp, "iommu_platform=true is not supported by the device"); + return; + } + if (klass->device_plugged != NULL) { klass->device_plugged(qbus->parent, &local_err); } @@ -320,6 +325,20 @@ static char *virtio_bus_get_fw_dev_path(DeviceState *dev) return NULL; } +bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev) +{ + DeviceState *qdev = DEVICE(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(qdev)); + VirtioBusState *bus = VIRTIO_BUS(qbus); + VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); + + if (!klass->iommu_enabled) { + return false; + } + + return klass->iommu_enabled(qbus->parent); +} + static void virtio_bus_class_init(ObjectClass *klass, void *data) { BusClass *bus_class = BUS_CLASS(klass); diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 770c286be73..a160ae6b413 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -98,9 +98,7 @@ static void virtio_iommu_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_iommu_pci_info = { - .base_name = TYPE_VIRTIO_IOMMU_PCI, - .generic_name = "virtio-iommu-pci", - .non_transitional_name = "virtio-iommu-pci-non-transitional", + .generic_name = TYPE_VIRTIO_IOMMU_PCI, .instance_size = sizeof(VirtIOIOMMUPCI), .instance_init = virtio_iommu_pci_instance_init, .class_init = virtio_iommu_pci_class_init, diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index fa5395cd885..be2383b0c52 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -87,14 +87,12 @@ static void virtio_mem_pci_size_change_notify(Notifier *notifier, void *data) VirtIOMEMPCI *pci_mem = container_of(notifier, VirtIOMEMPCI, size_change_notifier); DeviceState *dev = DEVICE(pci_mem); + char *qom_path = object_get_canonical_path(OBJECT(dev)); const uint64_t * const size_p = data; - const char *id = NULL; - if 
(dev->id) { - id = g_strdup(dev->id); - } - - qapi_event_send_memory_device_size_change(!!id, id, *size_p); + qapi_event_send_memory_device_size_change(!!dev->id, dev->id, *size_p, + qom_path); + g_free(qom_path); } static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 655824ff81a..d5a578142b7 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -145,7 +145,205 @@ static bool virtio_mem_is_busy(void) return migration_in_incoming_postcopy() || !migration_is_idle(); } -static bool virtio_mem_test_bitmap(VirtIOMEM *vmem, uint64_t start_gpa, +typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg, + uint64_t offset, uint64_t size); + +static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg, + virtio_mem_range_cb cb) +{ + unsigned long first_zero_bit, last_zero_bit; + uint64_t offset, size; + int ret = 0; + + first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); + while (first_zero_bit < vmem->bitmap_size) { + offset = first_zero_bit * vmem->block_size; + last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + first_zero_bit + 1) - 1; + size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; + + ret = cb(vmem, arg, offset, size); + if (ret) { + break; + } + first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + last_zero_bit + 2); + } + return ret; +} + +/* + * Adjust the memory section to cover the intersection with the given range. + * + * Returns false if the intersection is empty, otherwise returns true. + */ +static bool virito_mem_intersect_memory_section(MemoryRegionSection *s, + uint64_t offset, uint64_t size) +{ + uint64_t start = MAX(s->offset_within_region, offset); + uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), + offset + size); + + if (end <= start) { + return false; + } + + s->offset_within_address_space += start - s->offset_within_region; + s->offset_within_region = start; + s->size = int128_make64(end - start); + return true; +} + +typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); + +static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, + MemoryRegionSection *s, + void *arg, + virtio_mem_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + int ret = 0; + + first_bit = s->offset_within_region / vmem->bitmap_size; + first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit); + while (first_bit < vmem->bitmap_size) { + MemoryRegionSection tmp = *s; + + offset = first_bit * vmem->block_size; + last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + first_bit + 1) - 1; + size = (last_bit - first_bit + 1) * vmem->block_size; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + break; + } + ret = cb(&tmp, arg); + if (ret) { + break; + } + first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + last_bit + 2); + } + return ret; +} + +static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, + MemoryRegionSection *s, + void *arg, + virtio_mem_section_cb cb) +{ + unsigned long first_bit, last_bit; + uint64_t offset, size; + int ret = 0; + + first_bit = s->offset_within_region / vmem->bitmap_size; + first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit); + while (first_bit < vmem->bitmap_size) { + MemoryRegionSection tmp = *s; + + offset = first_bit * vmem->block_size; + last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, + first_bit + 1) 
- 1; + size = (last_bit - first_bit + 1) * vmem->block_size; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + break; + } + ret = cb(&tmp, arg); + if (ret) { + break; + } + first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, + last_bit + 2); + } + return ret; +} + +static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg) +{ + RamDiscardListener *rdl = arg; + + return rdl->notify_populate(rdl, s); +} + +static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg) +{ + RamDiscardListener *rdl = arg; + + rdl->notify_discard(rdl, s); + return 0; +} + +static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, + uint64_t size) +{ + RamDiscardListener *rdl; + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + continue; + } + rdl->notify_discard(rdl, &tmp); + } +} + +static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, + uint64_t size) +{ + RamDiscardListener *rdl, *rdl2; + int ret = 0; + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + continue; + } + ret = rdl->notify_populate(rdl, &tmp); + if (ret) { + break; + } + } + + if (ret) { + /* Notify all already-notified listeners. */ + QLIST_FOREACH(rdl2, &vmem->rdl_list, next) { + MemoryRegionSection tmp = *rdl->section; + + if (rdl2 == rdl) { + break; + } + if (!virito_mem_intersect_memory_section(&tmp, offset, size)) { + continue; + } + rdl2->notify_discard(rdl2, &tmp); + } + } + return ret; +} + +static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem) +{ + RamDiscardListener *rdl; + + if (!vmem->size) { + return; + } + + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + if (rdl->double_discard_supported) { + rdl->notify_discard(rdl, rdl->section); + } else { + virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_discard_cb); + } + } +} + +static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa, uint64_t size, bool plugged) { const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size; @@ -198,7 +396,8 @@ static void virtio_mem_send_response_simple(VirtIOMEM *vmem, virtio_mem_send_response(vmem, elem, &resp); } -static bool virtio_mem_valid_range(VirtIOMEM *vmem, uint64_t gpa, uint64_t size) +static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa, + uint64_t size) { if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) { return false; @@ -219,19 +418,21 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, uint64_t size, bool plug) { const uint64_t offset = start_gpa - vmem->addr; - int ret; + RAMBlock *rb = vmem->memdev->mr.ram_block; if (virtio_mem_is_busy()) { return -EBUSY; } if (!plug) { - ret = ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); - if (ret) { - error_report("Unexpected error discarding RAM: %s", - strerror(-ret)); + if (ram_block_discard_range(rb, offset, size)) { return -EBUSY; } + virtio_mem_notify_unplug(vmem, offset, size); + } else if (virtio_mem_notify_plug(vmem, offset, size)) { + /* Could be a mapping attempt resulted in memory getting populated. 
*/ + ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size); + return -EBUSY; } virtio_mem_set_bitmap(vmem, start_gpa, size, plug); return 0; @@ -318,17 +519,16 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, static int virtio_mem_unplug_all(VirtIOMEM *vmem) { RAMBlock *rb = vmem->memdev->mr.ram_block; - int ret; if (virtio_mem_is_busy()) { return -EBUSY; } - ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); - if (ret) { - error_report("Unexpected error discarding RAM: %s", strerror(-ret)); + if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) { return -EBUSY; } + virtio_mem_notify_unplug_all(vmem); + bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); if (vmem->size) { vmem->size = 0; @@ -551,7 +751,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) return; } - if (ram_block_discard_require(true)) { + if (ram_block_coordinated_discard_require(true)) { error_setg(errp, "Discarding RAM is disabled"); return; } @@ -559,7 +759,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); - ram_block_discard_require(false); + ram_block_coordinated_discard_require(false); return; } @@ -576,7 +776,13 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) host_memory_backend_set_mapped(vmem->memdev, true); vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); qemu_register_reset(virtio_mem_system_reset, vmem); - precopy_add_notifier(&vmem->precopy_notifier); + + /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, + RAM_DISCARD_MANAGER(vmem)); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -584,50 +790,58 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - precopy_remove_notifier(&vmem->precopy_notifier); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. + */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); qemu_unregister_reset(virtio_mem_system_reset, vmem); vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem)); host_memory_backend_set_mapped(vmem->memdev, false); virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); - ram_block_discard_require(false); + ram_block_coordinated_discard_require(false); } -static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) +static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg, + uint64_t offset, uint64_t size) { RAMBlock *rb = vmem->memdev->mr.ram_block; - unsigned long first_zero_bit, last_zero_bit; - uint64_t offset, length; - int ret; - /* Find consecutive unplugged blocks and discard the consecutive range. */ - first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); - while (first_zero_bit < vmem->bitmap_size) { - offset = first_zero_bit * vmem->block_size; - last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, - first_zero_bit + 1) - 1; - length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; + return ram_block_discard_range(rb, offset, size) ? 
-EINVAL : 0; +} - ret = ram_block_discard_range(rb, offset, length); - if (ret) { - error_report("Unexpected error discarding RAM: %s", - strerror(-ret)); - return -EINVAL; - } - first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, - last_zero_bit + 2); - } - return 0; +static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) +{ + /* Make sure all memory is really discarded after migration. */ + return virtio_mem_for_each_unplugged_range(vmem, NULL, + virtio_mem_discard_range_cb); } static int virtio_mem_post_load(void *opaque, int version_id) { + VirtIOMEM *vmem = VIRTIO_MEM(opaque); + RamDiscardListener *rdl; + int ret; + + /* + * We started out with all memory discarded and our memory region is mapped + * into an address space. Replay, now that we updated the bitmap. + */ + QLIST_FOREACH(rdl, &vmem->rdl_list, next) { + ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_populate_cb); + if (ret) { + return ret; + } + } + if (migration_in_incoming_postcopy()) { return 0; } - return virtio_mem_restore_unplugged(VIRTIO_MEM(opaque)); + return virtio_mem_restore_unplugged(vmem); } typedef struct VirtIOMEMMigSanityChecks { @@ -702,6 +916,7 @@ static const VMStateDescription vmstate_virtio_mem_device = { .name = "virtio-mem-device", .minimum_version_id = 1, .version_id = 1, + .priority = MIG_PRI_VIRTIO_MEM, .post_load = virtio_mem_post_load, .fields = (VMStateField[]) { VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks, @@ -872,55 +1087,12 @@ static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name, vmem->block_size = value; } -static void virtio_mem_precopy_exclude_unplugged(VirtIOMEM *vmem) -{ - void * const host = qemu_ram_get_host_addr(vmem->memdev->mr.ram_block); - unsigned long first_zero_bit, last_zero_bit; - uint64_t offset, length; - - /* - * Find consecutive unplugged blocks and exclude them from migration. - * - * Note: Blocks cannot get (un)plugged during precopy, no locking needed. 
- */ - first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size); - while (first_zero_bit < vmem->bitmap_size) { - offset = first_zero_bit * vmem->block_size; - last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, - first_zero_bit + 1) - 1; - length = (last_zero_bit - first_zero_bit + 1) * vmem->block_size; - - qemu_guest_free_page_hint(host + offset, length); - first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, - last_zero_bit + 2); - } -} - -static int virtio_mem_precopy_notify(NotifierWithReturn *n, void *data) -{ - VirtIOMEM *vmem = container_of(n, VirtIOMEM, precopy_notifier); - PrecopyNotifyData *pnd = data; - - switch (pnd->reason) { - case PRECOPY_NOTIFY_SETUP: - precopy_enable_free_page_optimization(); - break; - case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC: - virtio_mem_precopy_exclude_unplugged(vmem); - break; - default: - break; - } - - return 0; -} - static void virtio_mem_instance_init(Object *obj) { VirtIOMEM *vmem = VIRTIO_MEM(obj); notifier_list_init(&vmem->size_change_notifiers); - vmem->precopy_notifier.notify = virtio_mem_precopy_notify; + QLIST_INIT(&vmem->rdl_list); object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size, NULL, NULL, NULL); @@ -940,11 +1112,132 @@ static Property virtio_mem_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + + g_assert(mr == &vmem->memdev->mr); + return vmem->block_size; +} + +static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *s) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + uint64_t start_gpa = vmem->addr + s->offset_within_region; + uint64_t end_gpa = start_gpa + int128_get64(s->size); + + g_assert(s->mr == &vmem->memdev->mr); + + start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size); + end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size); + + if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) { + return false; + } + + return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true); +} + +struct VirtIOMEMReplayData { + void *fn; + void *opaque; +}; + +static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) +{ + struct VirtIOMEMReplayData *data = arg; + + return ((ReplayRamPopulate)data->fn)(s, data->opaque); +} + +static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamPopulate replay_fn, + void *opaque) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, + }; + + g_assert(s->mr == &vmem->memdev->mr); + return virtio_mem_for_each_plugged_section(vmem, s, &data, + virtio_mem_rdm_replay_populated_cb); +} + +static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, + void *arg) +{ + struct VirtIOMEMReplayData *data = arg; + + ((ReplayRamDiscard)data->fn)(s, data->opaque); + return 0; +} + +static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscard replay_fn, + void *opaque) +{ + const VirtIOMEM *vmem = VIRTIO_MEM(rdm); + struct VirtIOMEMReplayData data = { + .fn = replay_fn, + .opaque = opaque, + }; + + g_assert(s->mr == &vmem->memdev->mr); + virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); +} + +static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + 
MemoryRegionSection *s) +{ + VirtIOMEM *vmem = VIRTIO_MEM(rdm); + int ret; + + g_assert(s->mr == &vmem->memdev->mr); + rdl->section = memory_region_section_new_copy(s); + + QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next); + ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_populate_cb); + if (ret) { + error_report("%s: Replaying plugged ranges failed: %s", __func__, + strerror(-ret)); + } +} + +static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl) +{ + VirtIOMEM *vmem = VIRTIO_MEM(rdm); + + g_assert(rdl->section->mr == &vmem->memdev->mr); + if (vmem->size) { + if (rdl->double_discard_supported) { + rdl->notify_discard(rdl, rdl->section); + } else { + virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl, + virtio_mem_notify_discard_cb); + } + } + + memory_region_section_free_copy(rdl->section); + rdl->section = NULL; + QLIST_REMOVE(rdl, next); +} + static void virtio_mem_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass); + RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass); device_class_set_props(dc, virtio_mem_properties); dc->vmsd = &vmstate_virtio_mem; @@ -960,6 +1253,13 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) vmc->get_memory_region = virtio_mem_get_memory_region; vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; + + rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity; + rdmc->is_populated = virtio_mem_rdm_is_populated; + rdmc->replay_populated = virtio_mem_rdm_replay_populated; + rdmc->replay_discarded = virtio_mem_rdm_replay_discarded; + rdmc->register_listener = virtio_mem_rdm_register_listener; + rdmc->unregister_listener = virtio_mem_rdm_unregister_listener; } static const TypeInfo virtio_mem_info = { @@ -969,6 +1269,10 @@ static const TypeInfo virtio_mem_info = { .instance_init = virtio_mem_instance_init, .class_init = virtio_mem_class_init, .class_size = sizeof(VirtIOMEMClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_RAM_DISCARD_MANAGER }, + { } + }, }; static void virtio_register_types(void) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 342c918ea7b..72da12fea59 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -29,6 +29,7 @@ #include "qemu/host-utils.h" #include "qemu/module.h" #include "sysemu/kvm.h" +#include "sysemu/replay.h" #include "hw/virtio/virtio-mmio.h" #include "qemu/error-report.h" #include "qemu/log.h" @@ -36,7 +37,9 @@ static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) { - return kvm_eventfds_enabled(); + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + return (proxy->flags & VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD) != 0; } static int virtio_mmio_ioeventfd_assign(DeviceState *d, @@ -720,6 +723,8 @@ static Property virtio_mmio_properties[] = { DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy, format_transport_address, true), DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true), + DEFINE_PROP_BIT("ioeventfd", VirtIOMMIOProxy, flags, + VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_END_OF_LIST(), }; @@ -728,9 +733,18 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp) VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); SysBusDevice *sbd = SYS_BUS_DEVICE(d); - qbus_create_inplace(&proxy->bus, sizeof(proxy->bus), 
TYPE_VIRTIO_MMIO_BUS, - d, NULL); + qbus_init(&proxy->bus, sizeof(proxy->bus), TYPE_VIRTIO_MMIO_BUS, d, NULL); sysbus_init_irq(sbd, &proxy->irq); + + if (!kvm_eventfds_enabled()) { + proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD; + } + + /* fd-based ioevents can't be synchronized in record/replay */ + if (replay_mode != REPLAY_MODE_NONE) { + proxy->flags &= ~VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD; + } + if (proxy->legacy) { memory_region_init_io(&proxy->iomem, OBJECT(d), &virtio_legacy_mem_ops, proxy, @@ -803,6 +817,17 @@ static char *virtio_mmio_bus_get_dev_path(DeviceState *dev) return path; } +static void virtio_mmio_vmstate_change(DeviceState *d, bool running) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + if (running) { + virtio_mmio_start_ioeventfd(proxy); + } else { + virtio_mmio_stop_ioeventfd(proxy); + } +} + static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) { BusClass *bus_class = BUS_CLASS(klass); @@ -818,6 +843,7 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; k->pre_plugged = virtio_mmio_pre_plugged; + k->vmstate_change = virtio_mmio_vmstate_change; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; bus_class->get_dev_path = virtio_mmio_bus_get_dev_path; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index b321604d9b3..750aa47ec14 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -37,6 +37,7 @@ #include "qemu/range.h" #include "hw/virtio/virtio-bus.h" #include "qapi/visitor.h" +#include "sysemu/replay.h" #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) @@ -423,6 +424,11 @@ static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr, VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev); uint64_t val = 0; + + if (vdev == NULL) { + return UINT64_MAX; + } + if (addr < config) { return virtio_ioport_read(proxy, addr); } @@ -454,6 +460,11 @@ static void virtio_pci_config_write(void *opaque, hwaddr addr, VirtIOPCIProxy *proxy = opaque; uint32_t config = VIRTIO_PCI_CONFIG_SIZE(&proxy->pci_dev); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + if (vdev == NULL) { + return; + } + if (addr < config) { virtio_ioport_write(proxy, addr, val); return; @@ -1110,6 +1121,19 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d) return pci_get_address_space(dev); } +static bool virtio_pci_iommu_enabled(DeviceState *d) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(d); + PCIDevice *dev = &proxy->pci_dev; + AddressSpace *dma_as = pci_device_iommu_address_space(dev); + + if (dma_as == &address_space_memory) { + return false; + } + + return true; +} + static bool virtio_pci_queue_enabled(DeviceState *d, int n) { VirtIOPCIProxy *proxy = VIRTIO_PCI(d); @@ -1146,6 +1170,10 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, uint32_t val = 0; int i; + if (vdev == NULL) { + return UINT64_MAX; + } + switch (addr) { case VIRTIO_PCI_COMMON_DFSELECT: val = proxy->dfselect; @@ -1229,6 +1257,10 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + if (vdev == NULL) { + return; + } + switch (addr) { case VIRTIO_PCI_COMMON_DFSELECT: proxy->dfselect = val; @@ -1330,6 +1362,11 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, static uint64_t virtio_pci_notify_read(void *opaque, 
hwaddr addr, unsigned size) { + VirtIOPCIProxy *proxy = opaque; + if (virtio_bus_get_device(&proxy->bus) == NULL) { + return UINT64_MAX; + } + return 0; } @@ -1367,7 +1404,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, uint64_t val; if (vdev == NULL) { - return 0; + return UINT64_MAX; } val = qatomic_xchg(&vdev->isr, 0); @@ -1388,7 +1425,7 @@ static uint64_t virtio_pci_device_read(void *opaque, hwaddr addr, uint64_t val; if (vdev == NULL) { - return 0; + return UINT64_MAX; } switch (size) { @@ -1760,6 +1797,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; } + /* fd-based ioevents can't be synchronized in record/replay */ + if (replay_mode != REPLAY_MODE_NONE) { + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; + } + /* * virtio pci bar layout used by default. * subclasses can re-arrange things if needed. @@ -2145,8 +2187,7 @@ static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, DeviceState *qdev = DEVICE(dev); char virtio_bus_name[] = "virtio-bus"; - qbus_create_inplace(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, - virtio_bus_name); + qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name); } static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) @@ -2173,6 +2214,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled; k->ioeventfd_assign = virtio_pci_ioeventfd_assign; k->get_dma_as = virtio_pci_get_dma_as; + k->iommu_enabled = virtio_pci_iommu_enabled; k->queue_enabled = virtio_pci_queue_enabled; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 07f4e60b309..ea7c079fb04 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -15,7 +15,6 @@ #include "qapi/error.h" #include "cpu.h" #include "trace.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/main-loop.h" @@ -134,12 +133,10 @@ struct VirtQueue QLIST_ENTRY(VirtQueue) node; }; +/* Called within call_rcu(). 
*/ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { - if (!caches) { - return; - } - + assert(caches != NULL); address_space_cache_destroy(&caches->desc); address_space_cache_destroy(&caches->avail); address_space_cache_destroy(&caches->used); @@ -250,13 +247,10 @@ static void vring_packed_event_read(VirtIODevice *vdev, hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); - address_space_read_cached(cache, off_flags, &e->flags, - sizeof(e->flags)); + e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags); /* Make sure flags is seen before off_wrap */ smp_rmb(); - address_space_read_cached(cache, off_off, &e->off_wrap, - sizeof(e->off_wrap)); - virtio_tswap16s(vdev, &e->off_wrap); + e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); virtio_tswap16s(vdev, &e->flags); } @@ -266,8 +260,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); - virtio_tswap16s(vdev, &off_wrap); - address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); + virtio_stw_phys_cached(vdev, cache, off, off_wrap); address_space_cache_invalidate(cache, off, sizeof(off_wrap)); } @@ -276,8 +269,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, flags); - virtio_tswap16s(vdev, &flags); - address_space_write_cached(cache, off, &flags, sizeof(flags)); + virtio_stw_phys_cached(vdev, cache, off, flags); address_space_cache_invalidate(cache, off, sizeof(flags)); } @@ -510,11 +502,9 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, MemoryRegionCache *cache, int i) { - address_space_read_cached(cache, - i * sizeof(VRingPackedDesc) + - offsetof(VRingPackedDesc, flags), - flags, sizeof(*flags)); - virtio_tswap16s(vdev, flags); + hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); + + *flags = virtio_lduw_phys_cached(vdev, cache, off); } static void vring_packed_desc_read(VirtIODevice *vdev, @@ -567,8 +557,7 @@ static void vring_packed_desc_write_flags(VirtIODevice *vdev, { hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); - virtio_tswap16s(vdev, &desc->flags); - address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags)); + virtio_stw_phys_cached(vdev, cache, off, desc->flags); address_space_cache_invalidate(cache, off, sizeof(desc->flags)); } @@ -635,6 +624,7 @@ static int virtio_queue_split_empty(VirtQueue *vq) return empty; } +/* Called within rcu_read_lock(). */ static int virtio_queue_packed_empty_rcu(VirtQueue *vq) { struct VRingPackedDesc desc; @@ -986,28 +976,23 @@ static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } +/* Called within rcu_read_lock(). 
*/ static void virtqueue_split_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) + unsigned max_in_bytes, unsigned max_out_bytes, + VRingMemoryRegionCaches *caches) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; unsigned int total_bufs, in_total, out_total; - VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; int rc; - RCU_READ_LOCK_GUARD(); - idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; max = vq->vring.num; - caches = vring_get_region_caches(vq); - if (!caches) { - goto err; - } while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = &caches->desc; @@ -1126,32 +1111,28 @@ static int virtqueue_packed_read_next_desc(VirtQueue *vq, return VIRTQUEUE_READ_DESC_MORE; } +/* Called within rcu_read_lock(). */ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, unsigned int *out_bytes, unsigned max_in_bytes, - unsigned max_out_bytes) + unsigned max_out_bytes, + VRingMemoryRegionCaches *caches) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; unsigned int total_bufs, in_total, out_total; MemoryRegionCache *desc_cache; - VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; int64_t len = 0; VRingPackedDesc desc; bool wrap_counter; - RCU_READ_LOCK_GUARD(); idx = vq->last_avail_idx; wrap_counter = vq->last_avail_wrap_counter; total_bufs = in_total = out_total = 0; max = vq->vring.num; - caches = vring_get_region_caches(vq); - if (!caches) { - goto err; - } for (;;) { unsigned int num_bufs = total_bufs; @@ -1252,6 +1233,8 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, uint16_t desc_size; VRingMemoryRegionCaches *caches; + RCU_READ_LOCK_GUARD(); + if (unlikely(!vq->vring.desc)) { goto err; } @@ -1270,10 +1253,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes, - max_in_bytes, max_out_bytes); + max_in_bytes, max_out_bytes, + caches); } else { virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, - max_in_bytes, max_out_bytes); + max_in_bytes, max_out_bytes, + caches); } return; @@ -1705,6 +1690,8 @@ static unsigned int virtqueue_packed_drop_all(VirtQueue *vq) VirtIODevice *vdev = vq->vdev; VRingPackedDesc desc; + RCU_READ_LOCK_GUARD(); + caches = vring_get_region_caches(vq); if (!caches) { return 0; @@ -1973,9 +1960,7 @@ static enum virtio_device_endian virtio_default_endian(void) static enum virtio_device_endian virtio_current_cpu_endian(void) { - CPUClass *cc = CPU_GET_CLASS(current_cpu); - - if (cc->virtio_is_big_endian(current_cpu)) { + if (cpu_virtio_is_big_endian(current_cpu)) { return VIRTIO_DEVICE_ENDIAN_BIG; } else { return VIRTIO_DEVICE_ENDIAN_LITTLE; @@ -2450,6 +2435,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } +/* Called within rcu_read_lock(). */ static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; @@ -2486,6 +2472,7 @@ static bool vring_packed_need_event(VirtQueue *vq, bool wrap, return vring_need_event(off, new, old); } +/* Called within rcu_read_lock(). 
*/ static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) { VRingPackedDescEvent e; @@ -2982,7 +2969,7 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } -size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes, +size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes, uint64_t host_features) { size_t config_size = 0; @@ -3672,6 +3659,7 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) } vdev->listener.commit = virtio_memory_listener_commit; + vdev->listener.name = "virtio"; memory_listener_register(&vdev->listener, vdev->dma_as); } @@ -3731,6 +3719,10 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int i, n, r, err; + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -3769,6 +3761,10 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ memory_region_transaction_commit(); while (--i >= 0) { @@ -3793,6 +3789,10 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r; + /* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -3804,6 +3804,10 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + /* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ memory_region_transaction_commit(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { diff --git a/hw/watchdog/sbsa_gwdt.c b/hw/watchdog/sbsa_gwdt.c index d0998f8489c..e49cacd0e20 100644 --- a/hw/watchdog/sbsa_gwdt.c +++ b/hw/watchdog/sbsa_gwdt.c @@ -273,8 +273,9 @@ static void wdt_sbsa_gwdt_class_init(ObjectClass *klass, void *data) dc->realize = wdt_sbsa_gwdt_realize; dc->reset = wdt_sbsa_gwdt_reset; dc->hotpluggable = false; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); dc->vmsd = &vmstate_sbsa_gwdt; + dc->desc = "SBSA-compliant generic watchdog device"; } static const TypeInfo wdt_sbsa_gwdt_info = { diff --git a/hw/watchdog/trace-events b/hw/watchdog/trace-events index 3124ca1f1b6..e7523e22aaf 100644 --- a/hw/watchdog/trace-events +++ b/hw/watchdog/trace-events @@ -1,7 +1,11 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
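Each entry in this trace-events file pairs a C-like prototype with a printf-style format string, and the build's tracetool turns it into a trace_<name>() helper that the device model calls with matching arguments (as the wdt_aspeed.c hunk further below does with trace_aspeed_wdt_read(offset, size)). A minimal hedged sketch of that calling pattern, using a hand-written stand-in rather than the generated helper:

/*
 * Sketch only: trace_aspeed_wdt_read() here is a hypothetical stand-in for
 * the helper generated from "aspeed_wdt_read(uint64_t addr, uint32_t size)";
 * it is not QEMU's tracetool output.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void trace_aspeed_wdt_read(uint64_t addr, uint32_t size)
{
    printf("aspeed_wdt_read @0x%" PRIx64 " size=%" PRIu32 "\n", addr, size);
}

int main(void)
{
    /* A device model calls the helper with arguments matching the declared
     * prototype; wdt_aspeed.c passes (offset, size) the same way. */
    trace_aspeed_wdt_read(0x04, 4);
    return 0;
}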
# cmsdk-apb-watchdog.c cmsdk_apb_watchdog_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB watchdog read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" cmsdk_apb_watchdog_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB watchdog write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" cmsdk_apb_watchdog_reset(void) "CMSDK APB watchdog: reset" cmsdk_apb_watchdog_lock(uint32_t lock) "CMSDK APB watchdog: lock %" PRIu32 + +# wdt-aspeed.c +aspeed_wdt_read(uint64_t addr, uint32_t size) "@0x%" PRIx64 " size=%d" +aspeed_wdt_write(uint64_t addr, uint32_t size, uint64_t data) "@0x%" PRIx64 " size=%d value=0x%"PRIx64 diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c index 0e98ffb73fb..1437e6c5b62 100644 --- a/hw/watchdog/watchdog.c +++ b/hw/watchdog/watchdog.c @@ -76,20 +76,6 @@ int select_watchdog(const char *p) return 1; } -int select_watchdog_action(const char *p) -{ - int action; - char *qapi_value; - - qapi_value = g_ascii_strdown(p, -1); - action = qapi_enum_parse(&WatchdogAction_lookup, qapi_value, -1, NULL); - g_free(qapi_value); - if (action < 0) - return -1; - qmp_watchdog_set_action(action, &error_abort); - return 0; -} - WatchdogAction get_watchdog_action(void) { return watchdog_action; diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c index 6352ba1b0e5..6aa6f90b664 100644 --- a/hw/watchdog/wdt_aspeed.c +++ b/hw/watchdog/wdt_aspeed.c @@ -19,6 +19,7 @@ #include "hw/sysbus.h" #include "hw/watchdog/wdt_aspeed.h" #include "migration/vmstate.h" +#include "trace.h" #define WDT_STATUS (0x00 / 4) #define WDT_RELOAD_VALUE (0x04 / 4) @@ -60,6 +61,8 @@ static uint64_t aspeed_wdt_read(void *opaque, hwaddr offset, unsigned size) { AspeedWDTState *s = ASPEED_WDT(opaque); + trace_aspeed_wdt_read(offset, size); + offset >>= 2; switch (offset) { @@ -118,13 +121,29 @@ static void aspeed_wdt_reload_1mhz(AspeedWDTState *s) } } +static uint64_t aspeed_2400_sanitize_ctrl(uint64_t data) +{ + return data & 0xffff; +} + +static uint64_t aspeed_2500_sanitize_ctrl(uint64_t data) +{ + return (data & ~(0xfUL << 8)) | WDT_CTRL_1MHZ_CLK; +} + +static uint64_t aspeed_2600_sanitize_ctrl(uint64_t data) +{ + return data & ~(0x7UL << 7); +} static void aspeed_wdt_write(void *opaque, hwaddr offset, uint64_t data, unsigned size) { AspeedWDTState *s = ASPEED_WDT(opaque); AspeedWDTClass *awc = ASPEED_WDT_GET_CLASS(s); - bool enable = data & WDT_CTRL_ENABLE; + bool enable; + + trace_aspeed_wdt_write(offset, size, data); offset >>= 2; @@ -144,12 +163,16 @@ static void aspeed_wdt_write(void *opaque, hwaddr offset, uint64_t data, } break; case WDT_CTRL: + data = awc->sanitize_ctrl(data); + enable = data & WDT_CTRL_ENABLE; if (enable && !aspeed_wdt_is_enabled(s)) { s->regs[WDT_CTRL] = data; awc->wdt_reload(s); } else if (!enable && aspeed_wdt_is_enabled(s)) { s->regs[WDT_CTRL] = data; timer_del(s->timer); + } else { + s->regs[WDT_CTRL] = data; } break; case WDT_RESET_WIDTH: @@ -207,11 +230,12 @@ static const MemoryRegionOps aspeed_wdt_ops = { static void aspeed_wdt_reset(DeviceState *dev) { AspeedWDTState *s = ASPEED_WDT(dev); + AspeedWDTClass *awc = ASPEED_WDT_GET_CLASS(s); s->regs[WDT_STATUS] = 0x3EF1480; s->regs[WDT_RELOAD_VALUE] = 0x03EF1480; s->regs[WDT_RESTART] = 0; - s->regs[WDT_CTRL] = 0; + s->regs[WDT_CTRL] = awc->sanitize_ctrl(0); s->regs[WDT_RESET_WIDTH] = 0xFF; timer_del(s->timer); @@ -269,9 +293,10 @@ static void aspeed_wdt_class_init(ObjectClass *klass, void *data) dc->desc = "ASPEED Watchdog Controller"; dc->realize = aspeed_wdt_realize; dc->reset = 
aspeed_wdt_reset; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); dc->vmsd = &vmstate_aspeed_wdt; device_class_set_props(dc, aspeed_wdt_properties); + dc->desc = "Aspeed watchdog device"; } static const TypeInfo aspeed_wdt_info = { @@ -293,6 +318,7 @@ static void aspeed_2400_wdt_class_init(ObjectClass *klass, void *data) awc->ext_pulse_width_mask = 0xff; awc->reset_ctrl_reg = SCU_RESET_CONTROL1; awc->wdt_reload = aspeed_wdt_reload; + awc->sanitize_ctrl = aspeed_2400_sanitize_ctrl; } static const TypeInfo aspeed_2400_wdt_info = { @@ -328,6 +354,7 @@ static void aspeed_2500_wdt_class_init(ObjectClass *klass, void *data) awc->reset_ctrl_reg = SCU_RESET_CONTROL1; awc->reset_pulse = aspeed_2500_wdt_reset_pulse; awc->wdt_reload = aspeed_wdt_reload_1mhz; + awc->sanitize_ctrl = aspeed_2500_sanitize_ctrl; } static const TypeInfo aspeed_2500_wdt_info = { @@ -348,6 +375,7 @@ static void aspeed_2600_wdt_class_init(ObjectClass *klass, void *data) awc->reset_ctrl_reg = AST2600_SCU_RESET_CONTROL1; awc->reset_pulse = aspeed_2500_wdt_reset_pulse; awc->wdt_reload = aspeed_wdt_reload_1mhz; + awc->sanitize_ctrl = aspeed_2600_sanitize_ctrl; } static const TypeInfo aspeed_2600_wdt_info = { diff --git a/hw/watchdog/wdt_diag288.c b/hw/watchdog/wdt_diag288.c index e135a4de8b2..9e8882a11cf 100644 --- a/hw/watchdog/wdt_diag288.c +++ b/hw/watchdog/wdt_diag288.c @@ -122,9 +122,10 @@ static void wdt_diag288_class_init(ObjectClass *klass, void *data) dc->unrealize = wdt_diag288_unrealize; dc->reset = wdt_diag288_reset; dc->hotpluggable = false; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); dc->vmsd = &vmstate_diag288; diag288->handle_timer = wdt_diag288_handle_timer; + dc->desc = "diag288 device for s390x platform"; } static const TypeInfo wdt_diag288_info = { diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c index 4c52e3bb9e1..f99a1c9d294 100644 --- a/hw/watchdog/wdt_i6300esb.c +++ b/hw/watchdog/wdt_i6300esb.c @@ -476,7 +476,8 @@ static void i6300esb_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_SYSTEM_OTHER; dc->reset = i6300esb_reset; dc->vmsd = &vmstate_i6300esb; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); + dc->desc = "Intel 6300ESB"; } static const TypeInfo i6300esb_info = { diff --git a/hw/watchdog/wdt_ib700.c b/hw/watchdog/wdt_ib700.c index 177aaa503f9..91d1bdc0da1 100644 --- a/hw/watchdog/wdt_ib700.c +++ b/hw/watchdog/wdt_ib700.c @@ -140,7 +140,8 @@ static void wdt_ib700_class_init(ObjectClass *klass, void *data) dc->realize = wdt_ib700_realize; dc->reset = wdt_ib700_reset; dc->vmsd = &vmstate_ib700; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); + dc->desc = "iBASE 700"; } static const TypeInfo wdt_ib700_info = { diff --git a/hw/watchdog/wdt_imx2.c b/hw/watchdog/wdt_imx2.c index a5fb76308f5..c3128370b5b 100644 --- a/hw/watchdog/wdt_imx2.c +++ b/hw/watchdog/wdt_imx2.c @@ -280,8 +280,8 @@ static void imx2_wdt_class_init(ObjectClass *klass, void *data) dc->realize = imx2_wdt_realize; dc->reset = imx2_wdt_reset; dc->vmsd = &vmstate_imx2_wdt; - dc->desc = "i.MX watchdog timer"; - set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "i.MX2 watchdog timer"; + set_bit(DEVICE_CATEGORY_WATCHDOG, dc->categories); } static const TypeInfo imx2_wdt_info = { diff --git a/hw/xen/trace-events b/hw/xen/trace-events index e6885bc751a..3da3fd83483 100644 --- 
a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # ../../include/hw/xen/xen_common.h xen_default_ioreq_server(void) "" diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c index b459bb93968..5a1e12b374e 100644 --- a/hw/xen/xen-bus-helper.c +++ b/hw/xen/xen-bus-helper.c @@ -6,7 +6,6 @@ */ #include "qemu/osdep.h" -#include "hw/sysbus.h" #include "hw/xen/xen.h" #include "hw/xen/xen-bus.h" #include "hw/xen/xen-bus-helper.h" diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index 8c588920d9f..416583f130b 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -1398,7 +1398,7 @@ type_init(xen_register_types) void xen_bus_init(void) { DeviceState *dev = qdev_new(TYPE_XEN_BRIDGE); - BusState *bus = qbus_create(TYPE_XEN_BUS, dev, NULL); + BusState *bus = qbus_new(TYPE_XEN_BUS, dev, NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); qbus_set_bus_hotplug_handler(bus); diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index b61a4855b7b..085fd31ef7a 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -27,7 +27,6 @@ #include "hw/sysbus.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include "qemu/log.h" #include "qemu/main-loop.h" #include "qapi/error.h" #include "hw/xen/xen-legacy-backend.h" @@ -277,7 +276,8 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, xendev = g_malloc0(ops->size); object_initialize(&xendev->qdev, ops->size, TYPE_XENBACKEND); OBJECT(xendev)->free = g_free; - qdev_set_id(DEVICE(xendev), g_strdup_printf("xen-%s-%d", type, dev)); + qdev_set_id(DEVICE(xendev), g_strdup_printf("xen-%s-%d", type, dev), + &error_fatal); qdev_realize(DEVICE(xendev), xen_sysbus, &error_fatal); object_unref(OBJECT(xendev)); @@ -703,7 +703,7 @@ int xen_be_init(void) xen_sysdev = qdev_new(TYPE_XENSYSDEV); sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal); - xen_sysbus = qbus_create(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); + xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); qbus_set_bus_hotplug_handler(xen_sysbus); return 0; diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index a513fdd62d1..027190fa447 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -64,7 +64,6 @@ #include "hw/xen/xen-legacy-backend.h" #include "xen_pt.h" #include "qemu/range.h" -#include "exec/address-spaces.h" static bool has_igd_gfx_passthru; @@ -616,8 +615,8 @@ static void xen_pt_region_update(XenPCIPassthroughState *s, } args.type = d->io_regions[bar].type; - pci_for_each_device(pci_get_bus(d), pci_dev_bus_num(d), - xen_pt_check_bar_overlap, &args); + pci_for_each_device_under_bus(pci_get_bus(d), + xen_pt_check_bar_overlap, &args); if (args.rc) { XEN_PT_WARN(d, "Region: %d (addr: 0x%"FMT_PCIBUS ", len: 0x%"FMT_PCIBUS") is overlapped.\n", @@ -690,12 +689,14 @@ static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec) } static const MemoryListener xen_pt_memory_listener = { + .name = "xen-pt-mem", .region_add = xen_pt_region_add, .region_del = xen_pt_region_del, .priority = 10, }; static const MemoryListener xen_pt_io_listener = { + .name = "xen-pt-io", .region_add = xen_pt_io_region_add, .region_del = xen_pt_io_region_del, .priority = 10, diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c index cbac50db2de..2028fe793d9 100644 --- a/hw/xtensa/sim.c +++ b/hw/xtensa/sim.c @@ -27,14 +27,12 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include 
"cpu.h" #include "sysemu/reset.h" #include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "elf.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "xtensa_memory.h" #include "xtensa_sim.h" diff --git a/hw/xtensa/virt.c b/hw/xtensa/virt.c index e47e1de6767..a18e3fc910e 100644 --- a/hw/xtensa/virt.c +++ b/hw/xtensa/virt.c @@ -27,16 +27,13 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "sysemu/reset.h" -#include "sysemu/sysemu.h" #include "hw/boards.h" #include "hw/loader.h" #include "hw/pci-host/gpex.h" #include "net/net.h" #include "elf.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "qemu/error-report.h" #include "xtensa_memory.h" #include "xtensa_sim.h" diff --git a/hw/xtensa/xtensa_memory.c b/hw/xtensa/xtensa_memory.c index 1c5f62b0146..2c1095f0170 100644 --- a/hw/xtensa/xtensa_memory.c +++ b/hw/xtensa/xtensa_memory.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "cpu.h" #include "exec/memory.h" #include "qemu/error-report.h" #include "xtensa_memory.h" diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index 7be53f1895b..17f087b3951 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -35,7 +35,6 @@ #include "hw/qdev-properties.h" #include "elf.h" #include "exec/memory.h" -#include "exec/address-spaces.h" #include "hw/char/serial.h" #include "net/net.h" #include "hw/sysbus.h" diff --git a/include/block/aio.h b/include/block/aio.h index 5f342267d5c..47fbe9d81f2 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -232,6 +232,9 @@ struct AioContext { int64_t poll_grow; /* polling time growth factor */ int64_t poll_shrink; /* polling time shrink factor */ + /* AIO engine parameters */ + int64_t aio_max_batch; /* maximum number of requests in a batch */ + /* * List of handlers participating in userspace polling. Protected by * ctx->list_lock. Iterated and modified mostly by the event loop thread @@ -291,20 +294,45 @@ void aio_context_acquire(AioContext *ctx); /* Relinquish ownership of the AioContext. */ void aio_context_release(AioContext *ctx); +/** + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will + * run only once and as soon as possible. + * + * @name: A human-readable identifier for debugging purposes. + */ +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + const char *name); + /** * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run * only once and as soon as possible. + * + * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the + * name string. */ -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); +#define aio_bh_schedule_oneshot(ctx, cb, opaque) \ + aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb))) /** - * aio_bh_new: Allocate a new bottom half structure. + * aio_bh_new_full: Allocate a new bottom half structure. * * Bottom halves are lightweight callbacks whose invocation is guaranteed * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure * is opaque and must be allocated prior to its use. + * + * @name: A human-readable identifier for debugging purposes. + */ +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + const char *name); + +/** + * aio_bh_new: Allocate a new bottom half structure + * + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name + * string. 
*/ -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); +#define aio_bh_new(ctx, cb, opaque) \ + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) /** * aio_notify: Force processing of pending events. @@ -691,10 +719,13 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); * Return the AioContext whose event loop runs in the current thread. * * If called from an IOThread this will be the IOThread's AioContext. If - * called from another thread it will be the main loop AioContext. + * called from the main thread or with the "big QEMU lock" taken it + * will be the main loop AioContext. */ AioContext *qemu_get_current_aio_context(void); +void qemu_set_current_aio_context(AioContext *ctx); + /** * aio_context_setup: * @ctx: the aio context @@ -727,4 +758,13 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, int64_t grow, int64_t shrink, Error **errp); +/** + * aio_context_set_aio_params: + * @ctx: the aio context + * @max_batch: maximum number of requests in a batch, 0 means that the + * engine will use its default + */ +void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, + Error **errp); + #endif diff --git a/include/block/block-copy.h b/include/block/block-copy.h index 338f2ea7fdf..99370fa38be 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -18,15 +18,18 @@ #include "block/block.h" #include "qemu/co-shared-resource.h" +/* All APIs are thread-safe */ + typedef void (*BlockCopyAsyncCallbackFunc)(void *opaque); typedef struct BlockCopyState BlockCopyState; typedef struct BlockCopyCallState BlockCopyCallState; BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, - int64_t cluster_size, bool use_copy_range, - BdrvRequestFlags write_flags, Error **errp); +/* Function should be called prior any actual copy request */ +void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range, + bool compress); void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm); void block_copy_state_free(BlockCopyState *s); @@ -87,6 +90,7 @@ void block_copy_kick(BlockCopyCallState *call_state); void block_copy_call_cancel(BlockCopyCallState *call_state); BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s); +int64_t block_copy_cluster_size(BlockCopyState *s); void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip); #endif /* BLOCK_COPY_H */ diff --git a/include/block/block.h b/include/block/block.h index b3f6e509d49..e5dd22b0343 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -9,6 +9,7 @@ #include "block/dirty-bitmap.h" #include "block/blockjob.h" #include "qemu/hbitmap.h" +#include "qemu/transactions.h" /* * generated_co_wrapper @@ -101,6 +102,7 @@ typedef struct HDGeometry { uint32_t cylinders; } HDGeometry; +#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */ #define BDRV_O_RDWR 0x0002 #define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ @@ -206,9 +208,8 @@ typedef struct BDRVReopenState { int flags; BlockdevDetectZeroesOptions detect_zeroes; bool backing_missing; - bool replace_backing_bs; /* new_backing_bs is ignored if this is false */ - BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */ - uint64_t perm, shared_perm; + BlockDriverState *old_backing_bs; /* keep pointer for permissions update */ + BlockDriverState *old_file_bs; /* keep pointer for permissions update */ QDict *options; QDict 
*explicit_options; void *opaque; @@ -360,8 +361,11 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, Error **errp); int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp); +int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + Error **errp); BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options, int flags, Error **errp); +int bdrv_drop_filter(BlockDriverState *bs, Error **errp); int bdrv_parse_aio(const char *mode, int *flags); int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); @@ -379,18 +383,21 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv, + const char *node_name, + QDict *options, int flags, + Error **errp); BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts); +void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue); int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp); int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); -int bdrv_reopen_prepare(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); -void bdrv_reopen_commit(BDRVReopenState *reopen_state); -void bdrv_reopen_abort(BDRVReopenState *reopen_state); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); @@ -424,7 +431,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts, BlockDriverState *in_bs, Error **errp); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); -void bdrv_refresh_limits(BlockDriverState *bs, Error **errp); +void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); int bdrv_commit(BlockDriverState *bs); int bdrv_make_empty(BdrvChild *c, Error **errp); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, @@ -702,6 +709,9 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp); bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx, GSList **ignore, Error **errp); +AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c); +AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); + int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); @@ -745,9 +755,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, * bdrv_drained_begin: * * Begin a quiesced section for exclusive access to the BDS, by disabling - * external request sources including NBD server and device model. Note that - * this doesn't block timers or coroutines from submitting more requests, which - * means block_job_pause is still necessary. + * external request sources including NBD server, block jobs, and device model. * * This function can be recursive. 
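For context on the reworded bdrv_drained_begin() comment, a drained section pairs it with bdrv_drained_end(), roughly as follows (sketch, not part of this patch; do_something_exclusive is a hypothetical operation):

    bdrv_drained_begin(bs);
    /* exclusive access: NBD server, block jobs and device models are
     * quiesced and submit no new requests until the section ends */
    do_something_exclusive(bs);
    bdrv_drained_end(bs);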
*/ diff --git a/include/block/block_int.h b/include/block/block_int.h index 88e41119398..f4c75e8ba95 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -34,6 +34,7 @@ #include "qemu/hbitmap.h" #include "block/snapshot.h" #include "qemu/throttle.h" +#include "qemu/rcu.h" #define BLOCK_FLAG_LAZY_REFCOUNTS 8 @@ -93,6 +94,9 @@ typedef struct BdrvTrackedRequest { struct BdrvTrackedRequest *waiting_for; } BdrvTrackedRequest; +int bdrv_check_qiov_request(int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, + Error **errp); int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp); struct BlockDriver { @@ -231,11 +235,11 @@ struct BlockDriver { /* aio */ BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, void *opaque); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags, - BlockCompletionFunc *cb, void *opaque); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs, @@ -261,10 +265,11 @@ struct BlockDriver { * The buffer in @qiov may point directly to guest memory. */ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags); + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); /** @@ -283,10 +288,11 @@ struct BlockDriver { * The buffer in @qiov may point directly to guest memory. */ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, - QEMUIOVector *qiov, size_t qiov_offset, int flags); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, + BdrvRequestFlags flags); /* * Efficiently zero a region of the disk image. Typically an image format @@ -295,9 +301,9 @@ struct BlockDriver { * will be called instead. 
*/ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags); + int64_t offset, int64_t bytes, BdrvRequestFlags flags); int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs, - int64_t offset, int bytes); + int64_t offset, int64_t bytes); /* Map [offset, offset + nbytes) range onto a child of @bs to copy from, * and invoke bdrv_co_copy_range_from(child, ...), or invoke @@ -308,10 +314,10 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs, BdrvChild *src, - uint64_t offset, + int64_t offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); @@ -325,10 +331,10 @@ struct BlockDriver { */ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs, BdrvChild *src, - uint64_t src_offset, + int64_t src_offset, BdrvChild *dst, - uint64_t dst_offset, - uint64_t bytes, + int64_t dst_offset, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); @@ -347,6 +353,15 @@ struct BlockDriver { * clamped to bdrv_getlength() and aligned to request_alignment, * as well as non-NULL pnum, map, and file; in turn, the driver * must return an error or set pnum to an aligned non-zero value. + * + * Note that @bytes is just a hint on how big of a region the + * caller wants to inspect. It is not a limit on *pnum. + * Implementations are free to return larger values of *pnum if + * doing so does not incur a performance penalty. + * + * block/io.c's bdrv_co_block_status() will utilize an unclamped + * *pnum value for the block-status cache on protocol nodes, prior + * to clamping *pnum for return to its caller. */ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, @@ -357,7 +372,7 @@ struct BlockDriver { * of in-flight requests, so don't waste the time if possible. * * One example usage is to avoid waiting for an nbd target node reconnect - * timeout during job-cancel. + * timeout during job-cancel with force=true. */ void (*bdrv_cancel_in_flight)(BlockDriverState *bs); @@ -424,10 +439,9 @@ struct BlockDriver { Error **errp); int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); + int64_t offset, int64_t bytes, QEMUIOVector *qiov); int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, - uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset); + int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset); int (*bdrv_snapshot_create)(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); @@ -660,11 +674,12 @@ typedef struct BlockLimits { * otherwise. */ uint32_t request_alignment; - /* Maximum number of bytes that can be discarded at once (since it - * is signed, it must be < 2G, if set). Must be multiple of - * pdiscard_alignment, but need not be power of 2. May be 0 if no - * inherent 32-bit limit */ - int32_t max_pdiscard; + /* + * Maximum number of bytes that can be discarded at once. Must be multiple + * of pdiscard_alignment, but need not be power of 2. May be 0 if no + * inherent 64-bit limit. + */ + int64_t max_pdiscard; /* Optimal alignment for discard requests in bytes. A power of 2 * is best but not mandatory. Must be a multiple of @@ -672,10 +687,11 @@ typedef struct BlockLimits { * that is set. 
May be 0 if bl.request_alignment is good enough */ uint32_t pdiscard_alignment; - /* Maximum number of bytes that can zeroized at once (since it is - * signed, it must be < 2G, if set). Must be multiple of - * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */ - int32_t max_pwrite_zeroes; + /* + * Maximum number of bytes that can zeroized at once. Must be multiple of + * pwrite_zeroes_alignment. 0 means no limit. + */ + int64_t max_pwrite_zeroes; /* Optimal alignment for write zeroes requests in bytes. A power * of 2 is best but not mandatory. Must be a multiple of @@ -695,6 +711,20 @@ typedef struct BlockLimits { * clamped down. */ uint32_t max_transfer; + /* Maximal hardware transfer length in bytes. Applies whenever + * transfers to the device bypass the kernel I/O scheduler, for + * example with SG_IO. If larger than max_transfer or if zero, + * blk_get_max_hw_transfer will fall back to max_transfer. + */ + uint64_t max_hw_transfer; + + /* Maximal number of scatter/gather elements allowed by the hardware. + * Applies whenever transfers to the device bypass the kernel I/O + * scheduler, for example with SG_IO. If larger than max_iov + * or if zero, blk_get_max_hw_iov will fall back to max_iov. + */ + int max_hw_iov; + /* memory alignment, in bytes so that no bounce buffer is needed */ size_t min_mem_alignment; @@ -789,6 +819,8 @@ struct BdrvChildClass { bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore, Error **errp); void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); + + AioContext *(*get_parent_aio_context)(BdrvChild *child); }; extern const BdrvChildClass child_of_bds; @@ -811,11 +843,6 @@ struct BdrvChild { */ uint64_t shared_perm; - /* backup of permissions during permission update procedure */ - bool has_backup_perm; - uint64_t backup_perm; - uint64_t backup_shared_perm; - /* * This link is frozen: the child can neither be replaced nor * detached from the parent. @@ -836,17 +863,28 @@ struct BdrvChild { }; /* - * Note: the function bdrv_append() copies and swaps contents of - * BlockDriverStates, so if you add new fields to this struct, please - * inspect bdrv_append() to determine if the new fields need to be - * copied as well. + * Allows bdrv_co_block_status() to cache one data region for a + * protocol node. + * + * @valid: Whether the cache is valid (should be accessed with atomic + * functions so this can be reset by RCU readers) + * @data_start: Offset where we know (or strongly assume) is data + * @data_end: Offset where the data region ends (which is not necessarily + * the start of a zeroed region) */ +typedef struct BdrvBlockStatusCache { + struct rcu_head rcu; + + bool valid; + int64_t data_start; + int64_t data_end; +} BdrvBlockStatusCache; + struct BlockDriverState { /* Protected by big QEMU lock or read-only after opening. No special * locking needed during I/O... */ int open_flags; /* flags used to open the file, re-used for re-open */ - bool read_only; /* if true, the media is read only */ bool encrypted; /* if true, the media is encrypted */ bool sg; /* if true, the device is a /dev/sg* */ bool probed; /* if true, format was probed rather than specified */ @@ -957,12 +995,8 @@ struct BlockDriverState { */ int64_t total_sectors; - /* Callback before write request is processed */ - NotifierWithReturnList before_write_notifiers; - /* threshold limit for writes, in bytes. "High water mark". 
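The write_threshold_notifier removal here pairs with the write-threshold.h change further below: the write path is expected to call the new helper directly instead of going through a before-write notifier. A rough sketch (not part of this patch; offset and bytes are the request bounds):

    bdrv_write_threshold_check_write(bs, offset, bytes);
    /* per the new header comment: sends the corresponding event and
     * disables further threshold checking when the request exceeds it */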
*/ uint64_t write_threshold_offset; - NotifierWithReturn write_threshold_notifier; /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. * Reading from the list can be done with either the BQL or the @@ -1011,11 +1045,15 @@ struct BlockDriverState { /* BdrvChild links to this node may never be frozen */ bool never_freeze; + + /* Lock for block-status cache RCU writers */ + CoMutex bsc_modify_lock; + /* Always non-NULL, but must only be dereferenced under an RCU read guard */ + BdrvBlockStatusCache *block_status_cache; }; struct BlockBackendRootState { int open_flags; - bool read_only; BlockdevDetectZeroesOptions detect_zeroes; }; @@ -1087,15 +1125,6 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, bool bdrv_backing_overridden(BlockDriverState *bs); -/** - * bdrv_add_before_write_notifier: - * - * Register a callback that is invoked before write requests are processed but - * after any throttling or waiting for overlapping requests. - */ -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier); - /** * bdrv_add_aio_context_notifier: * @@ -1306,7 +1335,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildClass *child_class, BdrvChildRole child_role, - AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); @@ -1447,4 +1475,30 @@ static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) */ void bdrv_drain_all_end_quiesce(BlockDriverState *bs); +/** + * Check whether the given offset is in the cached block-status data + * region. + * + * If it is, and @pnum is not NULL, *pnum is set to + * `bsc.data_end - offset`, i.e. how many bytes, starting from + * @offset, are data (according to the cache). + * Otherwise, *pnum is not touched. + */ +bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum); + +/** + * If [offset, offset + bytes) overlaps with the currently cached + * block-status region, invalidate the cache. + * + * (To be used by I/O paths that cause data regions to be zero or + * holes.) + */ +void bdrv_bsc_invalidate_range(BlockDriverState *bs, + int64_t offset, int64_t bytes); + +/** + * Mark the range [offset, offset + bytes) as a data region. 
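A sketch of how a protocol driver's bdrv_co_block_status() might use the cache helpers declared above (illustrative only, not part of this patch; error handling and *map/*file reporting omitted, data_start/data_bytes are hypothetical probe results):

    int64_t cached_bytes;
    if (bdrv_bsc_is_data(bs, offset, &cached_bytes)) {
        /* cache hit: [offset, offset + cached_bytes) is known to be data */
        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    }
    /* otherwise probe the protocol (e.g. SEEK_DATA/SEEK_HOLE) and, once a
     * data region is found, remember it for subsequent queries: */
    bdrv_bsc_fill(bs, data_start, data_bytes);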
+ */ +void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + #endif /* BLOCK_INT_H */ diff --git a/include/block/nbd.h b/include/block/nbd.h index 5f34d23bb03..78d101b7748 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -406,4 +406,22 @@ const char *nbd_info_lookup(uint16_t info); const char *nbd_cmd_lookup(uint16_t info); const char *nbd_err_lookup(int err); +/* nbd/client-connection.c */ +typedef struct NBDClientConnection NBDClientConnection; + +void nbd_client_connection_enable_retry(NBDClientConnection *conn); + +NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr, + bool do_negotiation, + const char *export_name, + const char *x_dirty_bitmap, + QCryptoTLSCreds *tlscreds); +void nbd_client_connection_release(NBDClientConnection *conn); + +QIOChannel *coroutine_fn +nbd_co_establish_connection(NBDClientConnection *conn, NBDExportInfo *info, + bool blocking, Error **errp); + +void coroutine_fn nbd_co_establish_connection_cancel(NBDClientConnection *conn); + #endif diff --git a/include/block/nvme.h b/include/block/nvme.h index 4ac926fbc68..e3bd47bf76a 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -7,9 +7,9 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t intms; uint32_t intmc; uint32_t cc; - uint32_t rsvd1; + uint8_t rsvd24[4]; uint32_t csts; - uint32_t nssrc; + uint32_t nssr; uint32_t aqa; uint64_t asq; uint64_t acq; @@ -26,10 +26,38 @@ typedef struct QEMU_PACKED NvmeBar { uint32_t pmrsts; uint32_t pmrebs; uint32_t pmrswtp; - uint64_t pmrmsc; + uint32_t pmrmscl; + uint32_t pmrmscu; uint8_t css[484]; } NvmeBar; +enum NvmeBarRegs { + NVME_REG_CAP = offsetof(NvmeBar, cap), + NVME_REG_VS = offsetof(NvmeBar, vs), + NVME_REG_INTMS = offsetof(NvmeBar, intms), + NVME_REG_INTMC = offsetof(NvmeBar, intmc), + NVME_REG_CC = offsetof(NvmeBar, cc), + NVME_REG_CSTS = offsetof(NvmeBar, csts), + NVME_REG_NSSR = offsetof(NvmeBar, nssr), + NVME_REG_AQA = offsetof(NvmeBar, aqa), + NVME_REG_ASQ = offsetof(NvmeBar, asq), + NVME_REG_ACQ = offsetof(NvmeBar, acq), + NVME_REG_CMBLOC = offsetof(NvmeBar, cmbloc), + NVME_REG_CMBSZ = offsetof(NvmeBar, cmbsz), + NVME_REG_BPINFO = offsetof(NvmeBar, bpinfo), + NVME_REG_BPRSEL = offsetof(NvmeBar, bprsel), + NVME_REG_BPMBL = offsetof(NvmeBar, bpmbl), + NVME_REG_CMBMSC = offsetof(NvmeBar, cmbmsc), + NVME_REG_CMBSTS = offsetof(NvmeBar, cmbsts), + NVME_REG_PMRCAP = offsetof(NvmeBar, pmrcap), + NVME_REG_PMRCTL = offsetof(NvmeBar, pmrctl), + NVME_REG_PMRSTS = offsetof(NvmeBar, pmrsts), + NVME_REG_PMREBS = offsetof(NvmeBar, pmrebs), + NVME_REG_PMRSWTP = offsetof(NvmeBar, pmrswtp), + NVME_REG_PMRMSCL = offsetof(NvmeBar, pmrmscl), + NVME_REG_PMRMSCU = offsetof(NvmeBar, pmrmscu), +}; + enum NvmeCapShift { CAP_MQES_SHIFT = 0, CAP_CQR_SHIFT = 16, @@ -475,25 +503,25 @@ enum NvmePmrswtpMask { #define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT) -enum NvmePmrmscShift { - PMRMSC_CMSE_SHIFT = 1, - PMRMSC_CBA_SHIFT = 12, +enum NvmePmrmsclShift { + PMRMSCL_CMSE_SHIFT = 1, + PMRMSCL_CBA_SHIFT = 12, }; -enum NvmePmrmscMask { - PMRMSC_CMSE_MASK = 0x1, - PMRMSC_CBA_MASK = 0xfffffffffffff, +enum NvmePmrmsclMask { + PMRMSCL_CMSE_MASK = 0x1, + PMRMSCL_CBA_MASK = 0xfffff, }; -#define NVME_PMRMSC_CMSE(pmrmsc) \ - ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK) -#define NVME_PMRMSC_CBA(pmrmsc) \ - ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK) +#define NVME_PMRMSCL_CMSE(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CMSE_SHIFT) & PMRMSCL_CMSE_MASK) +#define 
NVME_PMRMSCL_CBA(pmrmscl) \ + ((pmrmscl >> PMRMSCL_CBA_SHIFT) & PMRMSCL_CBA_MASK) -#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT) -#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \ - (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT) +#define NVME_PMRMSCL_SET_CMSE(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CMSE_MASK) << PMRMSCL_CMSE_SHIFT) +#define NVME_PMRMSCL_SET_CBA(pmrmscl, val) \ + (pmrmscl |= (uint32_t)(val & PMRMSCL_CBA_MASK) << PMRMSCL_CBA_SHIFT) enum NvmeSglDescriptorType { NVME_SGL_DESCR_TYPE_DATA_BLOCK = 0x0, @@ -708,6 +736,14 @@ enum { #define NVME_RW_PRINFO(control) ((control >> 10) & 0xf) +enum { + NVME_PRINFO_PRACT = 1 << 3, + NVME_PRINFO_PRCHK_GUARD = 1 << 2, + NVME_PRINFO_PRCHK_APP = 1 << 1, + NVME_PRINFO_PRCHK_REF = 1 << 0, + NVME_PRINFO_PRCHK_MASK = 7 << 0, +}; + typedef struct QEMU_PACKED NvmeDsmCmd { uint8_t opcode; uint8_t flags; @@ -848,8 +884,8 @@ enum NvmeStatusCodes { NVME_FW_REQ_SUSYSTEM_RESET = 0x0110, NVME_NS_ALREADY_ATTACHED = 0x0118, NVME_NS_PRIVATE = 0x0119, - NVME_NS_NOT_ATTACHED = 0x011A, - NVME_NS_CTRL_LIST_INVALID = 0x011C, + NVME_NS_NOT_ATTACHED = 0x011a, + NVME_NS_CTRL_LIST_INVALID = 0x011c, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, @@ -980,6 +1016,7 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, + NVME_ID_CNS_CTRL_LIST = 0x13, NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET = 0x1c, @@ -1117,6 +1154,11 @@ enum NvmeIdCtrlCmic { NVME_CMIC_MULTI_CTRL = 1 << 1, }; +enum NvmeNsAttachmentOperation { + NVME_NS_ATTACHMENT_ATTACH = 0x0, + NVME_NS_ATTACHMENT_DETACH = 0x1, +}; + #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) #define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf) #define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) @@ -1341,6 +1383,15 @@ enum NvmeIdNsDps { NVME_ID_NS_DPS_FIRST_EIGHT = 8, }; +enum NvmeIdNsFlbas { + NVME_ID_NS_FLBAS_EXTENDED = 1 << 4, +}; + +enum NvmeIdNsMc { + NVME_ID_NS_MC_EXTENDED = 1 << 0, + NVME_ID_NS_MC_SEPARATE = 1 << 1, +}; + #define NVME_ID_NS_DPS_TYPE(dps) (dps & NVME_ID_NS_DPS_TYPE_MASK) typedef struct NvmeDifTuple { @@ -1409,9 +1460,9 @@ typedef enum NvmeZoneState { NVME_ZONE_STATE_IMPLICITLY_OPEN = 0x02, NVME_ZONE_STATE_EXPLICITLY_OPEN = 0x03, NVME_ZONE_STATE_CLOSED = 0x04, - NVME_ZONE_STATE_READ_ONLY = 0x0D, - NVME_ZONE_STATE_FULL = 0x0E, - NVME_ZONE_STATE_OFFLINE = 0x0F, + NVME_ZONE_STATE_READ_ONLY = 0x0d, + NVME_ZONE_STATE_FULL = 0x0e, + NVME_ZONE_STATE_OFFLINE = 0x0f, } NvmeZoneState; static inline void _nvme_check_size(void) diff --git a/include/block/qdict.h b/include/block/qdict.h index d8cb502d7db..ced2acfb92a 100644 --- a/include/block/qdict.h +++ b/include/block/qdict.h @@ -20,8 +20,6 @@ void qdict_join(QDict *dest, QDict *src, bool overwrite); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); void qdict_array_split(QDict *src, QList **dst); int qdict_array_entries(QDict *src, const char *subqdict); -QObject *qdict_crumple(const QDict *src, Error **errp); -void qdict_flatten(QDict *qdict); typedef struct QDictRenames { const char *from; diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 251b10d2733..21fc10c4c96 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -51,11 +51,13 @@ typedef struct LinuxAioState LinuxAioState; LinuxAioState *laio_init(Error **errp); void 
laio_cleanup(LinuxAioState *s); int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - uint64_t offset, QEMUIOVector *qiov, int type); + uint64_t offset, QEMUIOVector *qiov, int type, + uint64_t dev_max_batch); void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); -void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s); +void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch); #endif /* io_uring.c - Linux io_uring implementation */ #ifdef CONFIG_LINUX_IO_URING diff --git a/replication.h b/include/block/replication.h similarity index 98% rename from replication.h rename to include/block/replication.h index d49fc22cb9f..21931b4f0ca 100644 --- a/replication.h +++ b/include/block/replication.h @@ -23,7 +23,7 @@ typedef struct ReplicationOps ReplicationOps; typedef struct ReplicationState ReplicationState; /** - * SECTION:replication.h + * SECTION:block/replication.h * @title:Base Replication System * @short_description: interfaces for handling replication * @@ -32,7 +32,7 @@ typedef struct ReplicationState ReplicationState; * * How to use replication interfaces * - * #include "replication.h" + * #include "block/replication.h" * * typedef struct BDRVReplicationState { * ReplicationState *rs; diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h index c646f267a45..f50f923e7e1 100644 --- a/include/block/write-threshold.h +++ b/include/block/write-threshold.h @@ -13,7 +13,7 @@ #ifndef BLOCK_WRITE_THRESHOLD_H #define BLOCK_WRITE_THRESHOLD_H -#include "block/block_int.h" +#include "qemu/typedefs.h" /* * bdrv_write_threshold_set: @@ -36,27 +36,12 @@ void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); /* - * bdrv_write_threshold_is_set + * bdrv_write_threshold_check_write * - * Tell if a write threshold is set for a given BDS. + * Check whether the specified request exceeds the write threshold. + * If so, send a corresponding event and disable write threshold checking. */ -bool bdrv_write_threshold_is_set(const BlockDriverState *bs); - -/* - * bdrv_write_threshold_exceeded - * - * Return the extent of a write request that exceeded the threshold, - * or zero if the request is below the threshold. - * Return zero also if the threshold was not set. - * - * NOTE: here we assume the following holds for each request this code - * deals with: - * - * assert((req->offset + req->bytes) <= UINT64_MAX) - * - * Please not there is *not* an actual C assert(). - */ -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, - const BdrvTrackedRequest *req); +void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, + int64_t bytes); #endif diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h index a5538433649..867ef1b3b28 100644 --- a/include/chardev/char-fe.h +++ b/include/chardev/char-fe.h @@ -174,6 +174,9 @@ void qemu_chr_fe_set_open(CharBackend *be, int fe_open); void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) GCC_FMT_ATTR(2, 3); + +typedef gboolean (*FEWatchFunc)(void *do_not_use, GIOCondition condition, void *data); + /** * qemu_chr_fe_add_watch: * @cond: the condition to poll for @@ -188,10 +191,13 @@ void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) 
* Note that you are responsible to update the front-end sources if * you are switching the main context with qemu_chr_fe_set_handlers(). * + * Warning: DO NOT use the first callback argument (it may be either + * a GIOChannel or a QIOChannel, depending on the underlying chardev) + * * Returns: the source tag */ guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, - GIOFunc func, void *user_data); + FEWatchFunc func, void *user_data); /** * qemu_chr_fe_write: diff --git a/include/chardev/char.h b/include/chardev/char.h index 7c0444f90db..a319b5fdff7 100644 --- a/include/chardev/char.h +++ b/include/chardev/char.h @@ -254,26 +254,58 @@ struct ChardevClass { bool internal; /* TODO: eventually use TYPE_USER_CREATABLE */ bool supports_yank; + + /* parse command line options and populate QAPI @backend */ void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp); + /* called after construction, open/starts the backend */ void (*open)(Chardev *chr, ChardevBackend *backend, bool *be_opened, Error **errp); + /* write buf to the backend */ int (*chr_write)(Chardev *s, const uint8_t *buf, int len); + + /* + * Read from the backend (blocking). A typical front-end will instead rely + * on chr_can_read/chr_read being called when polling/looping. + */ int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len); + + /* create a watch on the backend */ GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond); + + /* update the backend internal sources */ void (*chr_update_read_handler)(Chardev *s); + + /* send an ioctl to the backend */ int (*chr_ioctl)(Chardev *s, int cmd, void *arg); + + /* get ancillary-received fds during last read */ int (*get_msgfds)(Chardev *s, int* fds, int num); + + /* set ancillary fds to be sent with next write */ int (*set_msgfds)(Chardev *s, int *fds, int num); + + /* accept the given fd */ int (*chr_add_client)(Chardev *chr, int fd); + + /* wait for a connection */ int (*chr_wait_connected)(Chardev *chr, Error **errp); + + /* disconnect a connection */ void (*chr_disconnect)(Chardev *chr); + + /* called by frontend when it can read */ void (*chr_accept_input)(Chardev *chr); + + /* set terminal echo */ void (*chr_set_echo)(Chardev *chr, bool echo); + + /* notify the backend of frontend open state */ void (*chr_set_fe_open)(Chardev *chr, int fe_open); + + /* handle various events */ void (*chr_be_event)(Chardev *s, QEMUChrEvent event); - void (*chr_options_parsed)(Chardev *chr); }; Chardev *qemu_chardev_new(const char *id, const char *typename, diff --git a/include/crypto/tls-cipher-suites.h b/include/crypto/tls-cipher-suites.h index bb9ee53e03a..7eb1b76122d 100644 --- a/include/crypto/tls-cipher-suites.h +++ b/include/crypto/tls-cipher-suites.h @@ -19,12 +19,6 @@ typedef struct QCryptoTLSCipherSuites QCryptoTLSCipherSuites; DECLARE_INSTANCE_CHECKER(QCryptoTLSCipherSuites, QCRYPTO_TLS_CIPHER_SUITES, TYPE_QCRYPTO_TLS_CIPHER_SUITES) -struct QCryptoTLSCipherSuites { - /* */ - QCryptoTLSCreds parent_obj; - /* */ -}; - /** * qcrypto_tls_cipher_suites_get_data: * @obj: pointer to a TLS cipher suites object diff --git a/include/crypto/tlscreds.h b/include/crypto/tlscreds.h index d0808e391e9..2a8a8570109 100644 --- a/include/crypto/tlscreds.h +++ b/include/crypto/tlscreds.h @@ -24,10 +24,6 @@ #include "qapi/qapi-types-crypto.h" #include "qom/object.h" -#ifdef CONFIG_GNUTLS -#include -#endif - #define TYPE_QCRYPTO_TLS_CREDS "tls-creds" typedef struct QCryptoTLSCreds QCryptoTLSCreds; typedef struct QCryptoTLSCredsClass QCryptoTLSCredsClass; @@ -48,22 +44,24 @@ 
typedef bool (*CryptoTLSCredsReload)(QCryptoTLSCreds *, Error **); * certificate credentials. */ -struct QCryptoTLSCreds { - Object parent_obj; - char *dir; - QCryptoTLSCredsEndpoint endpoint; -#ifdef CONFIG_GNUTLS - gnutls_dh_params_t dh_params; -#endif - bool verifyPeer; - char *priority; -}; - - struct QCryptoTLSCredsClass { ObjectClass parent_class; CryptoTLSCredsReload reload; }; +/** + * qcrypto_tls_creds_check_endpoint: + * @creds: pointer to a TLS credentials object + * @endpoint: type of network endpoint that will be using the credentials + * @errp: pointer to a NULL-initialized error object + * + * Check whether the credentials is setup according to + * the type of @endpoint argument. + * + * Returns true if the credentials is setup for the endpoint, false otherwise + */ +bool qcrypto_tls_creds_check_endpoint(QCryptoTLSCreds *creds, + QCryptoTLSCredsEndpoint endpoint, + Error **errp); #endif /* QCRYPTO_TLSCREDS_H */ diff --git a/include/crypto/tlscredsanon.h b/include/crypto/tlscredsanon.h index 3f464a38095..bd3023f9ea7 100644 --- a/include/crypto/tlscredsanon.h +++ b/include/crypto/tlscredsanon.h @@ -92,18 +92,6 @@ typedef struct QCryptoTLSCredsAnonClass QCryptoTLSCredsAnonClass; * */ - -struct QCryptoTLSCredsAnon { - QCryptoTLSCreds parent_obj; -#ifdef CONFIG_GNUTLS - union { - gnutls_anon_server_credentials_t server; - gnutls_anon_client_credentials_t client; - } data; -#endif -}; - - struct QCryptoTLSCredsAnonClass { QCryptoTLSCredsClass parent_class; }; diff --git a/include/crypto/tlscredspsk.h b/include/crypto/tlscredspsk.h index d7e6bdb5edf..bcd07dc4f62 100644 --- a/include/crypto/tlscredspsk.h +++ b/include/crypto/tlscredspsk.h @@ -87,18 +87,6 @@ typedef struct QCryptoTLSCredsPSKClass QCryptoTLSCredsPSKClass; * The PSK file can be created and managed using psktool. 
*/ -struct QCryptoTLSCredsPSK { - QCryptoTLSCreds parent_obj; - char *username; -#ifdef CONFIG_GNUTLS - union { - gnutls_psk_server_credentials_t server; - gnutls_psk_client_credentials_t client; - } data; -#endif -}; - - struct QCryptoTLSCredsPSKClass { QCryptoTLSCredsClass parent_class; }; diff --git a/include/crypto/tlscredsx509.h b/include/crypto/tlscredsx509.h index c6d89b78819..c4daba21a6b 100644 --- a/include/crypto/tlscredsx509.h +++ b/include/crypto/tlscredsx509.h @@ -96,16 +96,6 @@ typedef struct QCryptoTLSCredsX509Class QCryptoTLSCredsX509Class; * */ -struct QCryptoTLSCredsX509 { - QCryptoTLSCreds parent_obj; -#ifdef CONFIG_GNUTLS - gnutls_certificate_credentials_t data; -#endif - bool sanityCheck; - char *passwordid; -}; - - struct QCryptoTLSCredsX509Class { QCryptoTLSCredsClass parent_class; }; diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h index 691e4687d96..446fe662c5b 100644 --- a/include/disas/dis-asm.h +++ b/include/disas/dis-asm.h @@ -9,6 +9,12 @@ #ifndef DISAS_DIS_ASM_H #define DISAS_DIS_ASM_H +#include "qemu/bswap.h" + +#ifdef __cplusplus +extern "C" { +#endif + typedef void *PTR; typedef uint64_t bfd_vma; typedef int64_t bfd_signed_vma; @@ -243,8 +249,6 @@ enum bfd_architecture #define bfd_mach_nios2 0 #define bfd_mach_nios2r1 1 #define bfd_mach_nios2r2 2 - bfd_arch_lm32, /* Lattice Mico32 */ -#define bfd_mach_lm32 1 bfd_arch_rx, /* Renesas RX */ #define bfd_mach_rx 0x75 #define bfd_mach_rx_v2 0x76 @@ -438,7 +442,6 @@ int print_insn_m32r (bfd_vma, disassemble_info*); int print_insn_m88k (bfd_vma, disassemble_info*); int print_insn_mn10200 (bfd_vma, disassemble_info*); int print_insn_mn10300 (bfd_vma, disassemble_info*); -int print_insn_moxie (bfd_vma, disassemble_info*); int print_insn_ns32k (bfd_vma, disassemble_info*); int print_insn_big_powerpc (bfd_vma, disassemble_info*); int print_insn_little_powerpc (bfd_vma, disassemble_info*); @@ -453,9 +456,7 @@ int print_insn_crisv32 (bfd_vma, disassemble_info*); int print_insn_crisv10 (bfd_vma, disassemble_info*); int print_insn_microblaze (bfd_vma, disassemble_info*); int print_insn_ia64 (bfd_vma, disassemble_info*); -int print_insn_lm32 (bfd_vma, disassemble_info*); -int print_insn_big_nios2 (bfd_vma, disassemble_info*); -int print_insn_little_nios2 (bfd_vma, disassemble_info*); +int print_insn_nios2(bfd_vma, disassemble_info*); int print_insn_xtensa (bfd_vma, disassemble_info*); int print_insn_riscv32 (bfd_vma, disassemble_info*); int print_insn_riscv64 (bfd_vma, disassemble_info*); @@ -482,8 +483,6 @@ bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size); /* from libbfd */ -#include "qemu/bswap.h" - static inline bfd_vma bfd_getl64(const bfd_byte *addr) { return ldq_le_p(addr); @@ -511,4 +510,8 @@ static inline bfd_vma bfd_getb16(const bfd_byte *addr) typedef bool bfd_boolean; +#ifdef __cplusplus +} +#endif + #endif /* DISAS_DIS_ASM_H */ diff --git a/include/elf.h b/include/elf.h index 78237c9a871..811bf4a1cb5 100644 --- a/include/elf.h +++ b/include/elf.h @@ -174,9 +174,8 @@ typedef struct mips_elf_abiflags_v0 { #define EM_OPENRISC 92 /* OpenCores OpenRISC */ -#define EM_UNICORE32 110 /* UniCore32 */ - #define EM_HEXAGON 164 /* Qualcomm Hexagon */ + #define EM_RX 173 /* Renesas RX family */ #define EM_RISCV 243 /* RISC-V */ @@ -206,9 +205,6 @@ typedef struct mips_elf_abiflags_v0 { #define EM_AARCH64 183 -#define EM_MOXIE 223 /* Moxie processor family */ -#define EM_MOXIE_OLD 0xFEED - #define EF_AVR_MACH 0x7F /* Mask for AVR e_flags to get core type */ /* This is the info that is 
needed to parse the dynamic section of the file */ @@ -609,6 +605,13 @@ typedef struct { #define HWCAP_S390_HIGH_GPRS 512 #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +#define HWCAP_S390_VXRS_BCD 4096 +#define HWCAP_S390_VXRS_EXT 8192 +#define HWCAP_S390_GS 16384 +#define HWCAP_S390_VXRS_EXT2 32768 +#define HWCAP_S390_VXRS_PDE 65536 +#define HWCAP_S390_SORT 131072 +#define HWCAP_S390_DFLT 262144 /* M68K specific definitions. */ /* We use the top 24 bits to encode information about the diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 2062f418473..65ea00e0654 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -442,10 +442,10 @@ static inline bool tlb_hit(target_ulong tlb_addr, target_ulong addr) #ifdef CONFIG_TCG /* accel/tcg/cpu-exec.c */ -void dump_drift_info(void); +void dump_drift_info(GString *buf); /* accel/tcg/translate-all.c */ -void dump_exec_info(void); -void dump_opcount_info(void); +void dump_exec_info(GString *buf); +void dump_opcount_info(GString *buf); #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 5a0a2d93e06..039d422bf4c 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -57,7 +57,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb); void *qemu_ram_get_host_addr(RAMBlock *rb); ram_addr_t qemu_ram_get_offset(RAMBlock *rb); ram_addr_t qemu_ram_get_used_length(RAMBlock *rb); +ram_addr_t qemu_ram_get_max_length(RAMBlock *rb); bool qemu_ram_is_shared(RAMBlock *rb); +bool qemu_ram_is_noreserve(RAMBlock *rb); bool qemu_ram_is_uf_zeroable(RAMBlock *rb); void qemu_ram_set_uf_zeroable(RAMBlock *rb); bool qemu_ram_is_migratable(RAMBlock *rb); diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 60d1fa5fffc..06b84804c86 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -28,10 +28,12 @@ * load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr) * cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr) * cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr) + * cpu_ld{sign}{size}{end}_mmu(env, ptr, oi, retaddr) * * store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val) * cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr) * cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr) + * cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr) * * sign is: * (empty): for 32 and 64 bit sizes @@ -53,10 +55,16 @@ * The "mmuidx" suffix carries an extra mmu_idx argument that specifies * the index to use; the "data" and "code" suffixes take the index from * cpu_mmu_index(). + * + * The "mmu" suffix carries the full MemOpIdx, with both mmu_idx and the + * MemOp including alignment requirements. The alignment will be enforced. 
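A minimal sketch of the "_mmu" accessor flavour described above, as it might appear in a target helper (illustrative only, not part of this patch; make_memop_idx() and cpu_mmu_index() are assumed from elsewhere in the tree):

    MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, cpu_mmu_index(env, false));
    uint32_t val = cpu_ldl_mmu(env, addr, oi, GETPC());   /* alignment enforced */
    cpu_stl_mmu(env, addr, val, oi, GETPC());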
*/ #ifndef CPU_LDST_H #define CPU_LDST_H +#include "exec/memopidx.h" +#include "qemu/int128.h" + #if defined(CONFIG_USER_ONLY) /* sparc32plus has 64bit long but 32bit space address * this can make bad result with g2h() and h2g() @@ -118,12 +126,10 @@ typedef target_ulong abi_ptr; uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr); - uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr); uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr); uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr); - uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr); int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr); uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr); @@ -131,37 +137,31 @@ uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr); uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); - uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); - uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra); void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val); - void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val); - void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val); void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val); void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); - void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr, uint64_t val, uintptr_t ra); - void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, uintptr_t ra); void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr, @@ -176,6 +176,157 @@ void cpu_st_cap_word_ra(CPUArchState *env, target_ulong ptr, target_ulong val, uintptr_t retaddr); #endif +uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); +uint64_t 
cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, + int mmu_idx, uintptr_t ra); + +void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val, + int mmu_idx, uintptr_t ra); +void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint32_t val, + int mmu_idx, uintptr_t ra); +void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr ptr, uint64_t val, + int mmu_idx, uintptr_t ra); + +uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr ptr, MemOpIdx oi, uintptr_t ra); +uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); +uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr, + MemOpIdx oi, uintptr_t ra); + +void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stl_be_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stq_be_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stw_le_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr, uint32_t val, + MemOpIdx oi, uintptr_t ra); +void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val, + MemOpIdx oi, uintptr_t ra); + +uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint64_t cpu_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint32_t cpu_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + MemOpIdx oi, uintptr_t retaddr); +uint64_t cpu_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + MemOpIdx oi, uintptr_t retaddr); + +#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ +TYPE cpu_atomic_ ## NAME ## SUFFIX ## _mmu \ + (CPUArchState *env, target_ulong addr, TYPE val, \ + MemOpIdx oi, uintptr_t retaddr); + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ + 
GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#else +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +#endif + +GEN_ATOMIC_HELPER_ALL(fetch_add) +GEN_ATOMIC_HELPER_ALL(fetch_sub) +GEN_ATOMIC_HELPER_ALL(fetch_and) +GEN_ATOMIC_HELPER_ALL(fetch_or) +GEN_ATOMIC_HELPER_ALL(fetch_xor) +GEN_ATOMIC_HELPER_ALL(fetch_smin) +GEN_ATOMIC_HELPER_ALL(fetch_umin) +GEN_ATOMIC_HELPER_ALL(fetch_smax) +GEN_ATOMIC_HELPER_ALL(fetch_umax) + +GEN_ATOMIC_HELPER_ALL(add_fetch) +GEN_ATOMIC_HELPER_ALL(sub_fetch) +GEN_ATOMIC_HELPER_ALL(and_fetch) +GEN_ATOMIC_HELPER_ALL(or_fetch) +GEN_ATOMIC_HELPER_ALL(xor_fetch) +GEN_ATOMIC_HELPER_ALL(smin_fetch) +GEN_ATOMIC_HELPER_ALL(umin_fetch) +GEN_ATOMIC_HELPER_ALL(smax_fetch) +GEN_ATOMIC_HELPER_ALL(umax_fetch) + +GEN_ATOMIC_HELPER_ALL(xchg) + +#undef GEN_ATOMIC_HELPER_ALL +#undef GEN_ATOMIC_HELPER + +Int128 cpu_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + MemOpIdx oi, uintptr_t retaddr); +Int128 cpu_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + MemOpIdx oi, uintptr_t retaddr); + +Int128 cpu_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +Int128 cpu_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +void cpu_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + MemOpIdx oi, uintptr_t retaddr); +void cpu_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + MemOpIdx oi, uintptr_t retaddr); + #if defined(CONFIG_USER_ONLY) extern __thread uintptr_t helper_retaddr; @@ -200,119 +351,6 @@ static inline void clear_helper_retaddr(void) helper_retaddr = 0; } -/* - * Provide the same *_mmuidx_ra interface as for softmmu. - * The mmu_idx argument is ignored. 
- */ - -static inline uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldub_data_ra(env, addr, ra); -} - -static inline int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldsb_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_lduw_be_data_ra(env, addr, ra); -} - -static inline int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldsw_be_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldl_be_data_ra(env, addr, ra); -} - -static inline uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldq_be_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_lduw_le_data_ra(env, addr, ra); -} - -static inline int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldsw_le_data_ra(env, addr, ra); -} - -static inline uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldl_le_data_ra(env, addr, ra); -} - -static inline uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra) -{ - return cpu_ldq_le_data_ra(env, addr, ra); -} - -static inline void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, uintptr_t ra) -{ - cpu_stb_data_ra(env, addr, val, ra); -} - -static inline void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stw_be_data_ra(env, addr, val, ra); -} - -static inline void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stl_be_data_ra(env, addr, val, ra); -} - -static inline void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint64_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stq_be_data_ra(env, addr, val, ra); -} - -static inline void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stw_le_data_ra(env, addr, val, ra); -} - -static inline void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint32_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stl_le_data_ra(env, addr, val, ra); -} - -static inline void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - uint64_t val, int mmu_idx, - uintptr_t ra) -{ - cpu_stq_le_data_ra(env, addr, val, ra); -} - #else /* Needed for TCG_OVERSIZED_GUEST */ @@ -343,46 +381,6 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx, return &env_tlb(env)->f[mmu_idx].table[tlb_index(env, mmu_idx, addr)]; } -uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); - -uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); - -uint32_t 
cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); -uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, - int mmu_idx, uintptr_t ra); - -void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); - -void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); - -void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val, - int mmu_idx, uintptr_t retaddr); -void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, - int mmu_idx, uintptr_t retaddr); - #endif /* defined(CONFIG_USER_ONLY) */ #ifdef TARGET_WORDS_BIGENDIAN @@ -398,6 +396,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra # define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra # define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra +# define cpu_ldw_mmu cpu_ldw_be_mmu +# define cpu_ldl_mmu cpu_ldl_be_mmu +# define cpu_ldq_mmu cpu_ldq_be_mmu # define cpu_stw_data cpu_stw_be_data # define cpu_stl_data cpu_stl_be_data # define cpu_stq_data cpu_stq_be_data @@ -407,6 +408,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra # define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra # define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra +# define cpu_stw_mmu cpu_stw_be_mmu +# define cpu_stl_mmu cpu_stl_be_mmu +# define cpu_stq_mmu cpu_stq_be_mmu #else # define cpu_lduw_data cpu_lduw_le_data # define cpu_ldsw_data cpu_ldsw_le_data @@ -420,6 +424,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra # define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra # define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra +# define cpu_ldw_mmu cpu_ldw_le_mmu +# define cpu_ldl_mmu cpu_ldl_le_mmu +# define cpu_ldq_mmu cpu_ldq_le_mmu # define cpu_stw_data cpu_stw_le_data # define cpu_stl_data cpu_stl_le_data # define cpu_stq_data cpu_stq_le_data @@ -429,6 +436,9 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val, # define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra # define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra # define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra +# define cpu_stw_mmu cpu_stw_le_mmu +# define cpu_stl_mmu cpu_stl_le_mmu +# define cpu_stq_mmu cpu_stq_le_mmu #endif uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 4376e654517..57dd90a76ad 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -21,7 +21,6 @@ #define EXEC_ALL_H #include "cpu.h" -#include "exec/tb-context.h" #ifdef CONFIG_TCG #include "exec/cpu_ldst.h" #endif @@ -262,6 +261,31 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, void tlb_flush_page_bits_by_mmuidx_all_cpus_synced (CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits); +/** + * tlb_flush_range_by_mmuidx + * @cpu: CPU whose TLB should 
be flushed + * @addr: virtual address of the start of the range to be flushed + * @len: length of range to be flushed + * @idxmap: bitmap of mmu indexes to flush + * @bits: number of significant bits in address + * + * For each mmuidx in @idxmap, flush all pages within [@addr,@addr+@len), + * comparing only the low @bits worth of each virtual page. + */ +void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits); + +/* Similarly, with broadcast and syncing. */ +void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits); +void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits); + /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for @@ -365,6 +389,25 @@ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, uint16_t idxmap, unsigned bits) { } +static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, target_ulong addr, + target_ulong len, uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + target_ulong len, + uint16_t idxmap, + unsigned bits) +{ +} +static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + target_long len, + uint16_t idxmap, + unsigned bits) +{ +} #endif /** * probe_access: @@ -446,21 +489,28 @@ struct tb_tc { struct TranslationBlock { target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */ target_ulong cs_base; /* CS base for this block */ - target_ulong cs_top; /* End (exclusive) of code segment for this block */ + target_ulong pcc_base; /* CHERI: Base of program counter for this block */ + target_ulong pcc_top; /* CHERI: End of program counter for this block */ uint32_t cheri_flags; /* Extra flags for CHERI. We need more bits than are available in flags (at least for MIPS) and this will allow us to avoid continuously changing the bits that we use when merging from upstream. */ uint32_t flags; /* flags defining in which context the code was generated */ uint32_t cflags; /* compile flags */ -#define CF_COUNT_MASK 0x00007fff -#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ -#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */ -#define CF_USE_ICOUNT 0x00020000 -#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ -#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ -#define CF_LOG_INSTR 0x00100000 /* Generate calls to instruction tracing */ -#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ + +/* Note that TCG_MAX_INSNS is 512; we validate this match elsewhere. */ +#define CF_COUNT_MASK 0x000001ff +#define CF_NO_GOTO_TB 0x00000200 /* Do not chain with goto_tb */ +#define CF_NO_GOTO_PTR 0x00000400 /* Do not chain with goto_ptr */ +#define CF_SINGLE_STEP 0x00000800 /* gdbstub single-step in effect */ +#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ +#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */ +#define CF_USE_ICOUNT 0x00020000 +#define CF_INVALID 0x00040000 /* TB is stale. 
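/*
 * Illustrative sketch, not part of the patch: a target that previously
 * looped over single-page flushes can now flush a contiguous range with
 * tlb_flush_range_by_mmuidx() declared above. The mmu-index mask and bit
 * count below are example values only.
 */
static void flush_two_pages_example(CPUState *cs, target_ulong va)
{
    /* Flush [va, va + 2 pages) for mmu index 0, comparing all address bits. */
    tlb_flush_range_by_mmuidx(cs, va, 2 * TARGET_PAGE_SIZE,
                              1u << 0, TARGET_LONG_BITS);
}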
Set with @jmp_lock held */ +#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ +#define CF_NOIRQ 0x00100000 /* Generate an uninterruptible TB */ +#define CF_LOG_INSTR 0x00200000 /* Generate calls to instruction tracing */ +#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 /* Per-vCPU dynamic tracing state used to generate this TB */ @@ -518,15 +568,17 @@ struct TranslationBlock { uintptr_t jmp_dest[2]; }; -// Reduce diff to upstream for CHERI (since we addd cs_top/ds_base/ds_top) -#if !defined(cpu_get_tb_cpu_state_6) -static inline void cpu_get_tb_cpu_state_6(CPUArchState *env, target_ulong *pc, +/* Reduce diff to upstream for CHERI since we add pcc_{base,top},cheri_flags. */ +#if !defined(cpu_get_tb_cpu_state_ext) +static inline void cpu_get_tb_cpu_state_ext(CPUArchState *env, target_ulong *pc, target_ulong *cs_base, - target_ulong *cs_top, + target_ulong *pcc_base, + target_ulong *pcc_top, uint32_t *cheri_flags, uint32_t *flags) { - (void)cs_top; + (void)pcc_base; + (void)pcc_top; (void)cheri_flags; cpu_get_tb_cpu_state(env, pc, cs_base, flags); } @@ -539,10 +591,7 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb) } /* current cflags for hashing/comparison */ -static inline uint32_t curr_cflags(CPUState *cpu) -{ - return cpu->tcg_cflags; -} +uint32_t curr_cflags(CPUState *cpu); /* TranslationBlock invalidate API */ #if defined(CONFIG_USER_ONLY) @@ -554,9 +603,9 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs); void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, target_ulong cs_top, - uint32_t cheri_flags, uint32_t flags, - uint32_t cflags); + target_ulong cs_base, target_ulong pcc_base, + target_ulong pcc_top, uint32_t cheri_flags, + uint32_t flags, uint32_t cflags); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); /* GETPC is the true target of the return instruction that we'll execute. */ @@ -637,6 +686,58 @@ static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, } return addr; } + +/** + * adjust_signal_pc: + * @pc: raw pc from the host signal ucontext_t. + * @is_write: host memory operation was write, or read-modify-write. + * + * Alter @pc as required for unwinding. Return the type of the + * guest memory access -- host reads may be for guest execution. + */ +MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write); + +/** + * handle_sigsegv_accerr_write: + * @cpu: the cpu context + * @old_set: the sigset_t from the signal ucontext_t + * @host_pc: the host pc, adjusted for the signal + * @host_addr: the host address of the fault + * + * Return true if the write fault has been handled, and should be re-tried. + */ +bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set, + uintptr_t host_pc, abi_ptr guest_addr); + +/** + * cpu_loop_exit_sigsegv: + * @cpu: the cpu context + * @addr: the guest address of the fault + * @access_type: access was read/write/execute + * @maperr: true for invalid page, false for permission fault + * @ra: host pc for unwinding + * + * Use the TCGCPUOps hook to record cpu state, do guest operating system + * specific things to raise SIGSEGV, and jump to the main cpu loop. 
+ */ +void QEMU_NORETURN cpu_loop_exit_sigsegv(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); + +/** + * cpu_loop_exit_sigbus: + * @cpu: the cpu context + * @addr: the guest address of the alignment fault + * @access_type: access was read/write/execute + * @ra: host pc for unwinding + * + * Use the TCGCPUOps hook to record cpu state, do guest operating system + * specific things to raise SIGBUS, and jump to the main cpu loop. + */ +void QEMU_NORETURN cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, + MMUAccessType access_type, + uintptr_t ra); + #else static inline void mmap_lock(void) {} static inline void mmap_unlock(void) {} diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 298e01eef49..c57204ddad9 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -1,6 +1,7 @@ #ifndef GEN_ICOUNT_H #define GEN_ICOUNT_H +#include "exec/exec-all.h" #include "qemu/timer.h" /* Helpers for instruction counting code generation. */ @@ -16,27 +17,10 @@ static inline void gen_io_start(void) tcg_temp_free_i32(tmp); } -/* - * cpu->can_do_io is cleared automatically at the beginning of - * each translation block. The cost is minimal and only paid - * for -icount, plus it would be very easy to forget doing it - * in the translator. Therefore, backends only need to call - * gen_io_start. - */ -static inline void gen_io_end(void) -{ - TCGv_i32 tmp = tcg_const_i32(0); - tcg_gen_st_i32(tmp, cpu_env, - offsetof(ArchCPU, parent_obj.can_do_io) - - offsetof(ArchCPU, env)); - tcg_temp_free_i32(tmp); -} - static inline void gen_tb_start(const TranslationBlock *tb) { TCGv_i32 count; - tcg_ctx->exitreq_label = gen_new_label(); if (tb_cflags(tb) & CF_USE_ICOUNT) { count = tcg_temp_local_new_i32(); } else { @@ -57,13 +41,34 @@ static inline void gen_tb_start(const TranslationBlock *tb) icount_start_insn = tcg_last_op(); } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + /* + * Emit the check against icount_decr.u32 to see if we should exit + * unless we suppress the check with CF_NOIRQ. If we are using + * icount and have suppressed interruption the higher level code + * should have ensured we don't run more instructions than the + * budget. + */ + if (tb_cflags(tb) & CF_NOIRQ) { + tcg_ctx->exitreq_label = NULL; + } else { + tcg_ctx->exitreq_label = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + } if (tb_cflags(tb) & CF_USE_ICOUNT) { tcg_gen_st16_i32(count, cpu_env, offsetof(ArchCPU, neg.icount_decr.u16.low) - offsetof(ArchCPU, env)); - gen_io_end(); + /* + * cpu->can_do_io is cleared automatically here at the beginning of + * each translation block. The cost is minimal and only paid for + * -icount, plus it would be very easy to forget doing it in the + * translator. Doing it here means we don't need a gen_io_end() to + * go with gen_io_start(). 
+ */ + tcg_gen_st_i32(tcg_constant_i32(0), cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); } tcg_temp_free_i32(count); @@ -80,8 +85,10 @@ static inline void gen_tb_end(const TranslationBlock *tb, int num_insns) tcgv_i32_arg(tcg_constant_i32(num_insns))); } - gen_set_label(tcg_ctx->exitreq_label); - tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); + if (tcg_ctx->exitreq_label) { + gen_set_label(tcg_ctx->exitreq_label); + tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); + } } #endif diff --git a/include/exec/helper-gen.h b/include/exec/helper-gen.h index 29c02f85dcc..1c2e7a8ed39 100644 --- a/include/exec/helper-gen.h +++ b/include/exec/helper-gen.h @@ -81,8 +81,8 @@ static inline void glue(gen_helper_, name)(dh_retvar_decl(ret) \ #include "helper.h" #include "trace/generated-helpers.h" #include "trace/generated-helpers-wrappers.h" -#include "tcg-runtime.h" -#include "plugin-helpers.h" +#include "accel/tcg/tcg-runtime.h" +#include "accel/tcg/plugin-helpers.h" #undef DEF_HELPER_FLAGS_0 #undef DEF_HELPER_FLAGS_1 diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h index bf171853b73..da1a8bc9c5e 100644 --- a/include/exec/helper-head.h +++ b/include/exec/helper-head.h @@ -49,8 +49,8 @@ #define dh_ctype_cptr const void * #define dh_ctype_void void #define dh_ctype_memop MemOp -/* Can't use TCGMemOpIdx here due to include ordering. */ -#define dh_ctype_memop_idx uint32_t /* TCGMemOpIdx */ +/* Can't use MemOpIdx here due to include ordering. */ +#define dh_ctype_memop_idx uint32_t /* MemOpIdx */ #define dh_ctype_noreturn void QEMU_NORETURN #define dh_ctype(t) dh_ctype_##t @@ -95,38 +95,15 @@ #define dh_retvar_cap_checked_ptr tcgv_cap_checked_ptr_temp(retval) #define dh_retvar(t) glue(dh_retvar_, dh_alias(t)) -#define dh_is_64bit_void 0 -#define dh_is_64bit_memop 0 -#define dh_is_64bit_memop_idx 0 -#define dh_is_64bit_noreturn 0 -#define dh_is_64bit_i32 0 -#define dh_is_64bit_i64 1 -#define dh_is_64bit_ptr (sizeof(void *) == 8) -#define dh_is_64bit_cptr dh_is_64bit_ptr -#define dh_is_64bit_cap_checked_ptr (sizeof(target_ulong) == 8) -#define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t)) - -#define dh_is_signed_void 0 -#define dh_is_signed_memop 0 -#define dh_is_signed_memop_idx 0 -#define dh_is_signed_noreturn 0 -#define dh_is_signed_i32 0 -#define dh_is_signed_s32 1 -#define dh_is_signed_i64 0 -#define dh_is_signed_s64 1 -#define dh_is_signed_f16 0 -#define dh_is_signed_f32 0 -#define dh_is_signed_f64 0 -#define dh_is_signed_tl 0 -#define dh_is_signed_cap_checked_ptr 0 -#define dh_is_signed_int 1 -/* ??? This is highly specific to the host cpu. There are even special - extension instructions that may be required, e.g. ia64's addp4. But - for now we don't support any 64-bit targets with 32-bit pointers. 
*/ -#define dh_is_signed_ptr 0 -#define dh_is_signed_cptr dh_is_signed_ptr -#define dh_is_signed_env dh_is_signed_ptr -#define dh_is_signed(t) dh_is_signed_##t +#define dh_typecode_void 0 +#define dh_typecode_noreturn 0 +#define dh_typecode_i32 2 +#define dh_typecode_s32 3 +#define dh_typecode_i64 4 +#define dh_typecode_s64 5 +#define dh_typecode_ptr 6 +#define dh_typecode_cap_checked_ptr 6 +#define dh_typecode(t) glue(dh_typecode_, dh_alias(t)) #define dh_callflag_i32 0 #define dh_callflag_s32 0 @@ -145,8 +122,7 @@ #define dh_callflag_noreturn TCG_CALL_NO_RETURN #define dh_callflag(t) glue(dh_callflag_, dh_alias(t)) -#define dh_sizemask(t, n) \ - ((dh_is_64bit(t) << (n*2)) | (dh_is_signed(t) << (n*2+1))) +#define dh_typemask(t, n) (dh_typecode(t) << (n * 3)) #define dh_arg(t, n) \ glue(glue(tcgv_, dh_alias(t)), _temp)(glue(arg, n)) diff --git a/include/exec/helper-proto.h b/include/exec/helper-proto.h index 659f9298e8f..ba100793a7d 100644 --- a/include/exec/helper-proto.h +++ b/include/exec/helper-proto.h @@ -39,8 +39,8 @@ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), \ #include "helper.h" #include "trace/generated-helpers.h" -#include "tcg-runtime.h" -#include "plugin-helpers.h" +#include "accel/tcg/tcg-runtime.h" +#include "accel/tcg/plugin-helpers.h" #undef IN_HELPER_PROTO diff --git a/include/exec/helper-tcg.h b/include/exec/helper-tcg.h index 27870509a20..16cd318b836 100644 --- a/include/exec/helper-tcg.h +++ b/include/exec/helper-tcg.h @@ -13,55 +13,55 @@ #define DEF_HELPER_FLAGS_0(NAME, FLAGS, ret) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) }, + .typemask = dh_typemask(ret, 0) }, #define DEF_HELPER_FLAGS_1(NAME, FLAGS, ret, t1) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) }, #define DEF_HELPER_FLAGS_2(NAME, FLAGS, ret, t1, t2) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) }, #define DEF_HELPER_FLAGS_3(NAME, FLAGS, ret, t1, t2, t3) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) }, #define DEF_HELPER_FLAGS_4(NAME, FLAGS, ret, t1, t2, t3, t4) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) }, #define DEF_HELPER_FLAGS_5(NAME, FLAGS, ret, t1, t2, t3, t4, t5) \ { .func = HELPER(NAME), .name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) }, #define DEF_HELPER_FLAGS_6(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6) \ { .func = HELPER(NAME), 
.name = str(NAME), \ .flags = FLAGS | dh_callflag(ret), \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) | dh_typemask(t6, 6) }, #define DEF_HELPER_FLAGS_7(NAME, FLAGS, ret, t1, t2, t3, t4, t5, t6, t7) \ { .func = HELPER(NAME), .name = str(NAME), .flags = FLAGS, \ - .sizemask = dh_sizemask(ret, 0) | dh_sizemask(t1, 1) \ - | dh_sizemask(t2, 2) | dh_sizemask(t3, 3) | dh_sizemask(t4, 4) \ - | dh_sizemask(t5, 5) | dh_sizemask(t6, 6) | dh_sizemask(t7, 7) }, + .typemask = dh_typemask(ret, 0) | dh_typemask(t1, 1) \ + | dh_typemask(t2, 2) | dh_typemask(t3, 3) | dh_typemask(t4, 4) \ + | dh_typemask(t5, 5) | dh_typemask(t6, 6) | dh_typemask(t7, 7) }, #include "helper.h" #include "trace/generated-helpers.h" -#include "tcg-runtime.h" -#include "plugin-helpers.h" +#include "accel/tcg/tcg-runtime.h" +#include "accel/tcg/plugin-helpers.h" #undef str #undef DEF_HELPER_FLAGS_0 diff --git a/include/exec/memop.h b/include/exec/memop.h index 529d07b02dd..04264ffd6b5 100644 --- a/include/exec/memop.h +++ b/include/exec/memop.h @@ -19,11 +19,15 @@ typedef enum MemOp { MO_16 = 1, MO_32 = 2, MO_64 = 3, - MO_SIZE = 3, /* Mask for the above. */ + MO_128 = 4, + MO_256 = 5, + MO_512 = 6, + MO_1024 = 7, + MO_SIZE = 0x07, /* Mask for the above. */ - MO_SIGN = 4, /* Sign-extended, otherwise zero-extended. */ + MO_SIGN = 0x08, /* Sign-extended, otherwise zero-extended. */ - MO_BSWAP = 8, /* Host reverse endian. */ + MO_BSWAP = 0x10, /* Host reverse endian. */ #ifdef HOST_WORDS_BIGENDIAN MO_LE = MO_BSWAP, MO_BE = 0, @@ -59,8 +63,8 @@ typedef enum MemOp { * - an alignment to a specified size, which may be more or less than * the access size (MO_ALIGN_x where 'x' is a size in bytes); */ - MO_ASHIFT = 4, - MO_AMASK = 7 << MO_ASHIFT, + MO_ASHIFT = 5, + MO_AMASK = 0x7 << MO_ASHIFT, #ifdef NEED_CPU_H #ifdef TARGET_ALIGNED_ONLY MO_ALIGN = 0, diff --git a/include/exec/memopidx.h b/include/exec/memopidx.h new file mode 100644 index 00000000000..83bce97874d --- /dev/null +++ b/include/exec/memopidx.h @@ -0,0 +1,55 @@ +/* + * Combine the MemOp and mmu_idx parameters into a single value. + * + * Authors: + * Richard Henderson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef EXEC_MEMOPIDX_H +#define EXEC_MEMOPIDX_H 1 + +#include "exec/memop.h" + +typedef uint32_t MemOpIdx; + +/** + * make_memop_idx + * @op: memory operation + * @idx: mmu index + * + * Encode these values into a single parameter. + */ +static inline MemOpIdx make_memop_idx(MemOp op, unsigned idx) +{ +#ifdef CONFIG_DEBUG_TCG + assert(idx <= 15); +#endif + return (op << 4) | idx; +} + +/** + * get_memop + * @oi: combined op/idx parameter + * + * Extract the memory operation from the combined value. + */ +static inline MemOp get_memop(MemOpIdx oi) +{ + return oi >> 4; +} + +/** + * get_mmuidx + * @oi: combined op/idx parameter + * + * Extract the mmu index from the combined value. 
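/*
 * Illustrative sketch, not part of the patch: composing and decomposing the
 * new MemOpIdx from exec/memopidx.h, and passing it to one of the
 * cpu_*_mmu accessors declared earlier. MO_LEUL and mmu index 1 are
 * example values only.
 */
static uint32_t memopidx_example(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{
    MemOpIdx oi = make_memop_idx(MO_LEUL, 1);   /* 32-bit LE access, mmu idx 1 */

    g_assert(get_memop(oi) == MO_LEUL);
    g_assert(get_mmuidx(oi) == 1);

    return cpu_ldl_le_mmu(env, addr, oi, ra);
}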
+ */ +static inline unsigned get_mmuidx(MemOpIdx oi) +{ + return oi & 15; +} + +#endif diff --git a/include/exec/memory.h b/include/exec/memory.h index 5728a681b27..20f1b27377e 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -42,6 +42,12 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass; DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass, IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION) +#define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager" +typedef struct RamDiscardManagerClass RamDiscardManagerClass; +typedef struct RamDiscardManager RamDiscardManager; +DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass, + RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER); + #ifdef CONFIG_FUZZ void fuzz_dma_read_cb(size_t addr, size_t len, @@ -55,7 +61,17 @@ static inline void fuzz_dma_read_cb(size_t addr, } #endif -extern bool global_dirty_log; +/* Possible bits for global_dirty_log_{start|stop} */ + +/* Dirty tracking enabled because migration is running */ +#define GLOBAL_DIRTY_MIGRATION (1U << 0) + +/* Dirty tracking enabled because measuring dirty rate */ +#define GLOBAL_DIRTY_DIRTY_RATE (1U << 1) + +#define GLOBAL_DIRTY_MASK (0x3) + +extern unsigned int global_dirty_tracking; typedef struct MemoryRegionOps MemoryRegionOps; @@ -65,6 +81,28 @@ struct ReservedRegion { unsigned type; }; +/** + * struct MemoryRegionSection: describes a fragment of a #MemoryRegion + * + * @mr: the region, or %NULL if empty + * @fv: the flat view of the address space the region is mapped in + * @offset_within_region: the beginning of the section, relative to @mr's start + * @size: the size of the section; will not exceed @mr's boundaries + * @offset_within_address_space: the address of the first byte of the section + * relative to the region's address space + * @readonly: writes to this section are ignored + * @nonvolatile: this section is non-volatile + */ +struct MemoryRegionSection { + Int128 size; + MemoryRegion *mr; + FlatView *fv; + hwaddr offset_within_region; + hwaddr offset_within_address_space; + bool readonly; + bool nonvolatile; +}; + typedef struct IOMMUTLBEntry IOMMUTLBEntry; /* See address_space_translate: bit 0 is read, bit 1 is write. */ @@ -131,7 +169,7 @@ typedef struct IOMMUTLBEvent { #define RAM_SHARED (1 << 1) /* Only a portion of RAM (used_length) is actually used, and migrated. - * This used_length size can change across reboots. + * Resizing RAM while migrating can result in the migration being canceled. */ #define RAM_RESIZEABLE (1 << 2) @@ -155,6 +193,16 @@ typedef struct IOMMUTLBEvent { */ #define RAM_UF_WRITEPROTECT (1 << 6) +/* + * RAM is mmap-ed with MAP_NORESERVE. When set, reserving swap space (or huge + * pages if applicable) is skipped: will bail out if not supported. When not + * set, the OS will do the reservation, if supported for the memory type. + */ +#define RAM_NORESERVE (1 << 7) + +/* RAM that isn't accessible through normal means. 
*/ +#define RAM_PROTECTED (1 << 8) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -441,6 +489,227 @@ struct IOMMUMemoryRegionClass { Error **errp); }; +typedef struct RamDiscardListener RamDiscardListener; +typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl, + MemoryRegionSection *section); +typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl, + MemoryRegionSection *section); + +struct RamDiscardListener { + /* + * @notify_populate: + * + * Notification that previously discarded memory is about to get populated. + * Listeners are able to object. If any listener objects, already + * successfully notified listeners are notified about a discard again. + * + * @rdl: the #RamDiscardListener getting notified + * @section: the #MemoryRegionSection to get populated. The section + * is aligned within the memory region to the minimum granularity + * unless it would exceed the registered section. + * + * Returns 0 on success. If the notification is rejected by the listener, + * an error is returned. + */ + NotifyRamPopulate notify_populate; + + /* + * @notify_discard: + * + * Notification that previously populated memory was discarded successfully + * and listeners should drop all references to such memory and prevent + * new population (e.g., unmap). + * + * @rdl: the #RamDiscardListener getting notified + * @section: the #MemoryRegionSection to get populated. The section + * is aligned within the memory region to the minimum granularity + * unless it would exceed the registered section. + */ + NotifyRamDiscard notify_discard; + + /* + * @double_discard_supported: + * + * The listener suppors getting @notify_discard notifications that span + * already discarded parts. + */ + bool double_discard_supported; + + MemoryRegionSection *section; + QLIST_ENTRY(RamDiscardListener) next; +}; + +static inline void ram_discard_listener_init(RamDiscardListener *rdl, + NotifyRamPopulate populate_fn, + NotifyRamDiscard discard_fn, + bool double_discard_supported) +{ + rdl->notify_populate = populate_fn; + rdl->notify_discard = discard_fn; + rdl->double_discard_supported = double_discard_supported; +} + +typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque); +typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); + +/* + * RamDiscardManagerClass: + * + * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion + * regions are currently populated to be used/accessed by the VM, notifying + * after parts were discarded (freeing up memory) and before parts will be + * populated (consuming memory), to be used/acessed by the VM. + * + * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the + * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is + * mapped. + * + * The #RamDiscardManager is intended to be used by technologies that are + * incompatible with discarding of RAM (e.g., VFIO, which may pin all + * memory inside a #MemoryRegion), and require proper coordination to only + * map the currently populated parts, to hinder parts that are expected to + * remain discarded from silently getting populated and consuming memory. + * Technologies that support discarding of RAM don't have to bother and can + * simply map the whole #MemoryRegion. + * + * An example #RamDiscardManager is virtio-mem, which logically (un)plugs + * memory within an assigned RAM #MemoryRegion, coordinated with the VM. 
+ * Logically unplugging memory consists of discarding RAM. The VM agreed to not + * access unplugged (discarded) memory - especially via DMA. virtio-mem will + * properly coordinate with listeners before memory is plugged (populated), + * and after memory is unplugged (discarded). + * + * Listeners are called in multiples of the minimum granularity (unless it + * would exceed the registered range) and changes are aligned to the minimum + * granularity within the #MemoryRegion. Listeners have to prepare for memory + * becomming discarded in a different granularity than it was populated and the + * other way around. + */ +struct RamDiscardManagerClass { + /* private */ + InterfaceClass parent_class; + + /* public */ + + /** + * @get_min_granularity: + * + * Get the minimum granularity in which listeners will get notified + * about changes within the #MemoryRegion via the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @mr: the #MemoryRegion + * + * Returns the minimum granularity. + */ + uint64_t (*get_min_granularity)(const RamDiscardManager *rdm, + const MemoryRegion *mr); + + /** + * @is_populated: + * + * Check whether the given #MemoryRegionSection is completely populated + * (i.e., no parts are currently discarded) via the #RamDiscardManager. + * There are no alignment requirements. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * + * Returns whether the given range is completely populated. + */ + bool (*is_populated)(const RamDiscardManager *rdm, + const MemoryRegionSection *section); + + /** + * @replay_populated: + * + * Call the #ReplayRamPopulate callback for all populated parts within the + * #MemoryRegionSection via the #RamDiscardManager. + * + * In case any call fails, no further calls are made. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamPopulate callback + * @opaque: pointer to forward to the callback + * + * Returns 0 on success, or a negative error if any notification failed. + */ + int (*replay_populated)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamPopulate replay_fn, void *opaque); + + /** + * @replay_discarded: + * + * Call the #ReplayRamDiscard callback for all discarded parts within the + * #MemoryRegionSection via the #RamDiscardManager. + * + * @rdm: the #RamDiscardManager + * @section: the #MemoryRegionSection + * @replay_fn: the #ReplayRamDiscard callback + * @opaque: pointer to forward to the callback + */ + void (*replay_discarded)(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, void *opaque); + + /** + * @register_listener: + * + * Register a #RamDiscardListener for the given #MemoryRegionSection and + * immediately notify the #RamDiscardListener about all populated parts + * within the #MemoryRegionSection via the #RamDiscardManager. + * + * In case any notification fails, no further notifications are triggered + * and an error is logged. + * + * @rdm: the #RamDiscardManager + * @rdl: the #RamDiscardListener + * @section: the #MemoryRegionSection + */ + void (*register_listener)(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section); + + /** + * @unregister_listener: + * + * Unregister a previously registered #RamDiscardListener via the + * #RamDiscardManager after notifying the #RamDiscardListener about all + * populated parts becoming unpopulated within the registered + * #MemoryRegionSection. 
+ * + * @rdm: the #RamDiscardManager + * @rdl: the #RamDiscardListener + */ + void (*unregister_listener)(RamDiscardManager *rdm, + RamDiscardListener *rdl); +}; + +uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm, + const MemoryRegion *mr); + +bool ram_discard_manager_is_populated(const RamDiscardManager *rdm, + const MemoryRegionSection *section); + +int ram_discard_manager_replay_populated(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamPopulate replay_fn, + void *opaque); + +void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *section, + ReplayRamDiscard replay_fn, + void *opaque); + +void ram_discard_manager_register_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl, + MemoryRegionSection *section); + +void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, + RamDiscardListener *rdl); + typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; @@ -487,6 +756,7 @@ struct MemoryRegion { const char *name; unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; + RamDiscardManager *rdm; /* Only for RAM */ }; struct IOMMUMemoryRegion { @@ -571,7 +841,7 @@ struct MemoryListener { * @log_start: * * Called during an address space update transaction, after - * one of #MemoryListener.region_add(),#MemoryListener.region_del() or + * one of #MemoryListener.region_add(), #MemoryListener.region_del() or * #MemoryListener.region_nop(), if dirty memory logging clients have * become active since the last transaction. * @@ -616,6 +886,18 @@ struct MemoryListener { */ void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); + /** + * @log_sync_global: + * + * This is the global version of @log_sync when the listener does + * not have a way to synchronize the log with finer granularity. + * When the listener registers with @log_sync_global defined, then + * its @log_sync must be NULL. Vice versa. + * + * @listener: The #MemoryListener. + */ + void (*log_sync_global)(MemoryListener *listener); + /** * @log_clear: * @@ -731,6 +1013,14 @@ struct MemoryListener { */ unsigned priority; + /** + * @name: + * + * Name of the listener. It can be used in contexts where we'd like to + * identify one memory listener with the rest. 
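/*
 * Illustrative sketch, not part of the patch: how a backend that pins guest
 * RAM (a VFIO-like consumer) might wire itself up to the RamDiscardManager
 * interface declared above. The example_* names are hypothetical.
 */
static int example_notify_populate(RamDiscardListener *rdl,
                                   MemoryRegionSection *section)
{
    /* Pin/map the newly populated part; a non-zero return rejects it. */
    return 0;
}

static void example_notify_discard(RamDiscardListener *rdl,
                                   MemoryRegionSection *section)
{
    /* Unpin/unmap the discarded part. */
}

static void example_register(RamDiscardManager *rdm, RamDiscardListener *rdl,
                             MemoryRegionSection *section)
{
    ram_discard_listener_init(rdl, example_notify_populate,
                              example_notify_discard, false);
    /* Registration immediately replays notify_populate for populated parts. */
    ram_discard_manager_register_listener(rdm, rdl, section);
}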
+ */ + const char *name; + /* private: */ AddressSpace *address_space; QTAILQ_ENTRY(MemoryListener) link; @@ -806,28 +1096,6 @@ typedef bool (*flatview_cb)(Int128 start, */ void flatview_for_each_range(FlatView *fv, flatview_cb cb, void *opaque); -/** - * struct MemoryRegionSection: describes a fragment of a #MemoryRegion - * - * @mr: the region, or %NULL if empty - * @fv: the flat view of the address space the region is mapped in - * @offset_within_region: the beginning of the section, relative to @mr's start - * @size: the size of the section; will not exceed @mr's boundaries - * @offset_within_address_space: the address of the first byte of the section - * relative to the region's address space - * @readonly: writes to this section are ignored - * @nonvolatile: this section is non-volatile - */ -struct MemoryRegionSection { - Int128 size; - MemoryRegion *mr; - FlatView *fv; - hwaddr offset_within_region; - hwaddr offset_within_address_space; - bool readonly; - bool nonvolatile; -}; - static inline bool MemoryRegionSection_eq(MemoryRegionSection *a, MemoryRegionSection *b) { @@ -840,6 +1108,26 @@ static inline bool MemoryRegionSection_eq(MemoryRegionSection *a, a->nonvolatile == b->nonvolatile; } +/** + * memory_region_section_new_copy: Copy a memory region section + * + * Allocate memory for a new copy, copy the memory region section, and + * properly take a reference on all relevant members. + * + * @s: the #MemoryRegionSection to copy + */ +MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s); + +/** + * memory_region_section_new_copy: Free a copied memory region section + * + * Free a copy of a memory section created via memory_region_section_new_copy(). + * properly dropping references on all relevant members. + * + * @s: the #MemoryRegionSection to copy + */ +void memory_region_section_free_copy(MemoryRegionSection *s); + /** * memory_region_init: Initialize a memory region * @@ -928,34 +1216,36 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr, Error **errp); /** - * memory_region_init_ram_shared_nomigrate: Initialize RAM memory region. - * Accesses into the region will - * modify memory directly. + * memory_region_init_ram_flags_nomigrate: Initialize RAM memory region. + * Accesses into the region will + * modify memory directly. * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. - * @share: allow remapping RAM to different addresses + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. * @errp: pointer to Error*, to store an error if it happens. * - * Note that this function is similar to memory_region_init_ram_nomigrate. - * The only difference is part of the RAM region can be remapped. + * Note that this function does not do anything to cause the data in the + * RAM memory region to be migrated; that is the responsibility of the caller. */ -void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, - Object *owner, - const char *name, - uint64_t size, - bool share, - Error **errp); +void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + uint32_t ram_flags, + Error **errp); /** * memory_region_init_resizeable_ram: Initialize memory region with resizeable * RAM. Accesses into the region will * modify memory directly. 
Only an initial * portion of this RAM is actually used. - * The used size can change across reboots. + * Changing the size while migrating + * can result in the migration being + * canceled. * * @mr: the #MemoryRegion to be initialized. * @owner: the object that tracks the region's reference count @@ -991,10 +1281,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * @size: size of the region. * @align: alignment of the region base address; if 0, the default alignment * (getpagesize()) will be used. - * @ram_flags: Memory region features: - * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag - * - RAM_PMEM: the memory is persistent memory - * Other bits are ignored now. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, * @path: the path in which to allocate the RAM. * @readonly: true to open @path for reading, false for read/write. * @errp: pointer to Error*, to store an error if it happens. @@ -1020,7 +1308,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, * @owner: the object that tracks the region's reference count * @name: the name of the region. * @size: size of the region. - * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED. * @fd: the fd to mmap. * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. @@ -1032,7 +1321,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, - bool share, + uint32_t ram_flags, int fd, ram_addr_t offset, Error **errp); @@ -1321,6 +1610,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) return mr->rom_device && mr->romd_mode; } +/** + * memory_region_is_protected: check whether a memory region is protected + * + * Returns %true if a memory region is protected RAM and cannot be accessed + * via standard mechanisms, e.g. DMA. + * + * @mr: the memory region being queried + */ +bool memory_region_is_protected(MemoryRegion *mr); + /** * memory_region_get_iommu: check whether a memory region is an iommu * @@ -1586,8 +1885,8 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr); /* memory_region_ram_resize: Resize a RAM region. * - * Only legal before guest might have detected the memory size: e.g. on - * incoming migration, or right after reset. + * Resizing RAM while migrating can result in the migration being canceled. + * Care has to be taken if the guest might have already detected the memory. * * @mr: a memory region created with @memory_region_init_resizeable_ram. * @newsize: the new size the region @@ -2003,6 +2302,41 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr); */ bool memory_region_is_mapped(MemoryRegion *mr); +/** + * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a + * #MemoryRegion + * + * The #RamDiscardManager cannot change while a memory region is mapped. 
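/*
 * Illustrative sketch, not part of the patch: with the old `bool share`
 * parameter replaced by a ram_flags bitmask, callers spell the desired
 * properties out explicitly. The region name and size below are placeholders.
 */
static void ram_flags_init_example(MemoryRegion *mr, Object *owner,
                                   Error **errp)
{
    memory_region_init_ram_flags_nomigrate(mr, owner, "example.ram",
                                           64 * MiB,
                                           RAM_SHARED | RAM_NORESERVE, errp);
}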
+ * + * @mr: the #MemoryRegion + */ +RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr); + +/** + * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a + * #RamDiscardManager assigned + * + * @mr: the #MemoryRegion + */ +static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr) +{ + return !!memory_region_get_ram_discard_manager(mr); +} + +/** + * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a + * #MemoryRegion + * + * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion + * that does not cover RAM, or a #MemoryRegion that already has a + * #RamDiscardManager assigned. + * + * @mr: the #MemoryRegion + * @rdm: #RamDiscardManager to set + */ +void memory_region_set_ram_discard_manager(MemoryRegion *mr, + RamDiscardManager *rdm); + /** * memory_region_find: translate an address/size relative to a * MemoryRegion into a #MemoryRegionSection. @@ -2085,13 +2419,17 @@ void memory_listener_unregister(MemoryListener *listener); /** * memory_global_dirty_log_start: begin dirty logging for all regions + * + * @flags: purpose of starting dirty log, migration or dirty rate */ -void memory_global_dirty_log_start(void); +void memory_global_dirty_log_start(unsigned int flags); /** * memory_global_dirty_log_stop: end dirty logging for all regions + * + * @flags: purpose of stopping dirty log, migration or dirty rate */ -void memory_global_dirty_log_stop(void); +void memory_global_dirty_log_stop(unsigned int flags); void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled); @@ -2303,7 +2641,7 @@ static inline uint8_t address_space_ldub_cached(MemoryRegionCache *cache, } static inline void address_space_stb_cached(MemoryRegionCache *cache, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) + hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result) { assert(addr < cache->len); if (likely(cache->ptr)) { @@ -2611,6 +2949,12 @@ static inline MemOp devend_memop(enum device_endian end) */ int ram_block_discard_disable(bool state); +/* + * See ram_block_discard_disable(): only disable uncoordinated discards, + * keeping coordinated discards (via the RamDiscardManager) enabled. + */ +int ram_block_uncoordinated_discard_disable(bool state); + /* * Inhibit technologies that disable discarding of pages in RAM blocks. * @@ -2620,12 +2964,20 @@ int ram_block_discard_disable(bool state); int ram_block_discard_require(bool state); /* - * Test if discarding of memory in ram blocks is disabled. + * See ram_block_discard_require(): only inhibit technologies that disable + * uncoordinated discarding of pages in RAM blocks, allowing co-existance with + * technologies that only inhibit uncoordinated discards (via the + * RamDiscardManager). + */ +int ram_block_coordinated_discard_require(bool state); + +/* + * Test if any discarding of memory in ram blocks is disabled. */ bool ram_block_discard_is_disabled(void); /* - * Test if discarding of memory in ram blocks is required to work reliably. + * Test if any discarding of memory in ram blocks is required to work reliably. 
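/*
 * Illustrative sketch, not part of the patch: global_dirty_log is replaced
 * by the global_dirty_tracking bitmask, so callers now say why logging is
 * enabled. A dirty-rate measurement would bracket its sampling roughly as
 * below (simplified; locking and the actual sampling are omitted).
 */
static void dirty_rate_sample_example(void)
{
    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
    /* ... walk dirty bitmaps; total_dirty_pages accumulates the count ... */
    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
}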
*/ bool ram_block_discard_is_required(void); diff --git a/include/exec/memory_ldst.h.inc b/include/exec/memory_ldst.h.inc index 46e6c220d35..7c3a641f7ed 100644 --- a/include/exec/memory_ldst.h.inc +++ b/include/exec/memory_ldst.h.inc @@ -20,7 +20,7 @@ */ #ifdef TARGET_ENDIANNESS -extern uint32_t glue(address_space_lduw, SUFFIX)(ARG1_DECL, +extern uint16_t glue(address_space_lduw, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); extern uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); @@ -29,17 +29,17 @@ extern uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL, extern void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stw, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stl, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stq, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result); #else -extern uint32_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, +extern uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); -extern uint32_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL, +extern uint16_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); -extern uint32_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL, +extern uint16_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); extern uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); @@ -50,11 +50,11 @@ extern uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL, extern uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL, hwaddr addr, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stb, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint8_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stw_le, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stw_be, SUFFIX)(ARG1_DECL, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stl_le, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result); extern void glue(address_space_stl_be, SUFFIX)(ARG1_DECL, diff --git a/include/exec/memory_ldst_cached.h.inc b/include/exec/memory_ldst_cached.h.inc index 7bc8790d346..d7834f852c4 100644 --- a/include/exec/memory_ldst_cached.h.inc +++ b/include/exec/memory_ldst_cached.h.inc @@ -24,6 +24,18 @@ #define LD_P(size) \ glue(glue(ld, size), glue(ENDIANNESS, _p)) +static inline uint16_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, + hwaddr addr, MemTxAttrs attrs, MemTxResult *result) +{ + assert(addr < cache->len && 2 <= cache->len - addr); + fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr); + if (likely(cache->ptr)) { + return LD_P(uw)(cache->ptr + addr); + } else { + return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result); + } +} + static inline 
uint32_t ADDRESS_SPACE_LD_CACHED(l)(MemoryRegionCache *cache, hwaddr addr, MemTxAttrs attrs, MemTxResult *result) { @@ -48,18 +60,6 @@ static inline uint64_t ADDRESS_SPACE_LD_CACHED(q)(MemoryRegionCache *cache, } } -static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, - hwaddr addr, MemTxAttrs attrs, MemTxResult *result) -{ - assert(addr < cache->len && 2 <= cache->len - addr); - fuzz_dma_read_cb(cache->xlat + addr, 2, cache->mrs.mr); - if (likely(cache->ptr)) { - return LD_P(uw)(cache->ptr + addr); - } else { - return ADDRESS_SPACE_LD_CACHED_SLOW(uw)(cache, addr, attrs, result); - } -} - #undef ADDRESS_SPACE_LD_CACHED #undef ADDRESS_SPACE_LD_CACHED_SLOW #undef LD_P @@ -71,25 +71,25 @@ static inline uint32_t ADDRESS_SPACE_LD_CACHED(uw)(MemoryRegionCache *cache, #define ST_P(size) \ glue(glue(st, size), glue(ENDIANNESS, _p)) -static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache, - hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) +static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache, + hwaddr addr, uint16_t val, MemTxAttrs attrs, MemTxResult *result) { - assert(addr < cache->len && 4 <= cache->len - addr); + assert(addr < cache->len && 2 <= cache->len - addr); if (likely(cache->ptr)) { - ST_P(l)(cache->ptr + addr, val); + ST_P(w)(cache->ptr + addr, val); } else { - ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result); + ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result); } } -static inline void ADDRESS_SPACE_ST_CACHED(w)(MemoryRegionCache *cache, +static inline void ADDRESS_SPACE_ST_CACHED(l)(MemoryRegionCache *cache, hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result) { - assert(addr < cache->len && 2 <= cache->len - addr); + assert(addr < cache->len && 4 <= cache->len - addr); if (likely(cache->ptr)) { - ST_P(w)(cache->ptr + addr, val); + ST_P(l)(cache->ptr + addr, val); } else { - ADDRESS_SPACE_ST_CACHED_SLOW(w)(cache, addr, val, attrs, result); + ADDRESS_SPACE_ST_CACHED_SLOW(l)(cache, addr, val, attrs, result); } } diff --git a/include/exec/memory_ldst_phys.h.inc b/include/exec/memory_ldst_phys.h.inc index b9dd53c389d..ecd678610d1 100644 --- a/include/exec/memory_ldst_phys.h.inc +++ b/include/exec/memory_ldst_phys.h.inc @@ -20,6 +20,12 @@ */ #ifdef TARGET_ENDIANNESS +static inline uint16_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + static inline uint32_t glue(ldl_phys, SUFFIX)(ARG1_DECL, hwaddr addr) { return glue(address_space_ldl, SUFFIX)(ARG1, addr, @@ -32,10 +38,10 @@ static inline uint64_t glue(ldq_phys, SUFFIX)(ARG1_DECL, hwaddr addr) MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val) { - return glue(address_space_lduw, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stw, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) @@ -44,18 +50,30 @@ static inline void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) MEMTXATTRS_UNSPECIFIED, NULL); } -static inline void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stw, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - static inline void glue(stq_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) { 
glue(address_space_stq, SUFFIX)(ARG1, addr, val, MEMTXATTRS_UNSPECIFIED, NULL); } #else +static inline uint8_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_ldub, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint16_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_le, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + +static inline uint16_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +{ + return glue(address_space_lduw_be, SUFFIX)(ARG1, addr, + MEMTXATTRS_UNSPECIFIED, NULL); +} + static inline uint32_t glue(ldl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) { return glue(address_space_ldl_le, SUFFIX)(ARG1, addr, @@ -80,22 +98,22 @@ static inline uint64_t glue(ldq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint8_t val) { - return glue(address_space_ldub, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stb, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val) { - return glue(address_space_lduw_le, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stw_le, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } -static inline uint32_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr) +static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint16_t val) { - return glue(address_space_lduw_be, SUFFIX)(ARG1, addr, - MEMTXATTRS_UNSPECIFIED, NULL); + glue(address_space_stw_be, SUFFIX)(ARG1, addr, val, + MEMTXATTRS_UNSPECIFIED, NULL); } static inline void glue(stl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) @@ -110,24 +128,6 @@ static inline void glue(stl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t va MEMTXATTRS_UNSPECIFIED, NULL); } -static inline void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stb, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - -static inline void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stw_le, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - -static inline void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val) -{ - glue(address_space_stw_be, SUFFIX)(ARG1, addr, val, - MEMTXATTRS_UNSPECIFIED, NULL); -} - static inline void glue(stq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val) { glue(address_space_stq_le, SUFFIX)(ARG1, addr, val, diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h index b1b72b5d90b..f92f1697398 100644 --- a/include/exec/plugin-gen.h +++ b/include/exec/plugin-gen.h @@ -27,13 +27,21 @@ void plugin_gen_insn_end(void); void plugin_gen_disable_mem_helpers(void); void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info); -static inline void plugin_insn_append(const void *from, size_t size) +static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size) { struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn; + abi_ptr off; if (insn == NULL) { return; } + off = pc - insn->vaddr; + if (off < insn->data->len) { + g_byte_array_set_size(insn->data, off); + } else if (off > insn->data->len) { + /* we have an unexpected gap */ + g_assert_not_reached(); + } insn->data = 
g_byte_array_append(insn->data, from, size); } @@ -62,7 +70,7 @@ static inline void plugin_gen_disable_mem_helpers(void) static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info) { } -static inline void plugin_insn_append(const void *from, size_t size) +static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size) { } #endif /* CONFIG_PLUGIN */ diff --git a/include/exec/poison.h b/include/exec/poison.h index 2d169d42148..43bdbed31d9 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -4,6 +4,8 @@ #ifndef HW_POISON_H #define HW_POISON_H +#include "config-poison.h" + #pragma GCC poison TARGET_I386 #pragma GCC poison TARGET_X86_64 #pragma GCC poison TARGET_AARCH64 @@ -12,7 +14,6 @@ #pragma GCC poison TARGET_CRIS #pragma GCC poison TARGET_HEXAGON #pragma GCC poison TARGET_HPPA -#pragma GCC poison TARGET_LM32 #pragma GCC poison TARGET_M68K #pragma GCC poison TARGET_MICROBLAZE #pragma GCC poison TARGET_MIPS @@ -21,7 +22,6 @@ #pragma GCC poison TARGET_MIPS64 #pragma GCC poison TARGET_CHERI #pragma GCC poison TARGET_ABI_MIPSN64 -#pragma GCC poison TARGET_MOXIE #pragma GCC poison TARGET_NIOS2 #pragma GCC poison TARGET_OPENRISC #pragma GCC poison TARGET_PPC @@ -36,7 +36,6 @@ #pragma GCC poison TARGET_SPARC #pragma GCC poison TARGET_SPARC64 #pragma GCC poison TARGET_TRICORE -#pragma GCC poison TARGET_UNICORE32 #pragma GCC poison TARGET_XTENSA #pragma GCC poison TARGET_ALIGNED_ONLY @@ -78,12 +77,10 @@ #pragma GCC poison CONFIG_HPPA_DIS #pragma GCC poison CONFIG_I386_DIS #pragma GCC poison CONFIG_HEXAGON_DIS -#pragma GCC poison CONFIG_LM32_DIS #pragma GCC poison CONFIG_M68K_DIS #pragma GCC poison CONFIG_MICROBLAZE_DIS #pragma GCC poison CONFIG_MIPS_DIS #pragma GCC poison CONFIG_NANOMIPS_DIS -#pragma GCC poison CONFIG_MOXIE_DIS #pragma GCC poison CONFIG_NIOS2_DIS #pragma GCC poison CONFIG_PPC_DIS #pragma GCC poison CONFIG_RISCV_DIS @@ -92,8 +89,12 @@ #pragma GCC poison CONFIG_SPARC_DIS #pragma GCC poison CONFIG_XTENSA_DIS +#pragma GCC poison CONFIG_HAX +#pragma GCC poison CONFIG_HVF #pragma GCC poison CONFIG_LINUX_USER #pragma GCC poison CONFIG_KVM #pragma GCC poison CONFIG_SOFTMMU +#pragma GCC poison CONFIG_WHPX +#pragma GCC poison CONFIG_XEN #endif diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 3cb9791df3b..64fb936c7c7 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -26,6 +26,8 @@ #include "exec/ramlist.h" #include "exec/ramblock.h" +extern uint64_t total_dirty_pages; + /** * clear_bmap_size: calculate clear bitmap size * @@ -104,11 +106,8 @@ long qemu_maxrampagesize(void); * Parameters: * @size: the size in bytes of the ram block * @mr: the memory region where the ram block is - * @ram_flags: specify the properties of the ram block, which can be one - * or bit-or of following values - * - RAM_SHARED: mmap the backing file or device with MAP_SHARED - * - RAM_PMEM: the backend @mem_path or @fd is persistent memory - * Other bits are ignored. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE. * @mem_path or @fd: specify the backing file or device * @readonly: true to open @path for reading, false for read/write. 
* @errp: pointer to Error*, to store an error if it happens @@ -126,7 +125,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); -RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, +RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, void (*resized)(const char*, @@ -372,10 +371,14 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp); - if (global_dirty_log) { + if (global_dirty_tracking) { qatomic_or( &blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp); + if (unlikely( + global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += ctpopl(temp); + } } if (tcg_enabled()) { @@ -395,7 +398,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, } else { uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; - if (!global_dirty_log) { + if (!global_dirty_tracking) { clients &= ~(1 << DIRTY_MEMORY_MIGRATION); } @@ -406,6 +409,9 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, for (i = 0; i < len; i++) { if (bitmap[i] != 0) { c = leul_to_cpu(bitmap[i]); + if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += ctpopl(c); + } do { j = ctzl(c); c &= ~(1ul << j); diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 4fa47599693..069e421573f 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -64,6 +64,16 @@ struct RAMBlock { */ unsigned long *clear_bmap; uint8_t clear_bmap_shift; + + /* + * RAM block length that corresponds to the used_length on the migration + * source (after RAM block sizes were synchronized). Especially, after + * starting to run the guest, used_length and postcopy_length can differ. + * Used to register/unregister uffd handlers and as the size of the received + * bitmap. Receiving any page beyond this length will bail out, as it + * could not have been valid on the source. 
+ */ + ram_addr_t postcopy_length; }; #endif diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h index 26704aa3b0d..2ad2a81accf 100644 --- a/include/exec/ramlist.h +++ b/include/exec/ramlist.h @@ -65,16 +65,21 @@ void qemu_mutex_lock_ramlist(void); void qemu_mutex_unlock_ramlist(void); struct RAMBlockNotifier { - void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size); - void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size); + void (*ram_block_added)(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size); + void (*ram_block_removed)(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size); + void (*ram_block_resized)(RAMBlockNotifier *n, void *host, size_t old_size, + size_t new_size); QLIST_ENTRY(RAMBlockNotifier) next; }; void ram_block_notifier_add(RAMBlockNotifier *n); void ram_block_notifier_remove(RAMBlockNotifier *n); -void ram_block_notify_add(void *host, size_t size); -void ram_block_notify_remove(void *host, size_t size); +void ram_block_notify_add(void *host, size_t size, size_t max_size); +void ram_block_notify_remove(void *host, size_t size, size_t max_size); +void ram_block_notify_resize(void *host, size_t old_size, size_t new_size); -void ram_block_dump(Monitor *mon); +GString *ram_block_format(void); #endif /* RAMLIST_H */ diff --git a/include/exec/tb-lookup.h b/include/exec/tb-lookup.h deleted file mode 100644 index 2f314eb6eb2..00000000000 --- a/include/exec/tb-lookup.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2017, Emilio G. Cota - * - * License: GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ -#ifndef EXEC_TB_LOOKUP_H -#define EXEC_TB_LOOKUP_H - -#ifdef NEED_CPU_H -#include "cpu.h" -#else -#include "exec/poison.h" -#endif - -#include "exec/exec-all.h" -#include "exec/tb-hash.h" - -/* Might cause an exception, so have a longjmp destination ready */ -static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, - target_ulong cs_top, - uint32_t cheri_flags, - uint32_t flags, uint32_t cflags) -{ - TranslationBlock *tb; - uint32_t hash; - - /* we should never be trying to look up an INVALID tb */ - tcg_debug_assert(!(cflags & CF_INVALID)); - - hash = tb_jmp_cache_hash_func(pc); - tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]); - - if (likely(tb && - tb->pc == pc && - tb->cs_base == cs_base && - tb->cs_top == cs_top && - tb->cheri_flags == cheri_flags && - tb->flags == flags && - tb->trace_vcpu_dstate == *cpu->trace_dstate && - tb_cflags(tb) == cflags)) { - return tb; - } - tb = tb_htable_lookup(cpu, pc, cs_base, cs_top, cheri_flags, flags, cflags); - - if (tb == NULL) { - return NULL; - } - qatomic_set(&cpu->tb_jmp_cache[hash], tb); - return tb; -} - -#endif /* EXEC_TB_LOOKUP_H */ diff --git a/include/exec/translate-all.h b/include/exec/translate-all.h index a557b4e2bb9..9f646389afe 100644 --- a/include/exec/translate-all.h +++ b/include/exec/translate-all.h @@ -33,6 +33,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end); void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr); #ifdef CONFIG_USER_ONLY +void page_protect(tb_page_addr_t page_addr); int page_unprotect(target_ulong address, uintptr_t pc); #endif diff --git a/include/exec/translator.h b/include/exec/translator.h index 91919f49ad7..e9eb2945a49 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -23,6 +23,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/plugin-gen.h" +#include 
"exec/translate-all.h" #include "tcg/tcg.h" @@ -87,6 +88,17 @@ typedef struct DisasContextBase { bool log_instr_enabled; uint8_t printf_used_ptr; #endif +#ifdef CONFIG_USER_ONLY + /* + * Guest address of the last byte of the last protected page. + * + * Pages containing the translated instructions are made non-writable in + * order to achieve consistency in case another thread is modifying the + * code while translate_insn() fetches the instruction bytes piecemeal. + * Such writer threads are blocked on mmap_lock() in page_unprotect(). + */ + target_ulong page_protect_end; +#endif } DisasContextBase; #ifdef TARGET_CHERI @@ -115,15 +127,6 @@ typedef struct DisasContextBase { * @insn_start: * Emit the tcg_gen_insn_start opcode. * - * @breakpoint_check: - * When called, the breakpoint has already been checked to match the PC, - * but the target may decide the breakpoint missed the address - * (e.g., due to conditions encoded in their flags). Return true to - * indicate that the breakpoint did hit, in which case no more breakpoints - * are checked. If the breakpoint did hit, emit any code required to - * signal the exception, and set db->is_jmp as necessary to terminate - * the main loop. - * * @translate_insn: * Disassemble one instruction and set db->pc_next for the start * of the following instruction. Set db->is_jmp as necessary to @@ -139,8 +142,6 @@ typedef struct TranslatorOps { void (*init_disas_context)(DisasContextBase *db, CPUState *cpu); void (*tb_start)(DisasContextBase *db, CPUState *cpu); void (*insn_start)(DisasContextBase *db, CPUState *cpu); - bool (*breakpoint_check)(DisasContextBase *db, CPUState *cpu, - const CPUBreakpoint *bp); void (*translate_insn)(DisasContextBase *db, CPUState *cpu); void (*tb_stop)(DisasContextBase *db, CPUState *cpu); void (*disas_log)(const DisasContextBase *db, CPUState *cpu); @@ -171,6 +172,16 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, void translator_loop_temp_check(DisasContextBase *db); +/** + * translator_use_goto_tb + * @db: Disassembly context + * @dest: target pc of the goto + * + * Return true if goto_tb is allowed between the current TB + * and the destination PC. 
+ */ +bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest); + /* * Translator Load Functions * @@ -183,27 +194,23 @@ void translator_loop_temp_check(DisasContextBase *db); */ #define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \ - static inline type \ - fullname ## _swap(CPUArchState *env, abi_ptr pc, bool do_swap) \ - { \ - type ret = load_fn(env, pc); \ - if (do_swap) { \ - ret = swap_fn(ret); \ - } \ - plugin_insn_append(&ret, sizeof(ret)); \ - return ret; \ - } \ - \ - static inline type fullname(CPUArchState *env, abi_ptr pc) \ + type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \ + abi_ptr pc, bool do_swap); \ + static inline type fullname(CPUArchState *env, \ + DisasContextBase *dcbase, abi_ptr pc) \ { \ - return fullname ## _swap(env, pc, false); \ + return fullname ## _swap(env, dcbase, pc, false); \ } -GEN_TRANSLATOR_LD(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) -GEN_TRANSLATOR_LD(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) -GEN_TRANSLATOR_LD(translator_lduw, uint16_t, cpu_lduw_code, bswap16) -GEN_TRANSLATOR_LD(translator_ldl, uint32_t, cpu_ldl_code, bswap32) -GEN_TRANSLATOR_LD(translator_ldq, uint64_t, cpu_ldq_code, bswap64) +#define FOR_EACH_TRANSLATOR_LD(F) \ + F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \ + F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) \ + F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \ + F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \ + F(translator_ldq, uint64_t, cpu_ldq_code, bswap64) + +FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD) + #undef GEN_TRANSLATOR_LD #endif /* EXEC__TRANSLATOR_H */ diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h index 2f0674fbdde..a98d759cd3a 100644 --- a/include/fpu/softfloat-helpers.h +++ b/include/fpu/softfloat-helpers.h @@ -48,8 +48,8 @@ this code that are retained. =============================================================================== */ -#ifndef _SOFTFLOAT_HELPERS_H_ -#define _SOFTFLOAT_HELPERS_H_ +#ifndef SOFTFLOAT_HELPERS_H +#define SOFTFLOAT_HELPERS_H #include "fpu/softfloat-types.h" @@ -69,7 +69,7 @@ static inline void set_float_exception_flags(int val, float_status *status) status->float_exception_flags = val; } -static inline void set_floatx80_rounding_precision(int val, +static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val, float_status *status) { status->floatx80_rounding_precision = val; @@ -120,7 +120,8 @@ static inline int get_float_exception_flags(float_status *status) return status->float_exception_flags; } -static inline int get_floatx80_rounding_precision(float_status *status) +static inline FloatX80RoundPrec +get_floatx80_rounding_precision(float_status *status) { return status->floatx80_rounding_precision; } diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h index a35ec2893a2..f35cdbfa636 100644 --- a/include/fpu/softfloat-macros.h +++ b/include/fpu/softfloat-macros.h @@ -8,7 +8,6 @@ * so some portions are provided under: * the SoftFloat-2a license * the BSD license - * GPL-v2-or-later * * Any future contributions to this file after December 1st 2014 will be * taken to be licensed under the Softfloat-2a license unless specifically @@ -75,14 +74,47 @@ this code that are retained. * THE POSSIBILITY OF SUCH DAMAGE. */ -/* Portions of this work are licensed under the terms of the GNU GPL, - * version 2 or later. See the COPYING file in the top-level directory. 
- */ - #ifndef FPU_SOFTFLOAT_MACROS_H #define FPU_SOFTFLOAT_MACROS_H #include "fpu/softfloat-types.h" +#include "qemu/host-utils.h" + +/** + * shl_double: double-word merging left shift + * @l: left or most-significant word + * @r: right or least-significant word + * @c: shift count + * + * Shift @l left by @c bits, shifting in bits from @r. + */ +static inline uint64_t shl_double(uint64_t l, uint64_t r, int c) +{ +#if defined(__x86_64__) + asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c)); + return l; +#else + return c ? (l << c) | (r >> (64 - c)) : l; +#endif +} + +/** + * shr_double: double-word merging right shift + * @l: left or most-significant word + * @r: right or least-significant word + * @c: shift count + * + * Shift @r right by @c bits, shifting in bits from @l. + */ +static inline uint64_t shr_double(uint64_t l, uint64_t r, int c) +{ +#if defined(__x86_64__) + asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c)); + return r; +#else + return c ? (r >> c) | (l << (64 - c)) : r; +#endif +} /*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero @@ -403,16 +435,12 @@ static inline void | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void - add128( - uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) +static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr) { - uint64_t z1; - - z1 = a1 + b1; - *z1Ptr = z1; - *z0Ptr = a0 + b0 + ( z1 < a1 ); - + bool c = 0; + *z1Ptr = uadd64_carry(a1, b1, &c); + *z0Ptr = uadd64_carry(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -423,34 +451,14 @@ static inline void | `z1Ptr', and `z2Ptr'. *----------------------------------------------------------------------------*/ -static inline void - add192( - uint64_t a0, - uint64_t a1, - uint64_t a2, - uint64_t b0, - uint64_t b1, - uint64_t b2, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) +static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2, + uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) { - uint64_t z0, z1, z2; - int8_t carry0, carry1; - - z2 = a2 + b2; - carry1 = ( z2 < a2 ); - z1 = a1 + b1; - carry0 = ( z1 < a1 ); - z0 = a0 + b0; - z1 += carry1; - z0 += ( z1 < carry1 ); - z0 += carry0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - + bool c = 0; + *z2Ptr = uadd64_carry(a2, b2, &c); + *z1Ptr = uadd64_carry(a1, b1, &c); + *z0Ptr = uadd64_carry(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -461,14 +469,12 @@ static inline void | `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void - sub128( - uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr ) +static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr) { - - *z1Ptr = a1 - b1; - *z0Ptr = a0 - b0 - ( a1 < b1 ); - + bool c = 0; + *z1Ptr = usub64_borrow(a1, b1, &c); + *z0Ptr = usub64_borrow(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -479,34 +485,14 @@ static inline void | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 
*----------------------------------------------------------------------------*/ -static inline void - sub192( - uint64_t a0, - uint64_t a1, - uint64_t a2, - uint64_t b0, - uint64_t b1, - uint64_t b2, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) +static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2, + uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) { - uint64_t z0, z1, z2; - int8_t borrow0, borrow1; - - z2 = a2 - b2; - borrow1 = ( a2 < b2 ); - z1 = a1 - b1; - borrow0 = ( a1 < b1 ); - z0 = a0 - b0; - z0 -= ( z1 < borrow1 ); - z1 -= borrow1; - z0 -= borrow0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - + bool c = 0; + *z2Ptr = usub64_borrow(a2, b2, &c); + *z1Ptr = usub64_borrow(a1, b1, &c); + *z0Ptr = usub64_borrow(a0, b0, &c); } /*---------------------------------------------------------------------------- @@ -515,27 +501,10 @@ static inline void | `z0Ptr' and `z1Ptr'. *----------------------------------------------------------------------------*/ -static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr ) +static inline void +mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr) { - uint32_t aHigh, aLow, bHigh, bLow; - uint64_t z0, zMiddleA, zMiddleB, z1; - - aLow = a; - aHigh = a>>32; - bLow = b; - bHigh = b>>32; - z1 = ( (uint64_t) aLow ) * bLow; - zMiddleA = ( (uint64_t) aLow ) * bHigh; - zMiddleB = ( (uint64_t) aHigh ) * bLow; - z0 = ( (uint64_t) aHigh ) * bHigh; - zMiddleA += zMiddleB; - z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); - zMiddleA <<= 32; - z1 += zMiddleA; - z0 += ( z1 < zMiddleA ); - *z1Ptr = z1; - *z0Ptr = z0; - + mulu64(z1Ptr, z0Ptr, a, b); } /*---------------------------------------------------------------------------- @@ -546,24 +515,14 @@ static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *----------------------------------------------------------------------------*/ static inline void - mul128By64To192( - uint64_t a0, - uint64_t a1, - uint64_t b, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr - ) +mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b, + uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) { - uint64_t z0, z1, z2, more1; - - mul64To128( a1, b, &z1, &z2 ); - mul64To128( a0, b, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; + uint64_t z0, z1, m1; + mul64To128(a1, b, &m1, z2Ptr); + mul64To128(a0, b, &z0, &z1); + add128(z0, z1, 0, m1, z0Ptr, z1Ptr); } /*---------------------------------------------------------------------------- @@ -573,34 +532,21 @@ static inline void | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
*----------------------------------------------------------------------------*/ -static inline void - mul128To256( - uint64_t a0, - uint64_t a1, - uint64_t b0, - uint64_t b1, - uint64_t *z0Ptr, - uint64_t *z1Ptr, - uint64_t *z2Ptr, - uint64_t *z3Ptr - ) +static inline void mul128To256(uint64_t a0, uint64_t a1, + uint64_t b0, uint64_t b1, + uint64_t *z0Ptr, uint64_t *z1Ptr, + uint64_t *z2Ptr, uint64_t *z3Ptr) { - uint64_t z0, z1, z2, z3; - uint64_t more1, more2; - - mul64To128( a1, b1, &z2, &z3 ); - mul64To128( a1, b0, &z1, &more2 ); - add128( z1, more2, 0, z2, &z1, &z2 ); - mul64To128( a0, b0, &z0, &more1 ); - add128( z0, more1, 0, z1, &z0, &z1 ); - mul64To128( a0, b1, &more1, &more2 ); - add128( more1, more2, 0, z2, &more1, &z2 ); - add128( z0, z1, 0, more1, &z0, &z1 ); - *z3Ptr = z3; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; + uint64_t z0, z1, z2; + uint64_t m0, m1, m2, n1, n2; + mul64To128(a1, b0, &m1, &m2); + mul64To128(a0, b1, &n1, &n2); + mul64To128(a1, b1, &z2, z3Ptr); + mul64To128(a0, b0, &z0, &z1); + + add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2); + add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr); } /*---------------------------------------------------------------------------- @@ -634,83 +580,6 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) } -/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd - * (https://gmplib.org/repo/gmp/file/tip/longlong.h) - * - * Licensed under the GPLv2/LGPLv3 - */ -static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, - uint64_t n0, uint64_t d) -{ -#if defined(__x86_64__) - uint64_t q; - asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); - return q; -#elif defined(__s390x__) && !defined(__clang__) - /* Need to use a TImode type to get an even register pair for DLGR. */ - unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; - asm("dlgr %0, %1" : "+r"(n) : "r"(d)); - *r = n >> 64; - return n; -#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7) - /* From Power ISA 2.06, programming note for divdeu. */ - uint64_t q1, q2, Q, r1, r2, R; - asm("divdeu %0,%2,%4; divdu %1,%3,%4" - : "=&r"(q1), "=r"(q2) - : "r"(n1), "r"(n0), "r"(d)); - r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ - r2 = n0 - (q2 * d); - Q = q1 + q2; - R = r1 + r2; - if (R >= d || R < r2) { /* overflow implies R > d */ - Q += 1; - R -= d; - } - *r = R; - return Q; -#else - uint64_t d0, d1, q0, q1, r1, r0, m; - - d0 = (uint32_t)d; - d1 = d >> 32; - - r1 = n1 % d1; - q1 = n1 / d1; - m = q1 * d0; - r1 = (r1 << 32) | (n0 >> 32); - if (r1 < m) { - q1 -= 1; - r1 += d; - if (r1 >= d) { - if (r1 < m) { - q1 -= 1; - r1 += d; - } - } - } - r1 -= m; - - r0 = r1 % d1; - q0 = r1 / d1; - m = q0 * d0; - r0 = (r0 << 32) | (uint32_t)n0; - if (r0 < m) { - q0 -= 1; - r0 += d; - if (r0 >= d) { - if (r0 < m) { - q0 -= 1; - r0 += d; - } - } - } - r0 -= m; - - *r = r0; - return (q1 << 32) | q0; -#endif -} - /*---------------------------------------------------------------------------- | Returns an approximation to the square root of the 32-bit significand given | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of @@ -794,4 +663,38 @@ static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) return a0 != b0 || a1 != b1; } +/* + * Similarly, comparisons of 192-bit values. 
+ */ + +static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2) +{ + return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0; +} + +static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2) +{ + if (a0 != b0) { + return a0 < b0; + } + if (a1 != b1) { + return a1 < b1; + } + return a2 <= b2; +} + +static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t b0, uint64_t b1, uint64_t b2) +{ + if (a0 != b0) { + return a0 < b0; + } + if (a1 != b1) { + return a1 < b1; + } + return a2 < b2; +} + #endif diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 8a3f20fae9e..5bcbd041f74 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -134,8 +134,10 @@ typedef enum __attribute__((__packed__)) { float_round_up = 2, float_round_to_zero = 3, float_round_ties_away = 4, - /* Not an IEEE rounding mode: round to the closest odd mantissa value */ + /* Not an IEEE rounding mode: round to closest odd, overflow to max */ float_round_to_odd = 5, + /* Not an IEEE rounding mode: round to closest odd, overflow to inf */ + float_round_to_odd_inf = 6, } FloatRoundMode; /* @@ -152,6 +154,14 @@ enum { float_flag_output_denormal = 128 }; +/* + * Rounding precision for floatx80. + */ +typedef enum __attribute__((__packed__)) { + floatx80_precision_x, + floatx80_precision_d, + floatx80_precision_s, +} FloatX80RoundPrec; /* * Floating Point Status. Individual architectures may maintain @@ -163,7 +173,7 @@ enum { typedef struct float_status { FloatRoundMode float_rounding_mode; uint8_t float_exception_flags; - signed char floatx80_rounding_precision; + FloatX80RoundPrec floatx80_rounding_precision; bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */ bool flush_to_zero; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 78ad5ca738b..a249991e612 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -100,7 +100,10 @@ typedef enum { | Routine to raise any or all of the software IEC/IEEE floating-point | exception flags. 
*----------------------------------------------------------------------------*/ -void float_raise(uint8_t flags, float_status *status); +static inline void float_raise(uint8_t flags, float_status *status) +{ + status->float_exception_flags |= flags; +} /*---------------------------------------------------------------------------- | If `a' is denormal and we are in flush-to-zero mode then set the @@ -240,6 +243,8 @@ float16 float16_minnum(float16, float16, float_status *status); float16 float16_maxnum(float16, float16, float_status *status); float16 float16_minnummag(float16, float16, float_status *status); float16 float16_maxnummag(float16, float16, float_status *status); +float16 float16_minimum_number(float16, float16, float_status *status); +float16 float16_maximum_number(float16, float16, float_status *status); float16 float16_sqrt(float16, float_status *status); FloatRelation float16_compare(float16, float16, float_status *status); FloatRelation float16_compare_quiet(float16, float16, float_status *status); @@ -419,6 +424,8 @@ bfloat16 bfloat16_minnum(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_maxnum(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_minnummag(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_maxnummag(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_minimum_number(bfloat16, bfloat16, float_status *status); +bfloat16 bfloat16_maximum_number(bfloat16, bfloat16, float_status *status); bfloat16 bfloat16_sqrt(bfloat16, float_status *status); FloatRelation bfloat16_compare(bfloat16, bfloat16, float_status *status); FloatRelation bfloat16_compare_quiet(bfloat16, bfloat16, float_status *status); @@ -586,6 +593,8 @@ float32 float32_minnum(float32, float32, float_status *status); float32 float32_maxnum(float32, float32, float_status *status); float32 float32_minnummag(float32, float32, float_status *status); float32 float32_maxnummag(float32, float32, float_status *status); +float32 float32_minimum_number(float32, float32, float_status *status); +float32 float32_maximum_number(float32, float32, float_status *status); bool float32_is_quiet_nan(float32, float_status *status); bool float32_is_signaling_nan(float32, float_status *status); float32 float32_silence_nan(float32, float_status *status); @@ -775,6 +784,8 @@ float64 float64_minnum(float64, float64, float_status *status); float64 float64_maxnum(float64, float64, float_status *status); float64 float64_minnummag(float64, float64, float_status *status); float64 float64_maxnummag(float64, float64, float_status *status); +float64 float64_minimum_number(float64, float64, float_status *status); +float64 float64_maximum_number(float64, float64, float_status *status); bool float64_is_quiet_nan(float64 a, float_status *status); bool float64_is_signaling_nan(float64, float_status *status); float64 float64_silence_nan(float64, float_status *status); @@ -1149,7 +1160,7 @@ floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status *status); | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, +floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -1162,7 +1173,7 @@ floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign, | normalized. 
*----------------------------------------------------------------------------*/ -floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, +floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); @@ -1194,11 +1205,21 @@ float128 float128_round_to_int(float128, float_status *status); float128 float128_add(float128, float128, float_status *status); float128 float128_sub(float128, float128, float_status *status); float128 float128_mul(float128, float128, float_status *status); +float128 float128_muladd(float128, float128, float128, int, + float_status *status); float128 float128_div(float128, float128, float_status *status); float128 float128_rem(float128, float128, float_status *status); float128 float128_sqrt(float128, float_status *status); FloatRelation float128_compare(float128, float128, float_status *status); FloatRelation float128_compare_quiet(float128, float128, float_status *status); +float128 float128_min(float128, float128, float_status *status); +float128 float128_max(float128, float128, float_status *status); +float128 float128_minnum(float128, float128, float_status *status); +float128 float128_maxnum(float128, float128, float_status *status); +float128 float128_minnummag(float128, float128, float_status *status); +float128 float128_maxnummag(float128, float128, float_status *status); +float128 float128_minimum_number(float128, float128, float_status *status); +float128 float128_maximum_number(float128, float128, float_status *status); bool float128_is_quiet_nan(float128, float_status *status); bool float128_is_signaling_nan(float128, float_status *status); float128 float128_silence_nan(float128, float_status *status); diff --git a/include/glib-compat.h b/include/glib-compat.h index 695a96f7ea6..9e95c888f54 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -19,12 +19,12 @@ /* Ask for warnings for anything that was marked deprecated in * the defined version, or before. It is a candidate for rewrite. */ -#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_48 +#define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_56 /* Ask for warnings if code tries to use function that did not * exist in the defined version. These risk breaking builds */ -#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_48 +#define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_56 #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -68,15 +68,6 @@ * without generating warnings. */ -#if defined(_WIN32) && !GLIB_CHECK_VERSION(2, 50, 0) -/* - * g_poll has a problem on Windows when using - * timeouts < 10ms, so use wrapper. 
- */ -#define g_poll(fds, nfds, timeout) g_poll_fixed(fds, nfds, timeout) -gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout); -#endif - #if defined(G_OS_UNIX) /* * Note: The fallback implementation is not MT-safe, and it returns a copy of @@ -100,6 +91,23 @@ g_unix_get_passwd_entry_qemu(const gchar *user_name, GError **error) } #endif /* G_OS_UNIX */ +static inline bool +qemu_g_test_slow(void) +{ + static int cached = -1; + if (cached == -1) { + cached = g_test_slow() || getenv("G_TEST_SLOW") != NULL; + } + return cached; +} + +#undef g_test_slow +#undef g_test_thorough +#undef g_test_quick +#define g_test_slow() qemu_g_test_slow() +#define g_test_thorough() qemu_g_test_slow() +#define g_test_quick() (!qemu_g_test_slow()) + #pragma GCC diagnostic pop #endif diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index cf9f44299ca..c97e8633ad8 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -48,31 +48,6 @@ typedef struct AcpiRsdpData { unsigned *xsdt_tbl_offset; } AcpiRsdpData; -/* Table structure from Linux kernel (the ACPI tables are under the - BSD license) */ - - -#define ACPI_TABLE_HEADER_DEF /* ACPI common table header */ \ - uint32_t signature; /* ACPI signature (4 ASCII characters) */ \ - uint32_t length; /* Length of table, in bytes, including header */ \ - uint8_t revision; /* ACPI Specification minor version # */ \ - uint8_t checksum; /* To make sum of entire table == 0 */ \ - uint8_t oem_id[6] \ - QEMU_NONSTRING; /* OEM identification */ \ - uint8_t oem_table_id[8] \ - QEMU_NONSTRING; /* OEM table identification */ \ - uint32_t oem_revision; /* OEM revision number */ \ - uint8_t asl_compiler_id[4] \ - QEMU_NONSTRING; /* ASL compiler vendor ID */ \ - uint32_t asl_compiler_revision; /* ASL compiler revision number */ - - -/* ACPI common table header */ -struct AcpiTableHeader { - ACPI_TABLE_HEADER_DEF -} QEMU_PACKED; -typedef struct AcpiTableHeader AcpiTableHeader; - struct AcpiGenericAddress { uint8_t space_id; /* Address space where struct or register exists */ uint8_t bit_width; /* Size in bits of given register */ @@ -80,7 +55,7 @@ struct AcpiGenericAddress { uint8_t access_width; /* ACPI 3.0: Minimum Access size (ACPI 3.0), ACPI 2.0: Reserved, Table 5-1 */ uint64_t address; /* 64-bit address of struct or register */ -} QEMU_PACKED; +}; typedef struct AcpiFadtData { struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */ @@ -117,505 +92,4 @@ typedef struct AcpiFadtData { #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) -/* - * Serial Port Console Redirection Table (SPCR), Rev. 1.02 - * - * For .interface_type see Debug Port Table 2 (DBG2) serial port - * subtypes in Table 3, Rev. 
May 22, 2012 - */ -struct AcpiSerialPortConsoleRedirection { - ACPI_TABLE_HEADER_DEF - uint8_t interface_type; - uint8_t reserved1[3]; - struct AcpiGenericAddress base_address; - uint8_t interrupt_types; - uint8_t irq; - uint32_t gsi; - uint8_t baud; - uint8_t parity; - uint8_t stopbits; - uint8_t flowctrl; - uint8_t term_type; - uint8_t reserved2; - uint16_t pci_device_id; - uint16_t pci_vendor_id; - uint8_t pci_bus; - uint8_t pci_slot; - uint8_t pci_func; - uint32_t pci_flags; - uint8_t pci_seg; - uint32_t reserved3; -} QEMU_PACKED; -typedef struct AcpiSerialPortConsoleRedirection - AcpiSerialPortConsoleRedirection; - -/* - * ACPI 1.0 Root System Description Table (RSDT) - */ -struct AcpiRsdtDescriptorRev1 { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t table_offset_entry[]; /* Array of pointers to other */ - /* ACPI tables */ -} QEMU_PACKED; -typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; - -/* - * ACPI 2.0 eXtended System Description Table (XSDT) - */ -struct AcpiXsdtDescriptorRev2 { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint64_t table_offset_entry[]; /* Array of pointers to other */ - /* ACPI tables */ -} QEMU_PACKED; -typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; - -/* - * ACPI 1.0 Firmware ACPI Control Structure (FACS) - */ -struct AcpiFacsDescriptorRev1 { - uint32_t signature; /* ACPI Signature */ - uint32_t length; /* Length of structure, in bytes */ - uint32_t hardware_signature; /* Hardware configuration signature */ - uint32_t firmware_waking_vector; /* ACPI OS waking vector */ - uint32_t global_lock; /* Global Lock */ - uint32_t flags; - uint8_t resverved3 [40]; /* Reserved - must be zero */ -} QEMU_PACKED; -typedef struct AcpiFacsDescriptorRev1 AcpiFacsDescriptorRev1; - -/* - * Differentiated System Description Table (DSDT) - */ - -/* - * MADT values and structures - */ - -/* Values for MADT PCATCompat */ - -#define ACPI_DUAL_PIC 0 -#define ACPI_MULTIPLE_APIC 1 - -/* Master MADT */ - -struct AcpiMultipleApicTable { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t local_apic_address; /* Physical address of local APIC */ - uint32_t flags; -} QEMU_PACKED; -typedef struct AcpiMultipleApicTable AcpiMultipleApicTable; - -/* Values for Type in APIC sub-headers */ - -#define ACPI_APIC_PROCESSOR 0 -#define ACPI_APIC_IO 1 -#define ACPI_APIC_XRUPT_OVERRIDE 2 -#define ACPI_APIC_NMI 3 -#define ACPI_APIC_LOCAL_NMI 4 -#define ACPI_APIC_ADDRESS_OVERRIDE 5 -#define ACPI_APIC_IO_SAPIC 6 -#define ACPI_APIC_LOCAL_SAPIC 7 -#define ACPI_APIC_XRUPT_SOURCE 8 -#define ACPI_APIC_LOCAL_X2APIC 9 -#define ACPI_APIC_LOCAL_X2APIC_NMI 10 -#define ACPI_APIC_GENERIC_CPU_INTERFACE 11 -#define ACPI_APIC_GENERIC_DISTRIBUTOR 12 -#define ACPI_APIC_GENERIC_MSI_FRAME 13 -#define ACPI_APIC_GENERIC_REDISTRIBUTOR 14 -#define ACPI_APIC_GENERIC_TRANSLATOR 15 -#define ACPI_APIC_RESERVED 16 /* 16 and greater are reserved */ - -/* - * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) - */ -#define ACPI_SUB_HEADER_DEF /* Common ACPI sub-structure header */\ - uint8_t type; \ - uint8_t length; - -/* Sub-structures for MADT */ - -struct AcpiMadtProcessorApic { - ACPI_SUB_HEADER_DEF - uint8_t processor_id; /* ACPI processor id */ - uint8_t local_apic_id; /* Processor's local APIC id */ - uint32_t flags; -} QEMU_PACKED; -typedef struct AcpiMadtProcessorApic AcpiMadtProcessorApic; - -struct AcpiMadtIoApic { - ACPI_SUB_HEADER_DEF - uint8_t io_apic_id; /* I/O APIC ID */ - uint8_t reserved; /* Reserved - must be zero */ - uint32_t 
address; /* APIC physical address */ - uint32_t interrupt; /* Global system interrupt where INTI - * lines start */ -} QEMU_PACKED; -typedef struct AcpiMadtIoApic AcpiMadtIoApic; - -struct AcpiMadtIntsrcovr { - ACPI_SUB_HEADER_DEF - uint8_t bus; - uint8_t source; - uint32_t gsi; - uint16_t flags; -} QEMU_PACKED; -typedef struct AcpiMadtIntsrcovr AcpiMadtIntsrcovr; - -struct AcpiMadtLocalNmi { - ACPI_SUB_HEADER_DEF - uint8_t processor_id; /* ACPI processor id */ - uint16_t flags; /* MPS INTI flags */ - uint8_t lint; /* Local APIC LINT# */ -} QEMU_PACKED; -typedef struct AcpiMadtLocalNmi AcpiMadtLocalNmi; - -struct AcpiMadtProcessorX2Apic { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t x2apic_id; /* Processor's local x2APIC ID */ - uint32_t flags; - uint32_t uid; /* Processor object _UID */ -} QEMU_PACKED; -typedef struct AcpiMadtProcessorX2Apic AcpiMadtProcessorX2Apic; - -struct AcpiMadtLocalX2ApicNmi { - ACPI_SUB_HEADER_DEF - uint16_t flags; /* MPS INTI flags */ - uint32_t uid; /* Processor object _UID */ - uint8_t lint; /* Local APIC LINT# */ - uint8_t reserved[3]; /* Local APIC LINT# */ -} QEMU_PACKED; -typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi; - -struct AcpiMadtGenericCpuInterface { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t cpu_interface_number; - uint32_t uid; - uint32_t flags; - uint32_t parking_version; - uint32_t performance_interrupt; - uint64_t parked_address; - uint64_t base_address; - uint64_t gicv_base_address; - uint64_t gich_base_address; - uint32_t vgic_interrupt; - uint64_t gicr_base_address; - uint64_t arm_mpidr; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface; - -/* GICC CPU Interface Flags */ -#define ACPI_MADT_GICC_ENABLED 1 - -struct AcpiMadtGenericDistributor { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t gic_id; - uint64_t base_address; - uint32_t global_irq_base; - /* ACPI 5.1 Errata 1228 Present GIC version in MADT table */ - uint8_t version; - uint8_t reserved2[3]; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericDistributor AcpiMadtGenericDistributor; - -struct AcpiMadtGenericMsiFrame { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t gic_msi_frame_id; - uint64_t base_address; - uint32_t flags; - uint16_t spi_count; - uint16_t spi_base; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericMsiFrame AcpiMadtGenericMsiFrame; - -struct AcpiMadtGenericRedistributor { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint64_t base_address; - uint32_t range_length; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericRedistributor AcpiMadtGenericRedistributor; - -struct AcpiMadtGenericTranslator { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t translation_id; - uint64_t base_address; - uint32_t reserved2; -} QEMU_PACKED; - -typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator; - -/* - * Generic Timer Description Table (GTDT) - */ -#define ACPI_GTDT_INTERRUPT_MODE_LEVEL (0 << 0) -#define ACPI_GTDT_INTERRUPT_MODE_EDGE (1 << 0) -#define ACPI_GTDT_CAP_ALWAYS_ON (1 << 2) - -struct AcpiGenericTimerTable { - ACPI_TABLE_HEADER_DEF - uint64_t counter_block_addresss; - uint32_t reserved; - uint32_t secure_el1_interrupt; - uint32_t secure_el1_flags; - uint32_t non_secure_el1_interrupt; - uint32_t non_secure_el1_flags; - uint32_t virtual_timer_interrupt; - uint32_t virtual_timer_flags; - uint32_t non_secure_el2_interrupt; - uint32_t non_secure_el2_flags; - uint64_t counter_read_block_address; - uint32_t platform_timer_count; - uint32_t platform_timer_offset; -} QEMU_PACKED; 
-typedef struct AcpiGenericTimerTable AcpiGenericTimerTable; - -/* - * HPET Description Table - */ -struct Acpi20Hpet { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t timer_block_id; - struct AcpiGenericAddress addr; - uint8_t hpet_number; - uint16_t min_tick; - uint8_t page_protect; -} QEMU_PACKED; -typedef struct Acpi20Hpet Acpi20Hpet; - -/* - * SRAT (NUMA topology description) table - */ - -struct AcpiSystemResourceAffinityTable { - ACPI_TABLE_HEADER_DEF - uint32_t reserved1; - uint32_t reserved2[2]; -} QEMU_PACKED; -typedef struct AcpiSystemResourceAffinityTable AcpiSystemResourceAffinityTable; - -#define ACPI_SRAT_PROCESSOR_APIC 0 -#define ACPI_SRAT_MEMORY 1 -#define ACPI_SRAT_PROCESSOR_x2APIC 2 -#define ACPI_SRAT_PROCESSOR_GICC 3 - -struct AcpiSratProcessorAffinity { - ACPI_SUB_HEADER_DEF - uint8_t proximity_lo; - uint8_t local_apic_id; - uint32_t flags; - uint8_t local_sapic_eid; - uint8_t proximity_hi[3]; - uint32_t reserved; -} QEMU_PACKED; -typedef struct AcpiSratProcessorAffinity AcpiSratProcessorAffinity; - -struct AcpiSratProcessorX2ApicAffinity { - ACPI_SUB_HEADER_DEF - uint16_t reserved; - uint32_t proximity_domain; - uint32_t x2apic_id; - uint32_t flags; - uint32_t clk_domain; - uint32_t reserved2; -} QEMU_PACKED; -typedef struct AcpiSratProcessorX2ApicAffinity AcpiSratProcessorX2ApicAffinity; - -struct AcpiSratMemoryAffinity { - ACPI_SUB_HEADER_DEF - uint32_t proximity; - uint16_t reserved1; - uint64_t base_addr; - uint64_t range_length; - uint32_t reserved2; - uint32_t flags; - uint32_t reserved3[2]; -} QEMU_PACKED; -typedef struct AcpiSratMemoryAffinity AcpiSratMemoryAffinity; - -struct AcpiSratProcessorGiccAffinity { - ACPI_SUB_HEADER_DEF - uint32_t proximity; - uint32_t acpi_processor_uid; - uint32_t flags; - uint32_t clock_domain; -} QEMU_PACKED; - -typedef struct AcpiSratProcessorGiccAffinity AcpiSratProcessorGiccAffinity; - -/* - * TCPA Description Table - * - * Following Level 00, Rev 00.37 of specs: - * http://www.trustedcomputinggroup.org/resources/tcg_acpi_specification - */ -struct Acpi20Tcpa { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint16_t platform_class; - uint32_t log_area_minimum_length; - uint64_t log_area_start_address; -} QEMU_PACKED; -typedef struct Acpi20Tcpa Acpi20Tcpa; - -/* DMAR - DMA Remapping table r2.2 */ -struct AcpiTableDmar { - ACPI_TABLE_HEADER_DEF - uint8_t host_address_width; /* Maximum DMA physical addressability */ - uint8_t flags; - uint8_t reserved[10]; -} QEMU_PACKED; -typedef struct AcpiTableDmar AcpiTableDmar; - -/* Masks for Flags field above */ -#define ACPI_DMAR_INTR_REMAP 1 -#define ACPI_DMAR_X2APIC_OPT_OUT (1 << 1) - -/* Values for sub-structure type for DMAR */ -enum { - ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ - ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ - ACPI_DMAR_TYPE_ATSR = 2, /* ATSR */ - ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ - ACPI_DMAR_TYPE_ANDD = 4, /* ANDD */ - ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ -}; - -/* - * Sub-structures for DMAR - */ - -/* Device scope structure for DRHD. 
*/ -struct AcpiDmarDeviceScope { - uint8_t entry_type; - uint8_t length; - uint16_t reserved; - uint8_t enumeration_id; - uint8_t bus; - struct { - uint8_t device; - uint8_t function; - } path[]; -} QEMU_PACKED; -typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; - -/* Type 0: Hardware Unit Definition */ -struct AcpiDmarHardwareUnit { - uint16_t type; - uint16_t length; - uint8_t flags; - uint8_t reserved; - uint16_t pci_segment; /* The PCI Segment associated with this unit */ - uint64_t address; /* Base address of remapping hardware register-set */ - AcpiDmarDeviceScope scope[]; -} QEMU_PACKED; -typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; - -/* Type 2: Root Port ATS Capability Reporting Structure */ -struct AcpiDmarRootPortATS { - uint16_t type; - uint16_t length; - uint8_t flags; - uint8_t reserved; - uint16_t pci_segment; - AcpiDmarDeviceScope scope[]; -} QEMU_PACKED; -typedef struct AcpiDmarRootPortATS AcpiDmarRootPortATS; - -/* Masks for Flags field above */ -#define ACPI_DMAR_INCLUDE_PCI_ALL 1 -#define ACPI_DMAR_ATSR_ALL_PORTS 1 - -/* - * Input Output Remapping Table (IORT) - * Conforms to "IO Remapping Table System Software on ARM Platforms", - * Document number: ARM DEN 0049B, October 2015 - */ - -struct AcpiIortTable { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t node_count; - uint32_t node_offset; - uint32_t reserved; -} QEMU_PACKED; -typedef struct AcpiIortTable AcpiIortTable; - -/* - * IORT node types - */ - -#define ACPI_IORT_NODE_HEADER_DEF /* Node format common fields */ \ - uint8_t type; \ - uint16_t length; \ - uint8_t revision; \ - uint32_t reserved; \ - uint32_t mapping_count; \ - uint32_t mapping_offset; - -/* Values for node Type above */ -enum { - ACPI_IORT_NODE_ITS_GROUP = 0x00, - ACPI_IORT_NODE_NAMED_COMPONENT = 0x01, - ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02, - ACPI_IORT_NODE_SMMU = 0x03, - ACPI_IORT_NODE_SMMU_V3 = 0x04 -}; - -struct AcpiIortIdMapping { - uint32_t input_base; - uint32_t id_count; - uint32_t output_base; - uint32_t output_reference; - uint32_t flags; -} QEMU_PACKED; -typedef struct AcpiIortIdMapping AcpiIortIdMapping; - -struct AcpiIortMemoryAccess { - uint32_t cache_coherency; - uint8_t hints; - uint16_t reserved; - uint8_t memory_flags; -} QEMU_PACKED; -typedef struct AcpiIortMemoryAccess AcpiIortMemoryAccess; - -struct AcpiIortItsGroup { - ACPI_IORT_NODE_HEADER_DEF - uint32_t its_count; - uint32_t identifiers[]; -} QEMU_PACKED; -typedef struct AcpiIortItsGroup AcpiIortItsGroup; - -#define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE 1 - -struct AcpiIortSmmu3 { - ACPI_IORT_NODE_HEADER_DEF - uint64_t base_address; - uint32_t flags; - uint32_t reserved2; - uint64_t vatos_address; - uint32_t model; - uint32_t event_gsiv; - uint32_t pri_gsiv; - uint32_t gerr_gsiv; - uint32_t sync_gsiv; - AcpiIortIdMapping id_mapping_array[]; -} QEMU_PACKED; -typedef struct AcpiIortSmmu3 AcpiIortSmmu3; - -struct AcpiIortRC { - ACPI_IORT_NODE_HEADER_DEF - AcpiIortMemoryAccess memory_properties; - uint32_t ats_attribute; - uint32_t pci_segment_number; - AcpiIortIdMapping id_mapping_array[]; -} QEMU_PACKED; -typedef struct AcpiIortRC AcpiIortRC; - #endif diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index 9e8a76f2e2a..cc0d3707458 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -47,6 +47,8 @@ #define ACPI_PM_PROP_PM_IO_BASE "pm_io_base" #define ACPI_PM_PROP_GPE0_BLK "gpe0_blk" #define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len" +#define ACPI_PM_PROP_ACPI_PCIHP_BRIDGE "acpi-pci-hotplug-with-bridge-support" +#define 
ACPI_PM_PROP_ACPI_PCI_ROOTHP "acpi-root-pci-hotplug" /* PM Timer ticks per second (HZ) */ #define PM_TIMER_FREQUENCY 3579545 diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index 769ff55c7ee..ea6056ab926 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -53,6 +53,7 @@ struct AcpiDeviceIfClass { void (*ospm_status)(AcpiDeviceIf *adev, ACPIOSTInfoList ***list); void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); void (*madt_cpu)(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry); + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled); }; #endif diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 471266d7391..8346003a224 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -413,10 +413,37 @@ Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); Aml *aml_object_type(Aml *object); void build_append_int_noprefix(GArray *table, uint64_t value, int size); -void -build_header(BIOSLinker *linker, GArray *table_data, - AcpiTableHeader *h, const char *sig, int len, uint8_t rev, - const char *oem_id, const char *oem_table_id); + +typedef struct AcpiTable { + const char *sig; + const uint8_t rev; + const char *oem_id; + const char *oem_table_id; + /* private vars tracking table state */ + GArray *array; + unsigned table_offset; +} AcpiTable; + +/** + * acpi_table_begin: + * initializes table header and keeps track of + * table data/offsets + * @desc: ACPI table descriptor + * @array: blob where the ACPI table will be composed/stored. + */ +void acpi_table_begin(AcpiTable *desc, GArray *array); + +/** + * acpi_table_end: + * sets actual table length and tells bios loader + * where table is for the later initialization on + * guest side. 
+ * @linker: reference to BIOSLinker object to use for the table + * @table: ACPI table descriptor that was used with @acpi_table_begin + * counterpart + */ +void acpi_table_end(BIOSLinker *linker, AcpiTable *table); + void *acpi_data_push(GArray *table_data, unsigned size); unsigned acpi_data_len(GArray *table); void acpi_add_table(GArray *table_offsets, GArray *table_data); @@ -456,12 +483,15 @@ Aml *build_crs(PCIHostState *host, CrsRangeSet *range_set, uint32_t io_offset, uint32_t mmio32_offset, uint64_t mmio64_offset, uint16_t bus_nr_offset); -void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, +void build_srat_memory(GArray *table_data, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, const char *oem_id, const char *oem_table_id); +void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id); diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h index 6bed92e8fc5..d49217c445f 100644 --- a/include/hw/acpi/generic_event_device.h +++ b/include/hw/acpi/generic_event_device.h @@ -70,8 +70,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) #define TYPE_ACPI_GED_X86 "acpi-ged-x86" -#define ACPI_GED_X86(obj) \ - OBJECT_CHECK(AcpiGedX86State, (obj), TYPE_ACPI_GED_X86) #define ACPI_GED_EVT_SEL_OFFSET 0x0 #define ACPI_GED_EVT_SEL_LEN 0x4 diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 2ae8bc1ded3..674f6958e90 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -64,6 +64,7 @@ enum { typedef struct AcpiGhesState { uint64_t ghes_addr_le; + bool present; /* True if GHES is present at all on this board */ } AcpiGhesState; void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); @@ -72,4 +73,12 @@ void acpi_build_hest(GArray *table_data, BIOSLinker *linker, void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); + +/** + * acpi_ghes_present: Report whether ACPI GHES table is present + * + * Returns: true if the system has an ACPI GHES table and it is + * safe to call acpi_ghes_record_errors() to record a memory error. 
+ */ +bool acpi_ghes_present(void); #endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index df519e40b57..7ca92843c6b 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -24,10 +24,13 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/acpi/cpu.h" +#include "hw/acpi/pcihp.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" #include "hw/acpi/tco.h" +#define ACPI_PCIHP_ADDR_ICH9 0x0cc0 + typedef struct ICH9LPCPMRegs { /* * In ich9 spec says that pm1_cnt register is 32bit width and @@ -53,6 +56,9 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; CPUHotplugState cpuhp_state; + bool keep_pci_slot_hpc; + bool use_acpi_hotplug_bridge; + AcpiPciHpState acpi_pci_hotplug; MemHotplugState acpi_memory_hotplug; uint8_t disable_s3; diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 2dd90aea309..af1a169fc32 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -55,7 +55,8 @@ typedef struct AcpiPciHpState { } AcpiPciHpState; void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, - MemoryRegion *address_space_io, bool bridges_enabled); + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); diff --git a/include/hw/acpi/tpm.h b/include/hw/acpi/tpm.h index 1a2a57a21f0..559ba6906c8 100644 --- a/include/hw/acpi/tpm.h +++ b/include/hw/acpi/tpm.h @@ -21,6 +21,8 @@ #include "hw/acpi/aml-build.h" #include "sysemu/tpm.h" +#ifdef CONFIG_TPM + #define TPM_TIS_ADDR_BASE 0xFED40000 #define TPM_TIS_ADDR_SIZE 0x5000 @@ -209,4 +211,6 @@ REG32(CRB_DATA_BUFFER, 0x80) void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev); +#endif /* CONFIG_TPM */ + #endif /* HW_ACPI_TPM_H */ diff --git a/include/hw/adc/aspeed_adc.h b/include/hw/adc/aspeed_adc.h new file mode 100644 index 00000000000..2f166e8be11 --- /dev/null +++ b/include/hw/adc/aspeed_adc.h @@ -0,0 +1,55 @@ +/* + * Aspeed ADC + * + * Copyright 2017-2021 IBM Corp. 
+ * + * Andrew Jeffery + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_ADC_ASPEED_ADC_H +#define HW_ADC_ASPEED_ADC_H + +#include "hw/sysbus.h" + +#define TYPE_ASPEED_ADC "aspeed.adc" +#define TYPE_ASPEED_2400_ADC TYPE_ASPEED_ADC "-ast2400" +#define TYPE_ASPEED_2500_ADC TYPE_ASPEED_ADC "-ast2500" +#define TYPE_ASPEED_2600_ADC TYPE_ASPEED_ADC "-ast2600" +OBJECT_DECLARE_TYPE(AspeedADCState, AspeedADCClass, ASPEED_ADC) + +#define TYPE_ASPEED_ADC_ENGINE "aspeed.adc.engine" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedADCEngineState, ASPEED_ADC_ENGINE) + +#define ASPEED_ADC_NR_CHANNELS 16 +#define ASPEED_ADC_NR_REGS (0xD0 >> 2) + +struct AspeedADCEngineState { + /* */ + SysBusDevice parent; + + MemoryRegion mmio; + qemu_irq irq; + uint32_t engine_id; + uint32_t nr_channels; + uint32_t regs[ASPEED_ADC_NR_REGS]; +}; + +struct AspeedADCState { + /* */ + SysBusDevice parent; + + MemoryRegion mmio; + qemu_irq irq; + + AspeedADCEngineState engines[2]; +}; + +struct AspeedADCClass { + SysBusDeviceClass parent_class; + + uint32_t nr_engines; +}; + +#endif /* HW_ADC_ASPEED_ADC_H */ diff --git a/include/hw/misc/max111x.h b/include/hw/adc/max111x.h similarity index 100% rename from include/hw/misc/max111x.h rename to include/hw/adc/max111x.h diff --git a/include/hw/misc/zynq-xadc.h b/include/hw/adc/zynq-xadc.h similarity index 100% rename from include/hw/misc/zynq-xadc.h rename to include/hw/adc/zynq-xadc.h diff --git a/include/hw/arm/allwinner-h3.h b/include/hw/arm/allwinner-h3.h index cc308a5d2c9..63025fb27c8 100644 --- a/include/hw/arm/allwinner-h3.h +++ b/include/hw/arm/allwinner-h3.h @@ -18,7 +18,7 @@ */ /* - * The Allwinner H3 is a System on Chip containing four ARM Cortex A7 + * The Allwinner H3 is a System on Chip containing four ARM Cortex-A7 * processor cores. Features and specifications include DDR2/DDR3 memory, * SD/MMC storage cards, 10/100/1000Mbit Ethernet, USB 2.0, HDMI and * various I/O modules. diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h index 36592be62c5..9648e7a4193 100644 --- a/include/hw/arm/armsse.h +++ b/include/hw/arm/armsse.h @@ -198,6 +198,8 @@ struct ARMSSE { MemoryRegion alias2; MemoryRegion alias3[SSE_MAX_CPUS]; MemoryRegion sram[MAX_SRAM_BANKS]; + MemoryRegion itcm; + MemoryRegion dtcm; qemu_irq *exp_irqs[SSE_MAX_CPUS]; qemu_irq ppc0_irq; diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h index 189b23a8ceb..b7ba0ff409c 100644 --- a/include/hw/arm/armv7m.h +++ b/include/hw/arm/armv7m.h @@ -12,8 +12,10 @@ #include "hw/sysbus.h" #include "hw/intc/armv7m_nvic.h" +#include "hw/misc/armv7m_ras.h" #include "target/arm/idau.h" #include "qom/object.h" +#include "hw/clock.h" #define TYPE_BITBAND "ARM-bitband-memory" OBJECT_DECLARE_SIMPLE_TYPE(BitBandState, BITBAND) @@ -46,9 +48,12 @@ OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MState, ARMV7M) * devices will be automatically layered on top of this view.) 
* + Property "idau": IDAU interface (forwarded to CPU object) * + Property "init-svtor": secure VTOR reset value (forwarded to CPU object) + * + Property "init-nsvtor": non-secure VTOR reset value (forwarded to CPU object) * + Property "vfp": enable VFP (forwarded to CPU object) * + Property "dsp": enable DSP (forwarded to CPU object) * + Property "enable-bitband": expose bitbanded IO + * + Clock input "refclk" is the external reference clock for the systick timers + * + Clock input "cpuclk" is the main CPU clock */ struct ARMv7MState { /*< private >*/ @@ -57,11 +62,31 @@ struct ARMv7MState { NVICState nvic; BitBandState bitband[ARMV7M_NUM_BITBANDS]; ARMCPU *cpu; + ARMv7MRAS ras; + SysTickState systick[M_REG_NUM_BANKS]; /* MemoryRegion we pass to the CPU, with our devices layered on * top of the ones the board provides in board_memory. */ MemoryRegion container; + /* + * MemoryRegion which passes the transaction to either the S or the + * NS systick device depending on the transaction attributes + */ + MemoryRegion systickmem; + /* + * MemoryRegion which enforces the S/NS handling of the systick + * device NS alias region and passes the transaction to the + * NS systick device if appropriate. + */ + MemoryRegion systick_ns_mem; + /* Ditto, for the sysregs region provided by the NVIC */ + MemoryRegion sysreg_ns_mem; + /* MR providing default PPB behaviour */ + MemoryRegion defaultmem; + + Clock *refclk; + Clock *cpuclk; /* Properties */ char *cpu_type; @@ -69,6 +94,7 @@ struct ARMv7MState { MemoryRegion *board_memory; Object *idau; uint32_t init_svtor; + uint32_t init_nsvtor; bool enable_bitband; bool start_powered_off; bool vfp; diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h index c9747b15fc5..cbeacb214ca 100644 --- a/include/hw/arm/aspeed.h +++ b/include/hw/arm/aspeed.h @@ -38,6 +38,7 @@ struct AspeedMachineClass { uint32_t num_cs; uint32_t macs_mask; void (*i2c_init)(AspeedMachineState *bmc); + uint32_t uart_default; }; diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h index 9359d6da336..8139358549d 100644 --- a/include/hw/arm/aspeed_soc.h +++ b/include/hw/arm/aspeed_soc.h @@ -15,12 +15,14 @@ #include "hw/cpu/a15mpcore.h" #include "hw/intc/aspeed_vic.h" #include "hw/misc/aspeed_scu.h" +#include "hw/adc/aspeed_adc.h" #include "hw/misc/aspeed_sdmc.h" #include "hw/misc/aspeed_xdma.h" #include "hw/timer/aspeed_timer.h" #include "hw/rtc/aspeed_rtc.h" #include "hw/i2c/aspeed_i2c.h" #include "hw/ssi/aspeed_smc.h" +#include "hw/misc/aspeed_hace.h" #include "hw/watchdog/wdt_aspeed.h" #include "hw/net/ftgmac100.h" #include "target/arm/cpu.h" @@ -50,7 +52,9 @@ struct AspeedSoCState { AspeedTimerCtrlState timerctrl; AspeedI2CState i2c; AspeedSCUState scu; + AspeedHACEState hace; AspeedXDMAState xdma; + AspeedADCState adc; AspeedSMCState fmc; AspeedSMCState spi[ASPEED_SPIS_NUM]; EHCISysBusState ehci[ASPEED_EHCIS_NUM]; @@ -63,6 +67,7 @@ struct AspeedSoCState { AspeedSDHCIState sdhci; AspeedSDHCIState emmc; AspeedLPCState lpc; + uint32_t uart_default; }; #define TYPE_ASPEED_SOC "aspeed-soc" @@ -133,6 +138,7 @@ enum { ASPEED_DEV_XDMA, ASPEED_DEV_EMMC, ASPEED_DEV_KCS, + ASPEED_DEV_HACE, }; #endif /* ASPEED_SOC_H */ diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index 479e2346e80..d864879421a 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -24,6 +24,7 @@ #include "hw/misc/bcm2835_mphi.h" #include "hw/misc/bcm2835_thermal.h" #include "hw/misc/bcm2835_cprman.h" +#include 
"hw/misc/bcm2835_powermgt.h" #include "hw/sd/sdhci.h" #include "hw/sd/bcm2835_sdhost.h" #include "hw/gpio/bcm2835_gpio.h" @@ -48,7 +49,7 @@ struct BCM2835PeripheralState { BCM2835MphiState mphi; UnimplementedDeviceState txp; UnimplementedDeviceState armtmr; - UnimplementedDeviceState powermgt; + BCM2835PowerMgtState powermgt; BCM2835CprmanState cprman; PL011State uart0; BCM2835AuxState aux; diff --git a/include/hw/arm/fsl-imx7.h b/include/hw/arm/fsl-imx7.h index f5d527a4906..1c5fa6fd676 100644 --- a/include/hw/arm/fsl-imx7.h +++ b/include/hw/arm/fsl-imx7.h @@ -174,6 +174,11 @@ enum FslIMX7MemoryMap { FSL_IMX7_UART6_ADDR = 0x30A80000, FSL_IMX7_UART7_ADDR = 0x30A90000, + FSL_IMX7_SAI1_ADDR = 0x308A0000, + FSL_IMX7_SAI2_ADDR = 0x308B0000, + FSL_IMX7_SAI3_ADDR = 0x308C0000, + FSL_IMX7_SAIn_SIZE = 0x10000, + FSL_IMX7_ENET1_ADDR = 0x30BE0000, FSL_IMX7_ENET2_ADDR = 0x30BF0000, diff --git a/include/hw/arm/msf2-soc.h b/include/hw/arm/msf2-soc.h index d4061846855..ce417a6266a 100644 --- a/include/hw/arm/msf2-soc.h +++ b/include/hw/arm/msf2-soc.h @@ -30,6 +30,7 @@ #include "hw/misc/msf2-sysreg.h" #include "hw/ssi/mss-spi.h" #include "hw/net/msf2-emac.h" +#include "hw/clock.h" #include "qom/object.h" #define TYPE_MSF2_SOC "msf2-soc" @@ -57,7 +58,8 @@ struct MSF2State { uint64_t envm_size; uint64_t esram_size; - uint32_t m3clk; + Clock *m3clk; + Clock *refclk; uint8_t apb0div; uint8_t apb1div; @@ -65,6 +67,10 @@ struct MSF2State { MSSTimerState timer; MSSSpiState spi[MSF2_NUM_SPIS]; MSF2EmacState emac; + + MemoryRegion nvm; + MemoryRegion nvm_alias; + MemoryRegion sram; }; #endif diff --git a/include/hw/arm/npcm7xx.h b/include/hw/arm/npcm7xx.h index 61ecc57ab90..ce593235d94 100644 --- a/include/hw/arm/npcm7xx.h +++ b/include/hw/arm/npcm7xx.h @@ -35,6 +35,7 @@ #include "hw/usb/hcd-ehci.h" #include "hw/usb/hcd-ohci.h" #include "target/arm/cpu.h" +#include "hw/sd/npcm7xx_sdhci.h" #define NPCM7XX_MAX_NUM_CPUS (2) @@ -103,6 +104,7 @@ typedef struct NPCM7xxState { OHCISysBusState ohci; NPCM7xxFIUState fiu[2]; NPCM7xxEMCState emc[2]; + NPCM7xxSDHCIState mmc; } NPCM7xxState; #define TYPE_NPCM7XX "npcm7xx" diff --git a/include/hw/arm/nrf51_soc.h b/include/hw/arm/nrf51_soc.h index f8a6725b775..e52a56e75e0 100644 --- a/include/hw/arm/nrf51_soc.h +++ b/include/hw/arm/nrf51_soc.h @@ -17,6 +17,7 @@ #include "hw/gpio/nrf51_gpio.h" #include "hw/nvram/nrf51_nvm.h" #include "hw/timer/nrf51_timer.h" +#include "hw/clock.h" #include "qom/object.h" #define TYPE_NRF51_SOC "nrf51-soc" @@ -50,6 +51,7 @@ struct NRF51State { MemoryRegion container; + Clock *sysclk; }; #endif diff --git a/include/hw/arm/stm32f100_soc.h b/include/hw/arm/stm32f100_soc.h new file mode 100644 index 00000000000..40cd415b284 --- /dev/null +++ b/include/hw/arm/stm32f100_soc.h @@ -0,0 +1,65 @@ +/* + * STM32F100 SoC + * + * Copyright (c) 2021 Alexandre Iooss + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_ARM_STM32F100_SOC_H +#define HW_ARM_STM32F100_SOC_H + +#include "hw/char/stm32f2xx_usart.h" +#include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" +#include "qom/object.h" +#include "hw/clock.h" + +#define TYPE_STM32F100_SOC "stm32f100-soc" +OBJECT_DECLARE_SIMPLE_TYPE(STM32F100State, STM32F100_SOC) + +#define STM_NUM_USARTS 3 +#define STM_NUM_SPIS 2 + +#define FLASH_BASE_ADDRESS 0x08000000 +#define FLASH_SIZE (128 * 1024) +#define SRAM_BASE_ADDRESS 0x20000000 +#define SRAM_SIZE (8 * 1024) + +struct STM32F100State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + char *cpu_type; + + ARMv7MState armv7m; + + STM32F2XXUsartState usart[STM_NUM_USARTS]; + STM32F2XXSPIState spi[STM_NUM_SPIS]; + + MemoryRegion sram; + MemoryRegion flash; + MemoryRegion flash_alias; + + Clock *sysclk; + Clock *refclk; +}; + +#endif diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 985ff63aa9e..849d3ed8891 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -32,6 +32,7 @@ #include "hw/or-irq.h" #include "hw/ssi/stm32f2xx_spi.h" #include "hw/arm/armv7m.h" +#include "hw/clock.h" #include "qom/object.h" #define TYPE_STM32F205_SOC "stm32f205-soc" @@ -63,6 +64,13 @@ struct STM32F205State { STM32F2XXSPIState spi[STM_NUM_SPIS]; qemu_or_irq *adc_irqs; + + MemoryRegion sram; + MemoryRegion flash; + MemoryRegion flash_alias; + + Clock *sysclk; + Clock *refclk; }; #endif diff --git a/include/hw/arm/stm32f405_soc.h b/include/hw/arm/stm32f405_soc.h index 347105e709b..5bb0c8d5697 100644 --- a/include/hw/arm/stm32f405_soc.h +++ b/include/hw/arm/stm32f405_soc.h @@ -68,6 +68,9 @@ struct STM32F405State { MemoryRegion sram; MemoryRegion flash; MemoryRegion flash_alias; + + Clock *sysclk; + Clock *refclk; }; #endif diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 921416f918b..dc6b66ffc8f 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -120,15 +120,18 @@ struct VirtMachineClass { MachineClass parent; bool disallow_affinity_adjustment; bool no_its; + bool no_tcg_its; bool no_pmu; bool claim_edge_triggered_timers; bool smbios_old_sys_ver; bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ + bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ bool kvm_no_adjvtime; bool no_kvm_steal_time; bool acpi_expose_flash; bool no_secure_gpio; + /* Machines < 6.2 have no support for describing cpu topology to guest */ + bool no_cpu_topology; }; struct VirtMachineState { @@ -141,12 +144,14 @@ struct VirtMachineState { bool highmem; bool highmem_ecam; bool its; + bool tcg_its; bool virt; bool ras; bool mte; OnOffAuto acpi; VirtGICType gic_version; VirtIOMMUType iommu; + bool default_bus_bypass_iommu; VirtMSIControllerType msi_controller; uint16_t virtio_iommu_bdf; struct arm_boot_info bootinfo; diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h index 22a8fa5d11b..895ba12c61e 100644 --- a/include/hw/arm/xlnx-versal.h +++ 
b/include/hw/arm/xlnx-versal.h @@ -24,6 +24,8 @@ #include "qom/object.h" #include "hw/usb/xlnx-usb-subsystem.h" #include "hw/misc/xlnx-versal-xramc.h" +#include "hw/nvram/xlnx-bbram.h" +#include "hw/nvram/xlnx-versal-efuse.h" #define TYPE_XLNX_VERSAL "xlnx-versal" OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) @@ -79,6 +81,10 @@ struct Versal { } iou; XlnxZynqMPRTC rtc; + XlnxBBRam bbram; + XlnxEFuse efuse; + XlnxVersalEFuseCtrl efuse_ctrl; + XlnxVersalEFuseCache efuse_cache; } pmc; struct { @@ -105,8 +111,10 @@ struct Versal { #define VERSAL_GEM1_WAKE_IRQ_0 59 #define VERSAL_ADMA_IRQ_0 60 #define VERSAL_XRAM_IRQ_0 79 +#define VERSAL_BBRAM_APB_IRQ_0 121 #define VERSAL_RTC_APB_ERR_IRQ 121 #define VERSAL_SD0_IRQ_0 126 +#define VERSAL_EFUSE_IRQ 139 #define VERSAL_RTC_ALARM_IRQ 142 #define VERSAL_RTC_SECONDS_IRQ 143 @@ -167,9 +175,18 @@ struct Versal { #define MM_IOU_SCNTRS_SIZE 0x10000 #define MM_FPD_CRF 0xfd1a0000U #define MM_FPD_CRF_SIZE 0x140000 +#define MM_FPD_FPD_APU 0xfd5c0000 +#define MM_FPD_FPD_APU_SIZE 0x100 #define MM_PMC_SD0 0xf1040000U #define MM_PMC_SD0_SIZE 0x10000 +#define MM_PMC_BBRAM_CTRL 0xf11f0000 +#define MM_PMC_BBRAM_CTRL_SIZE 0x00050 +#define MM_PMC_EFUSE_CTRL 0xf1240000 +#define MM_PMC_EFUSE_CTRL_SIZE 0x00104 +#define MM_PMC_EFUSE_CACHE 0xf1250000 +#define MM_PMC_EFUSE_CACHE_SIZE 0x00C00 + #define MM_PMC_CRP 0xf1260000U #define MM_PMC_CRP_SIZE 0x10000 #define MM_PMC_RTC 0xf12a0000 diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h index d3e2ef97f67..062e637fe49 100644 --- a/include/hw/arm/xlnx-zynqmp.h +++ b/include/hw/arm/xlnx-zynqmp.h @@ -36,6 +36,8 @@ #include "qom/object.h" #include "net/can_emu.h" #include "hw/dma/xlnx_csu_dma.h" +#include "hw/nvram/xlnx-bbram.h" +#include "hw/nvram/xlnx-zynqmp-efuse.h" #define TYPE_XLNX_ZYNQMP "xlnx-zynqmp" OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP) @@ -79,6 +81,11 @@ OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP) #define XLNX_ZYNQMP_MAX_RAM_SIZE (XLNX_ZYNQMP_MAX_LOW_RAM_SIZE + \ XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE) +/* + * Unimplemented mmio regions needed to boot some images. 
+ */ +#define XLNX_ZYNQMP_NUM_UNIMP_AREAS 1 + struct XlnxZynqMPState { /*< private >*/ DeviceState parent_obj; @@ -95,6 +102,11 @@ struct XlnxZynqMPState { MemoryRegion *ddr_ram; MemoryRegion ddr_ram_low, ddr_ram_high; + XlnxBBRam bbram; + XlnxEFuse efuse; + XlnxZynqMPEFuse efuse_ctrl; + + MemoryRegion mr_unimp[XLNX_ZYNQMP_NUM_UNIMP_AREAS]; CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS]; CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS]; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index c172cbe65f1..5902c0440a5 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -19,6 +19,7 @@ typedef struct BlockConf { BlockBackend *blk; + OnOffAuto backend_defaults; uint32_t physical_block_size; uint32_t logical_block_size; uint32_t min_io_size; @@ -48,6 +49,8 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) } #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ + DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \ + _conf.backend_defaults, ON_OFF_AUTO_AUTO), \ DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h index 7dde0adcee7..86d8363bb09 100644 --- a/include/hw/block/flash.h +++ b/include/hw/block/flash.h @@ -74,6 +74,6 @@ typedef struct { uint8_t ecc_digest(ECCState *s, uint8_t sample); void ecc_reset(ECCState *s); -extern VMStateDescription vmstate_ecc_state; +extern const VMStateDescription vmstate_ecc_state; #endif diff --git a/include/hw/boards.h b/include/hw/boards.h index ad6c8fd5376..9c1c1901046 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -26,7 +26,6 @@ OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE) extern MachineState *current_machine; void machine_run_board_init(MachineState *machine); -bool machine_smp_parse(MachineState *ms, QemuOpts *opts, Error **errp); bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); @@ -35,6 +34,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, Error **errp); +void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp); /** * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices @@ -52,6 +52,21 @@ void machine_set_cpu_numa_node(MachineState *machine, */ void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type); +/** + * device_type_is_dynamic_sysbus: Check if type is an allowed sysbus device + * type for the machine class. + * @mc: Machine class + * @type: type to check (should be a subtype of TYPE_SYS_BUS_DEVICE) + * + * Returns: true if @type is a type in the machine's list of + * dynamically pluggable sysbus devices; otherwise false. + * + * Check if the QOM type @type is in the list of allowed sysbus device + * types (see machine_class_allowed_dynamic_sysbus_dev()). + * Note that if @type has a parent type in the list, it is allowed too. 
+ */ +bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type); + /** * device_is_dynamic_sysbus: test whether device is a dynamic sysbus device * @mc: Machine class @@ -109,6 +124,16 @@ typedef struct { CPUArchId cpus[]; } CPUArchIdList; +/** + * SMPCompatProps: + * @prefer_sockets - whether sockets are preferred over cores in smp parsing + * @dies_supported - whether dies are supported by the machine + */ +typedef struct { + bool prefer_sockets; + bool dies_supported; +} SMPCompatProps; + /** * MachineClass: * @deprecation_reason: If set, the machine is marked as deprecated. The @@ -170,10 +195,6 @@ typedef struct { * kvm-type may be NULL if it is not needed. * @numa_mem_supported: * true if '--numa node.mem' option is supported and false otherwise - * @smp_parse: - * The function pointer to hook different machine specific functions for - * parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more - * machine specific topology fields, such as smp_dies for PCMachine. * @hotplug_allowed: * If the hook is provided, then it'll be called for each device * hotplug to check whether the device hotplug is allowed. Return @@ -210,7 +231,6 @@ struct MachineClass { void (*reset)(MachineState *state); void (*wakeup)(MachineState *state); int (*kvm_type)(MachineState *machine, const char *arg); - void (*smp_parse)(MachineState *ms, QemuOpts *opts); BlockInterfaceType block_default_type; int units_per_default_bus; @@ -248,6 +268,7 @@ struct MachineClass { bool nvdimm_supported; bool numa_mem_supported; bool auto_enable_numa; + SMPCompatProps smp_props; const char *default_ram_id; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, @@ -275,16 +296,18 @@ typedef struct DeviceMemoryState { /** * CpuTopology: * @cpus: the number of present logical processors on the machine - * @cores: the number of cores in one package - * @threads: the number of threads in one core * @sockets: the number of sockets on the machine + * @dies: the number of dies in one socket + * @cores: the number of cores in one die + * @threads: the number of threads in one core * @max_cpus: the maximum number of logical processors on the machine */ typedef struct CpuTopology { unsigned int cpus; + unsigned int sockets; + unsigned int dies; unsigned int cores; unsigned int threads; - unsigned int sockets; unsigned int max_cpus; } CpuTopology; @@ -294,7 +317,6 @@ typedef struct CpuTopology { struct MachineState { /*< private >*/ Object parent_obj; - Notifier sysbus_notifier; /*< public >*/ @@ -353,6 +375,12 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_6_1[]; +extern const size_t hw_compat_6_1_len; + +extern GlobalProperty hw_compat_6_0[]; +extern const size_t hw_compat_6_0_len; + extern GlobalProperty hw_compat_5_2[]; extern const size_t hw_compat_5_2_len; diff --git a/include/hw/char/avr_usart.h b/include/hw/char/avr_usart.h index bb575324036..62eaa1528ef 100644 --- a/include/hw/char/avr_usart.h +++ b/include/hw/char/avr_usart.h @@ -24,7 +24,6 @@ #include "hw/sysbus.h" #include "chardev/char-fe.h" -#include "hw/hw.h" #include "qom/object.h" /* Offsets of registers. 
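A minimal sketch (the "demo" machine is hypothetical) of how a machine class would opt into the SMPCompatProps knobs introduced above, assuming the usual MachineClass class_init pattern:

static void demo_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);

    /* keep the historical "prefer sockets over cores" -smp behaviour */
    mc->smp_props.prefer_sockets = true;
    /* this board exposes no die level in its CPU topology */
    mc->smp_props.dies_supported = false;
}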
*/ diff --git a/include/hw/char/goldfish_tty.h b/include/hw/char/goldfish_tty.h index b9dd67362a6..7503d2fa1e1 100644 --- a/include/hw/char/goldfish_tty.h +++ b/include/hw/char/goldfish_tty.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish TTY * diff --git a/include/hw/char/ibex_uart.h b/include/hw/char/ibex_uart.h index 546f958eb8a..a39985516a4 100644 --- a/include/hw/char/ibex_uart.h +++ b/include/hw/char/ibex_uart.h @@ -31,43 +31,6 @@ #include "qemu/timer.h" #include "qom/object.h" -REG32(INTR_STATE, 0x00) - FIELD(INTR_STATE, TX_WATERMARK, 0, 1) - FIELD(INTR_STATE, RX_WATERMARK, 1, 1) - FIELD(INTR_STATE, TX_EMPTY, 2, 1) - FIELD(INTR_STATE, RX_OVERFLOW, 3, 1) -REG32(INTR_ENABLE, 0x04) -REG32(INTR_TEST, 0x08) -REG32(CTRL, 0x0C) - FIELD(CTRL, TX_ENABLE, 0, 1) - FIELD(CTRL, RX_ENABLE, 1, 1) - FIELD(CTRL, NF, 2, 1) - FIELD(CTRL, SLPBK, 4, 1) - FIELD(CTRL, LLPBK, 5, 1) - FIELD(CTRL, PARITY_EN, 6, 1) - FIELD(CTRL, PARITY_ODD, 7, 1) - FIELD(CTRL, RXBLVL, 8, 2) - FIELD(CTRL, NCO, 16, 16) -REG32(STATUS, 0x10) - FIELD(STATUS, TXFULL, 0, 1) - FIELD(STATUS, RXFULL, 1, 1) - FIELD(STATUS, TXEMPTY, 2, 1) - FIELD(STATUS, RXIDLE, 4, 1) - FIELD(STATUS, RXEMPTY, 5, 1) -REG32(RDATA, 0x14) -REG32(WDATA, 0x18) -REG32(FIFO_CTRL, 0x1c) - FIELD(FIFO_CTRL, RXRST, 0, 1) - FIELD(FIFO_CTRL, TXRST, 1, 1) - FIELD(FIFO_CTRL, RXILVL, 2, 3) - FIELD(FIFO_CTRL, TXILVL, 5, 2) -REG32(FIFO_STATUS, 0x20) - FIELD(FIFO_STATUS, TXLVL, 0, 5) - FIELD(FIFO_STATUS, RXLVL, 16, 5) -REG32(OVRD, 0x24) -REG32(VAL, 0x28) -REG32(TIMEOUT_CTRL, 0x2c) - #define IBEX_UART_TX_FIFO_SIZE 16 #define IBEX_UART_CLOCK 50000000 /* 50MHz clock */ diff --git a/include/hw/char/lm32_juart.h b/include/hw/char/lm32_juart.h deleted file mode 100644 index 6fce2783266..00000000000 --- a/include/hw/char/lm32_juart.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef QEMU_HW_CHAR_LM32_JUART_H -#define QEMU_HW_CHAR_LM32_JUART_H - -#include "hw/qdev-core.h" - -#define TYPE_LM32_JUART "lm32-juart" - -uint32_t lm32_juart_get_jtx(DeviceState *d); -uint32_t lm32_juart_get_jrx(DeviceState *d); -void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx); -void lm32_juart_set_jrx(DeviceState *d, uint32_t jrx); - -#endif /* QEMU_HW_CHAR_LM32_JUART_H */ diff --git a/include/hw/char/mchp_pfsoc_mmuart.h b/include/hw/char/mchp_pfsoc_mmuart.h index f61990215f0..b0e14ca3554 100644 --- a/include/hw/char/mchp_pfsoc_mmuart.h +++ b/include/hw/char/mchp_pfsoc_mmuart.h @@ -28,18 +28,25 @@ #ifndef HW_MCHP_PFSOC_MMUART_H #define HW_MCHP_PFSOC_MMUART_H +#include "hw/sysbus.h" #include "hw/char/serial.h" -#define MCHP_PFSOC_MMUART_REG_SIZE 52 +#define MCHP_PFSOC_MMUART_REG_COUNT 13 + +#define TYPE_MCHP_PFSOC_UART "mchp.pfsoc.uart" +OBJECT_DECLARE_SIMPLE_TYPE(MchpPfSoCMMUartState, MCHP_PFSOC_UART) typedef struct MchpPfSoCMMUartState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion container; MemoryRegion iomem; - hwaddr base; - qemu_irq irq; - SerialMM *serial; + SerialMM serial_mm; - uint32_t reg[MCHP_PFSOC_MMUART_REG_SIZE / sizeof(uint32_t)]; + uint32_t reg[MCHP_PFSOC_MMUART_REG_COUNT]; } MchpPfSoCMMUartState; /** diff --git a/include/hw/char/shakti_uart.h b/include/hw/char/shakti_uart.h new file mode 100644 index 00000000000..526c408233f --- /dev/null +++ b/include/hw/char/shakti_uart.h @@ -0,0 +1,74 @@ +/* + * SHAKTI UART + * + * Copyright (c) 2021 Vijai Kumar K + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation 
files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SHAKTI_UART_H +#define HW_SHAKTI_UART_H + +#include "hw/sysbus.h" +#include "chardev/char-fe.h" + +#define SHAKTI_UART_BAUD 0x00 +#define SHAKTI_UART_TX 0x04 +#define SHAKTI_UART_RX 0x08 +#define SHAKTI_UART_STATUS 0x0C +#define SHAKTI_UART_DELAY 0x10 +#define SHAKTI_UART_CONTROL 0x14 +#define SHAKTI_UART_INT_EN 0x18 +#define SHAKTI_UART_IQ_CYCLES 0x1C +#define SHAKTI_UART_RX_THRES 0x20 + +#define SHAKTI_UART_STATUS_TX_EMPTY (1 << 0) +#define SHAKTI_UART_STATUS_TX_FULL (1 << 1) +#define SHAKTI_UART_STATUS_RX_NOT_EMPTY (1 << 2) +#define SHAKTI_UART_STATUS_RX_FULL (1 << 3) +/* 9600 8N1 is the default setting */ +/* Reg value = (50000000 Hz)/(16 * 9600)*/ +#define SHAKTI_UART_BAUD_DEFAULT 0x0145 +#define SHAKTI_UART_CONTROL_DEFAULT 0x0100 + +#define TYPE_SHAKTI_UART "shakti-uart" +#define SHAKTI_UART(obj) \ + OBJECT_CHECK(ShaktiUartState, (obj), TYPE_SHAKTI_UART) + +typedef struct { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t uart_baud; + uint32_t uart_tx; + uint32_t uart_rx; + uint32_t uart_status; + uint32_t uart_delay; + uint32_t uart_control; + uint32_t uart_interrupt; + uint32_t uart_iq_cycles; + uint32_t uart_rx_threshold; + + CharBackend chr; +} ShaktiUartState; + +#endif /* HW_SHAKTI_UART_H */ diff --git a/include/hw/char/sifive_uart.h b/include/hw/char/sifive_uart.h index 3e962be6592..7f6c79f8bdb 100644 --- a/include/hw/char/sifive_uart.h +++ b/include/hw/char/sifive_uart.h @@ -21,6 +21,7 @@ #define HW_SIFIVE_UART_H #include "chardev/char-fe.h" +#include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qom/object.h" @@ -49,12 +50,10 @@ enum { #define SIFIVE_UART_GET_TXCNT(txctrl) ((txctrl >> 16) & 0x7) #define SIFIVE_UART_GET_RXCNT(rxctrl) ((rxctrl >> 16) & 0x7) +#define SIFIVE_UART_RX_FIFO_SIZE 8 #define TYPE_SIFIVE_UART "riscv.sifive.uart" - -typedef struct SiFiveUARTState SiFiveUARTState; -DECLARE_INSTANCE_CHECKER(SiFiveUARTState, SIFIVE_UART, - TYPE_SIFIVE_UART) +OBJECT_DECLARE_SIMPLE_TYPE(SiFiveUARTState, SIFIVE_UART) struct SiFiveUARTState { /*< private >*/ @@ -64,8 +63,8 @@ struct SiFiveUARTState { qemu_irq irq; MemoryRegion mmio; CharBackend chr; - uint8_t rx_fifo[8]; - unsigned int rx_fifo_len; + uint8_t rx_fifo[SIFIVE_UART_RX_FIFO_SIZE]; + uint8_t rx_fifo_len; uint32_t ie; uint32_t ip; uint32_t txctrl; diff --git a/include/hw/clock.h b/include/hw/clock.h index a7187eab95e..5c927cee7f8 100644 --- a/include/hw/clock.h +++ b/include/hw/clock.h @@ -81,6 +81,10 @@ struct Clock { void *callback_opaque; unsigned int callback_events; + /* Ratio of the parent clock to run the child 
clocks at */ + uint32_t multiplier; + uint32_t divider; + /* Clocks are organized in a clock tree */ Clock *source; QLIST_HEAD(, Clock) children; @@ -319,10 +323,7 @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns) if (clk->period == 0) { return 0; } - /* - * Ignore divu128() return value as we've caught div-by-zero and don't - * need different behaviour for overflow. - */ + divu128(&lo, &hi, clk->period); return lo; } @@ -350,4 +351,29 @@ static inline bool clock_is_enabled(const Clock *clk) */ char *clock_display_freq(Clock *clk); +/** + * clock_set_mul_div: set multiplier/divider for child clocks + * @clk: clock + * @multiplier: multiplier value + * @divider: divider value + * + * By default, a Clock's children will all run with the same period + * as their parent. This function allows you to adjust the multiplier + * and divider used to derive the child clock frequency. + * For example, setting a multiplier of 2 and a divider of 3 + * will run child clocks with a period 2/3 of the parent clock, + * so if the parent clock is an 8MHz clock the children will + * be 12MHz. + * + * Setting the multiplier to 0 will stop the child clocks. + * Setting the divider to 0 is a programming error (diagnosed with + * an assertion failure). + * Setting a multiplier value that results in the child period + * overflowing is not diagnosed. + * + * Note that this function does not call clock_propagate(); the + * caller should do that if necessary. + */ +void clock_set_mul_div(Clock *clk, uint32_t multiplier, uint32_t divider); + #endif /* QEMU_HW_CLOCK_H */ diff --git a/include/hw/core/accel-cpu.h b/include/hw/core/accel-cpu.h index 24a6697412e..5dbfd799553 100644 --- a/include/hw/core/accel-cpu.h +++ b/include/hw/core/accel-cpu.h @@ -32,7 +32,7 @@ typedef struct AccelCPUClass { void (*cpu_class_init)(CPUClass *cc); void (*cpu_instance_init)(CPUState *cpu); - void (*cpu_realizefn)(CPUState *cpu, Error **errp); + bool (*cpu_realizefn)(CPUState *cpu, Error **errp); } AccelCPUClass; #endif /* ACCEL_CPU_H */ diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 4dc5739759f..f93b042d6c3 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -83,6 +83,9 @@ struct TCGCPUOps; /* see accel-cpu.h */ struct AccelCPUClass; +/* see sysemu-cpu-ops.h */ +struct SysemuCPUOps; + /** * CPUClass: * @class_by_name: Callback to map -cpu command line model name to an @@ -90,16 +93,9 @@ struct AccelCPUClass; * @parse_features: Callback to parse command line arguments. * @reset_dump_flags: #CPUDumpFlags to use for reset logging. * @has_work: Callback for checking if there is work to do. - * @virtio_is_big_endian: Callback to return %true if a CPU which supports - * runtime configurable endianness is currently big-endian. Non-configurable - * CPUs can use the default implementation of this method. This method should - * not be used by any callers other than the pre-1.0 virtio devices. * @memory_rw_debug: Callback for GDB memory access. * @dump_state: Callback for dumping state. - * @dump_statistics: Callback for dumping statistics. * @get_arch_id: Callback for getting architecture-dependent CPU ID. - * @get_paging_enabled: Callback for inquiring whether paging is enabled. - * @get_memory_mapping: Callback for obtaining the memory mappings. * @set_pc: Callback for setting the Program Counter register. 
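The multiplier/divider semantics documented above are easiest to see in a short usage sketch (clock and device names are assumptions, not part of the patch): an 8 MHz parent with mul=2, div=3 gives children a period 2/3 of the parent's, i.e. 12 MHz.

static void demo_clock_mul_div(DeviceState *dev)
{
    Clock *parent = clock_new(OBJECT(dev), "parent");
    Clock *child  = clock_new(OBJECT(dev), "child");

    clock_set_source(child, parent);
    clock_set_hz(parent, 8 * 1000 * 1000);   /* 8 MHz source */
    clock_set_mul_div(parent, 2, 3);         /* child period = 2/3 of parent */
    clock_propagate(parent);                 /* children now observe 12 MHz */
}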
This * should have the semantics used by the target architecture when * setting the PC from a source such as an ELF file entry point; @@ -108,24 +104,11 @@ struct AccelCPUClass; * If the target behaviour here is anything other than "set * the PC register to the value passed in" then the target must * also implement the synchronize_from_tb hook. - * @get_phys_page_debug: Callback for obtaining a physical address. - * @get_phys_page_attrs_debug: Callback for obtaining a physical address and the - * associated memory transaction attributes to use for the access. - * CPUs which use memory transaction attributes should implement this - * instead of get_phys_page_debug. - * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for - * a memory access with the specified memory transaction attributes. * @gdb_read_register: Callback for letting GDB read a register. * @gdb_write_register: Callback for letting GDB write a register. - * @write_elf64_note: Callback for writing a CPU-specific ELF note to a - * 64-bit VM coredump. - * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF - * note to a 32-bit VM coredump. - * @write_elf32_note: Callback for writing a CPU-specific ELF note to a - * 32-bit VM coredump. - * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF - * note to a 32-bit VM coredump. - * @vmsd: State description for migration. + * @gdb_adjust_breakpoint: Callback for adjusting the address of a + * breakpoint. Used by AVR to handle a gdb mis-feature with + * its Harvard architecture split code and data. * @gdb_num_core_regs: Number of core registers accessible to GDB. * @gdb_core_xml_file: File name for core registers GDB XML description. * @gdb_stop_before_watchpoint: Indicates whether GDB expects the CPU to stop @@ -151,36 +134,16 @@ struct CPUClass { ObjectClass *(*class_by_name)(const char *cpu_model); void (*parse_features)(const char *typename, char *str, Error **errp); - int reset_dump_flags; bool (*has_work)(CPUState *cpu); - bool (*virtio_is_big_endian)(CPUState *cpu); int (*memory_rw_debug)(CPUState *cpu, vaddr addr, uint8_t *buf, int len, bool is_write); void (*dump_state)(CPUState *cpu, FILE *, int flags); - GuestPanicInformation* (*get_crash_info)(CPUState *cpu); - void (*dump_statistics)(CPUState *cpu, int flags); int64_t (*get_arch_id)(CPUState *cpu); - bool (*get_paging_enabled)(const CPUState *cpu); - void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, - Error **errp); void (*set_pc)(CPUState *cpu, vaddr value); - hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr); - hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr, - MemTxAttrs *attrs); - int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs); int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg); int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg); + vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr); - int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); - int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); - int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); - int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); - - const VMStateDescription *vmsd; const char *gdb_core_xml_file; gchar * (*gdb_arch_name)(CPUState *cpu); const char * (*gdb_get_dynamic_xml)(CPUState *cpu, const char *xmlname); @@ -188,13 +151,26 @@ struct CPUClass { void 
(*disas_set_info)(CPUState *cpu, disassemble_info *info); const char *deprecation_note; - /* Keep non-pointer data at the end to minimize holes. */ - int gdb_num_core_regs; - bool gdb_stop_before_watchpoint; struct AccelCPUClass *accel_cpu; + /* when system emulation is not available, this pointer is NULL */ + const struct SysemuCPUOps *sysemu_ops; + /* when TCG is not available, this pointer is NULL */ - struct TCGCPUOps *tcg_ops; + const struct TCGCPUOps *tcg_ops; + + /* + * if not NULL, this is called in order for the CPUClass to initialize + * class data that depends on the accelerator, see accel/accel-common.c. + */ + void (*init_accel_cpu)(struct AccelCPUClass *accel_cpu, CPUClass *cc); + + /* + * Keep non-pointer data at the end to minimize holes. + */ + int reset_dump_flags; + int gdb_num_core_regs; + bool gdb_stop_before_watchpoint; }; /* @@ -248,6 +224,7 @@ struct KVMState; struct kvm_run; struct hax_vcpu_state; +struct hvf_vcpu_state; #define TB_JMP_CACHE_BITS 12 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) @@ -326,6 +303,10 @@ struct qemu_work_item; * @ignore_memory_transaction_failures: Cached copy of the MachineState * flag of the same name: allows the board to suppress calling of the * CPU do_transaction_failed hook function. + * @kvm_dirty_gfns: Points to the KVM dirty ring for this CPU when KVM dirty + * ring is enabled. + * @kvm_fetch_index: Keeps the index that we last fetched from the per-vCPU + * dirty ring structure. * @log_state: The per-cpu instruction logging state. * * State of one CPU core or thread. @@ -399,9 +380,13 @@ struct CPUState { */ uintptr_t mem_io_pc; + /* Only used in KVM */ int kvm_fd; struct KVMState *kvm_state; struct kvm_run *kvm_run; + struct kvm_dirty_gfn *kvm_dirty_gfns; + uint32_t kvm_fetch_index; + uint64_t dirty_pages; /* Used for events with 'vcpu' and *without* the 'disabled' properties */ DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); @@ -435,7 +420,7 @@ struct CPUState { struct hax_vcpu_state *hax_vcpu; - int hvf_fd; + struct hvf_vcpu_state *hvf; /* track IOMMUs whose translations we've cached in the TCG TLB */ GArray *iommu_notifiers; @@ -565,16 +550,6 @@ enum CPUDumpFlags { */ void cpu_dump_state(CPUState *cpu, FILE *f, int flags); -/** - * cpu_dump_statistics: - * @cpu: The CPU whose state is to be dumped. - * @flags: Flags what to dump. - * - * Dump CPU statistics to the current monitor if we have one, else to - * stdout. - */ -void cpu_dump_statistics(CPUState *cpu, int flags); - #ifndef CONFIG_USER_ONLY /** * cpu_get_phys_page_attrs_debug: @@ -589,18 +564,8 @@ void cpu_dump_statistics(CPUState *cpu, int flags); * * Returns: Corresponding physical page address or -1 if no page found. */ -static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, - MemTxAttrs *attrs) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->get_phys_page_attrs_debug) { - return cc->get_phys_page_attrs_debug(cpu, addr, attrs); - } - /* Fallback for CPUs which don't implement the _attrs_ hook */ - *attrs = MEMTXATTRS_UNSPECIFIED; - return cc->get_phys_page_debug(cpu, addr); -} +hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); /** * cpu_get_phys_page_debug: @@ -612,12 +577,7 @@ static inline hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, * * Returns: Corresponding physical page address or -1 if no page found. 
*/ -static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) -{ - MemTxAttrs attrs = {}; - - return cpu_get_phys_page_attrs_debug(cpu, addr, &attrs); -} +hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); /** cpu_asidx_from_attrs: * @cpu: CPU @@ -626,17 +586,16 @@ static inline hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr) * Returns the address space index specifying the CPU AddressSpace * to use for a memory access with the given transaction attributes. */ -static inline int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs) -{ - CPUClass *cc = CPU_GET_CLASS(cpu); - int ret = 0; +int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs); - if (cc->asidx_from_attrs) { - ret = cc->asidx_from_attrs(cpu, attrs); - assert(ret < cpu->num_ases && ret >= 0); - } - return ret; -} +/** + * cpu_virtio_is_big_endian: + * @cpu: CPU + + * Returns %true if a CPU which supports runtime configurable endianness + * is currently big-endian. + */ +bool cpu_virtio_is_big_endian(CPUState *cpu); #endif /* CONFIG_USER_ONLY */ @@ -1060,6 +1019,7 @@ void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...) GCC_FMT_ATTR(2, 3); /* $(top_srcdir)/cpu.c */ +void cpu_class_init_props(DeviceClass *dc); void cpu_exec_initfn(CPUState *cpu); void cpu_exec_realizefn(CPUState *cpu, Error **errp); void cpu_exec_unrealizefn(CPUState *cpu); @@ -1078,10 +1038,8 @@ bool target_words_bigendian(void); #ifdef NEED_CPU_H #ifdef CONFIG_SOFTMMU + extern const VMStateDescription vmstate_cpu_common; -#else -#define vmstate_cpu_common vmstate_dummy -#endif #define VMSTATE_CPU() { \ .name = "parent_obj", \ @@ -1090,6 +1048,7 @@ extern const VMStateDescription vmstate_cpu_common; .flags = VMS_STRUCT, \ .offset = 0, \ } +#endif /* CONFIG_SOFTMMU */ #endif /* NEED_CPU_H */ diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h new file mode 100644 index 00000000000..a9ba39e5f25 --- /dev/null +++ b/include/hw/core/sysemu-cpu-ops.h @@ -0,0 +1,92 @@ +/* + * CPU operations specific to system emulation + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef SYSEMU_CPU_OPS_H +#define SYSEMU_CPU_OPS_H + +#include "hw/core/cpu.h" + +/* + * struct SysemuCPUOps: System operations specific to a CPU class + */ +typedef struct SysemuCPUOps { + /** + * @get_memory_mapping: Callback for obtaining the memory mappings. + */ + void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, + Error **errp); + /** + * @get_paging_enabled: Callback for inquiring whether paging is enabled. + */ + bool (*get_paging_enabled)(const CPUState *cpu); + /** + * @get_phys_page_debug: Callback for obtaining a physical address. + */ + hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr); + /** + * @get_phys_page_attrs_debug: Callback for obtaining a physical address + * and the associated memory transaction attributes to use for the + * access. + * CPUs which use memory transaction attributes should implement this + * instead of get_phys_page_debug. + */ + hwaddr (*get_phys_page_attrs_debug)(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); + /** + * @asidx_from_attrs: Callback to return the CPU AddressSpace to use for + * a memory access with the specified memory transaction attributes. + */ + int (*asidx_from_attrs)(CPUState *cpu, MemTxAttrs attrs); + /** + * @get_crash_info: Callback for reporting guest crash information in + * GUEST_PANICKED events. 
+ */ + GuestPanicInformation* (*get_crash_info)(CPUState *cpu); + /** + * @write_elf32_note: Callback for writing a CPU-specific ELF note to a + * 32-bit VM coredump. + */ + int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + /** + * @write_elf64_note: Callback for writing a CPU-specific ELF note to a + * 64-bit VM coredump. + */ + int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, + int cpuid, void *opaque); + /** + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. + */ + int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + /** + * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 64-bit VM coredump. + */ + int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, + void *opaque); + /** + * @virtio_is_big_endian: Callback to return %true if a CPU which supports + * runtime configurable endianness is currently big-endian. + * Non-configurable CPUs can use the default implementation of this method. + * This method should not be used by any callers other than the pre-1.0 + * virtio devices. + */ + bool (*virtio_is_big_endian)(CPUState *cpu); + + /** + * @legacy_vmsd: Legacy state for migration. + * Do not use in new targets, use #DeviceClass::vmsd instead. + */ + const VMStateDescription *legacy_vmsd; + +} SysemuCPUOps; + +#endif /* SYSEMU_CPU_OPS_H */ diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h index 72d791438c2..e13898553af 100644 --- a/include/hw/core/tcg-cpu-ops.h +++ b/include/hw/core/tcg-cpu-ops.h @@ -35,33 +35,37 @@ struct TCGCPUOps { void (*cpu_exec_enter)(CPUState *cpu); /** @cpu_exec_exit: Callback for cpu_exec cleanup */ void (*cpu_exec_exit)(CPUState *cpu); - /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */ - bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); + /** @debug_excp_handler: Callback for handling debug exceptions */ + void (*debug_excp_handler)(CPUState *cpu); + +#ifdef NEED_CPU_H +#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) /** - * @do_interrupt: Callback for interrupt handling. + * @fake_user_interrupt: Callback for 'fake exception' handling. * - * note that this is in general SOFTMMU only, but it actually isn't - * because of an x86 hack (accel/tcg/cpu-exec.c), so we cannot put it - * in the SOFTMMU section in general. + * Simulate 'fake exception' which will be handled outside the + * cpu execution loop (hack for x86 user mode). + */ + void (*fake_user_interrupt)(CPUState *cpu); +#else + /** + * @do_interrupt: Callback for interrupt handling. */ void (*do_interrupt)(CPUState *cpu); +#endif /* !CONFIG_USER_ONLY || !TARGET_I386 */ +#ifdef CONFIG_SOFTMMU + /** @cpu_exec_interrupt: Callback for processing interrupts in cpu_exec */ + bool (*cpu_exec_interrupt)(CPUState *cpu, int interrupt_request); /** - * @tlb_fill: Handle a softmmu tlb miss or user-only address fault + * @tlb_fill: Handle a softmmu tlb miss * - * For system mode, if the access is valid, call tlb_set_page - * and return true; if the access is invalid, and probe is - * true, return false; otherwise raise an exception and do - * not return. For user-only mode, always raise an exception - * and do not return. + * If the access is valid, call tlb_set_page and return true; + * if the access is invalid and probe is true, return false; + * otherwise raise an exception and do not return. 
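A minimal sketch (the "demo" target and its helpers are hypothetical) of how a target now supplies the hooks that used to live directly in CPUClass: it fills a SysemuCPUOps table and points cc->sysemu_ops at it from its class_init.

#include "hw/core/sysemu-cpu-ops.h"

/* hypothetical target-provided helpers */
static hwaddr demo_cpu_get_phys_page_debug(CPUState *cs, vaddr addr);
extern const VMStateDescription vmstate_demo_cpu;

static const struct SysemuCPUOps demo_sysemu_ops = {
    .get_phys_page_debug = demo_cpu_get_phys_page_debug,
    .legacy_vmsd         = &vmstate_demo_cpu,
};

static void demo_cpu_class_init(ObjectClass *oc, void *data)
{
    CPUClass *cc = CPU_CLASS(oc);

    cc->sysemu_ops = &demo_sysemu_ops;
}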
*/ bool (*tlb_fill)(CPUState *cpu, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); - /** @debug_excp_handler: Callback for handling debug exceptions */ - void (*debug_excp_handler)(CPUState *cpu); - -#ifdef NEED_CPU_H -#ifdef CONFIG_SOFTMMU /** * @do_transaction_failed: Callback for handling failed memory transactions * (ie bus faults or external aborts; not MMU faults) @@ -72,10 +76,11 @@ struct TCGCPUOps { MemTxResult response, uintptr_t retaddr); /** * @do_unaligned_access: Callback for unaligned access handling + * The callback must exit via raising an exception. */ void (*do_unaligned_access)(CPUState *cpu, vaddr addr, MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr); + int mmu_idx, uintptr_t retaddr) QEMU_NORETURN; /** * @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM @@ -88,6 +93,12 @@ struct TCGCPUOps { */ bool (*debug_check_watchpoint)(CPUState *cpu, CPUWatchpoint *wp); + /** + * @debug_check_breakpoint: return true if the architectural + * breakpoint whose PC has matched should really fire. + */ + bool (*debug_check_breakpoint)(CPUState *cpu); + /** * @io_recompile_replay_branch: Callback for cpu_io_recompile. * @@ -98,6 +109,55 @@ struct TCGCPUOps { */ bool (*io_recompile_replay_branch)(CPUState *cpu, const TranslationBlock *tb); +#else + /** + * record_sigsegv: + * @cpu: cpu context + * @addr: faulting guest address + * @access_type: access was read/write/execute + * @maperr: true for invalid page, false for permission fault + * @ra: host pc for unwinding + * + * We are about to raise SIGSEGV with si_code set for @maperr, + * and si_addr set for @addr. Record anything further needed + * for the signal ucontext_t. + * + * If the emulated kernel does not provide anything to the signal + * handler with anything besides the user context registers, and + * the siginfo_t, then this hook need do nothing and may be omitted. + * Otherwise, record the data and return; the caller will raise + * the signal, unwind the cpu state, and return to the main loop. + * + * If it is simpler to re-use the sysemu tlb_fill code, @ra is provided + * so that a "normal" cpu exception can be raised. In this case, + * the signal must be raised by the architecture cpu_loop. + */ + void (*record_sigsegv)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); + /** + * record_sigbus: + * @cpu: cpu context + * @addr: misaligned guest address + * @access_type: access was read/write/execute + * @ra: host pc for unwinding + * + * We are about to raise SIGBUS with si_code BUS_ADRALN, + * and si_addr set for @addr. Record anything further needed + * for the signal ucontext_t. + * + * If the emulated kernel does not provide the signal handler with + * anything besides the user context registers, and the siginfo_t, + * then this hook need do nothing and may be omitted. + * Otherwise, record the data and return; the caller will raise + * the signal, unwind the cpu state, and return to the main loop. + * + * If it is simpler to re-use the sysemu do_unaligned_access code, + * @ra is provided so that a "normal" cpu exception can be raised. + * In this case, the signal must be raised by the architecture cpu_loop. 
+ */ + void (*record_sigbus)(CPUState *cpu, vaddr addr, + MMUAccessType access_type, uintptr_t ra); #endif /* CONFIG_SOFTMMU */ #endif /* NEED_CPU_H */ diff --git a/include/hw/display/edid.h b/include/hw/display/edid.h index 1f8fc9b3750..520f8ec2027 100644 --- a/include/hw/display/edid.h +++ b/include/hw/display/edid.h @@ -11,6 +11,7 @@ typedef struct qemu_edid_info { uint32_t prefy; uint32_t maxx; uint32_t maxy; + uint32_t refresh_rate; } qemu_edid_info; void qemu_edid_generate(uint8_t *edid, size_t size, @@ -21,10 +22,11 @@ void qemu_edid_region_io(MemoryRegion *region, Object *owner, uint32_t qemu_edid_dpi_to_mm(uint32_t dpi, uint32_t res); -#define DEFINE_EDID_PROPERTIES(_state, _edid_info) \ - DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0), \ - DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0), \ - DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0), \ - DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0) +#define DEFINE_EDID_PROPERTIES(_state, _edid_info) \ + DEFINE_PROP_UINT32("xres", _state, _edid_info.prefx, 0), \ + DEFINE_PROP_UINT32("yres", _state, _edid_info.prefy, 0), \ + DEFINE_PROP_UINT32("xmax", _state, _edid_info.maxx, 0), \ + DEFINE_PROP_UINT32("ymax", _state, _edid_info.maxy, 0), \ + DEFINE_PROP_UINT32("refresh_rate", _state, _edid_info.refresh_rate, 0) #endif /* EDID_H */ diff --git a/include/hw/display/macfb.h b/include/hw/display/macfb.h index 80806b0306a..e52775aa215 100644 --- a/include/hw/display/macfb.h +++ b/include/hw/display/macfb.h @@ -14,9 +14,42 @@ #define MACFB_H #include "exec/memory.h" +#include "hw/irq.h" #include "ui/console.h" +#include "qemu/timer.h" #include "qom/object.h" +typedef enum { + MACFB_DISPLAY_APPLE_21_COLOR = 0, + MACFB_DISPLAY_APPLE_PORTRAIT = 1, + MACFB_DISPLAY_APPLE_12_RGB = 2, + MACFB_DISPLAY_APPLE_2PAGE_MONO = 3, + MACFB_DISPLAY_NTSC_UNDERSCAN = 4, + MACFB_DISPLAY_NTSC_OVERSCAN = 5, + MACFB_DISPLAY_APPLE_12_MONO = 6, + MACFB_DISPLAY_APPLE_13_RGB = 7, + MACFB_DISPLAY_16_COLOR = 8, + MACFB_DISPLAY_PAL1_UNDERSCAN = 9, + MACFB_DISPLAY_PAL1_OVERSCAN = 10, + MACFB_DISPLAY_PAL2_UNDERSCAN = 11, + MACFB_DISPLAY_PAL2_OVERSCAN = 12, + MACFB_DISPLAY_VGA = 13, + MACFB_DISPLAY_SVGA = 14, +} MacfbDisplayType; + +typedef struct MacFbMode { + uint8_t type; + uint8_t depth; + uint32_t mode_ctrl1; + uint32_t mode_ctrl2; + uint32_t width; + uint32_t height; + uint32_t stride; + uint32_t offset; +} MacFbMode; + +#define MACFB_NUM_REGS 8 + typedef struct MacfbState { MemoryRegion mem_vram; MemoryRegion mem_ctrl; @@ -28,6 +61,15 @@ typedef struct MacfbState { uint8_t color_palette[256 * 3]; uint32_t width, height; /* in pixels */ uint8_t depth; + uint8_t type; + + uint32_t regs[MACFB_NUM_REGS]; + MacFbMode *mode; + + uint32_t irq_state; + uint32_t irq_mask; + QEMUTimer *vbl_timer; + qemu_irq irq; } MacfbState; #define TYPE_MACFB "sysbus-macfb" @@ -46,6 +88,7 @@ struct MacfbNubusDeviceClass { DeviceClass parent_class; DeviceRealize parent_realize; + DeviceUnrealize parent_unrealize; }; diff --git a/include/hw/display/milkymist_tmu2.h b/include/hw/display/milkymist_tmu2.h deleted file mode 100644 index fdce9535a1e..00000000000 --- a/include/hw/display/milkymist_tmu2.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * QEMU model of the Milkymist texture mapping unit. 
- * - * Copyright (c) 2010 Michael Walle - * Copyright (c) 2010 Sebastien Bourdeauducq - * - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - * - * - * Specification available at: - * http://milkymist.walle.cc/socdoc/tmu2.pdf - * - */ - -#ifndef HW_DISPLAY_MILKYMIST_TMU2_H -#define HW_DISPLAY_MILKYMIST_TMU2_H - -#include "exec/hwaddr.h" -#include "hw/qdev-core.h" - -#if defined(CONFIG_X11) && defined(CONFIG_OPENGL) -DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq); -#else -static inline DeviceState *milkymist_tmu2_create(hwaddr base, qemu_irq irq) -{ - return NULL; -} -#endif - -#endif /* HW_DISPLAY_MILKYMIST_TMU2_H */ diff --git a/include/hw/display/vga.h b/include/hw/display/vga.h index ca0003dbfd9..5f7825e0e36 100644 --- a/include/hw/display/vga.h +++ b/include/hw/display/vga.h @@ -11,6 +11,12 @@ #include "exec/hwaddr.h" +/* + * modules can reference this symbol to avoid being loaded + * into system emulators without vga support + */ +extern bool have_vga; + enum vga_retrace_method { VGA_RETRACE_DUMB, VGA_RETRACE_PRECISE diff --git a/include/hw/dma/xlnx-zdma.h b/include/hw/dma/xlnx-zdma.h index 6602e7ffa72..efc75217d59 100644 --- a/include/hw/dma/xlnx-zdma.h +++ b/include/hw/dma/xlnx-zdma.h @@ -56,7 +56,7 @@ struct XlnxZDMA { MemoryRegion iomem; MemTxAttrs attr; MemoryRegion *dma_mr; - AddressSpace *dma_as; + AddressSpace dma_as; qemu_irq irq_zdma_ch_imr; struct { diff --git a/include/hw/dma/xlnx_csu_dma.h b/include/hw/dma/xlnx_csu_dma.h index 204d94c6737..9e9dc551e99 100644 --- a/include/hw/dma/xlnx_csu_dma.h +++ b/include/hw/dma/xlnx_csu_dma.h @@ -30,7 +30,7 @@ typedef struct XlnxCSUDMA { MemoryRegion iomem; MemTxAttrs attr; MemoryRegion *dma_mr; - AddressSpace *dma_as; + AddressSpace dma_as; qemu_irq irq; StreamSink *tx_dev; /* Used as generic StreamSink */ ptimer_state *src_timer; diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 6ee458e7bc3..995de8495c2 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -312,25 +312,26 @@ static struct elf_note *glue(get_elf_note_type, SZ)(struct elf_note *nhdr, return nhdr; } -static int glue(load_elf, SZ)(const char *name, int fd, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, - int must_swab, uint64_t *pentry, - uint64_t *lowaddr, uint64_t *highaddr, - uint32_t *pflags, int elf_machine, - int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom, - symbol_fn_t sym_cb) +static ssize_t glue(load_elf, SZ)(const char *name, int fd, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, + int must_swab, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, + symbol_fn_t sym_cb) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; - int size, i, total_size; + int 
size, i; + ssize_t total_size; elf_word mem_size, file_size, data_offset; uint64_t addr, low = (uint64_t)-1, high = 0; GMappedFile *mapped_file = NULL; uint8_t *data = NULL; - int ret = ELF_LOAD_FAILED; + ssize_t ret = ELF_LOAD_FAILED; if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) goto fail; @@ -368,14 +369,6 @@ static int glue(load_elf, SZ)(const char *name, int fd, } } break; - case EM_MOXIE: - if (ehdr.e_machine != EM_MOXIE) { - if (ehdr.e_machine != EM_MOXIE_OLD) { - ret = ELF_LOAD_WRONG_ARCH; - goto fail; - } - } - break; case EM_MIPS: case EM_NANOMIPS: if ((ehdr.e_machine != EM_MIPS) && @@ -490,7 +483,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, } } - if (mem_size > INT_MAX - total_size) { + if (mem_size > SSIZE_MAX - total_size) { ret = ELF_LOAD_TOO_BIG; goto fail; } diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h index 02a0ced0a09..5a0dd0c8cff 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -258,6 +258,17 @@ struct smbios_type_32 { uint8_t boot_status; } QEMU_PACKED; +/* SMBIOS type 41 - Onboard Devices Extended Information */ +struct smbios_type_41 { + struct smbios_structure_header header; + uint8_t reference_designation_str; + uint8_t device_type; + uint8_t device_type_instance; + uint16_t segment_group_number; + uint8_t bus_number; + uint8_t device_number; +} QEMU_PACKED; + /* SMBIOS type 127 -- End-of-table */ struct smbios_type_127 { struct smbios_structure_header header; @@ -273,5 +284,6 @@ void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, const unsigned int mem_array_size, uint8_t **tables, size_t *tables_len, - uint8_t **anchor, size_t *anchor_len); + uint8_t **anchor, size_t *anchor_len, + Error **errp); #endif /* QEMU_SMBIOS_H */ diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h index e1636ce7fea..801846befb3 100644 --- a/include/hw/gpio/aspeed_gpio.h +++ b/include/hw/gpio/aspeed_gpio.h @@ -17,9 +17,9 @@ OBJECT_DECLARE_TYPE(AspeedGPIOState, AspeedGPIOClass, ASPEED_GPIO) #define ASPEED_GPIO_MAX_NR_SETS 8 +#define ASPEED_GPIOS_PER_SET 32 #define ASPEED_REGS_PER_BANK 14 #define ASPEED_GPIO_MAX_NR_REGS (ASPEED_REGS_PER_BANK * ASPEED_GPIO_MAX_NR_SETS) -#define ASPEED_GPIO_NR_PINS 228 #define ASPEED_GROUPS_PER_SET 4 #define ASPEED_GPIO_NR_DEBOUNCE_REGS 3 #define ASPEED_CHARS_PER_GROUP_LABEL 4 @@ -60,7 +60,6 @@ struct AspeedGPIOClass { const GPIOSetProperties *props; uint32_t nr_gpio_pins; uint32_t nr_gpio_sets; - uint32_t gap; const AspeedGPIOReg *reg_table; }; @@ -72,7 +71,7 @@ struct AspeedGPIOState { MemoryRegion iomem; int pending; qemu_irq irq; - qemu_irq gpios[ASPEED_GPIO_NR_PINS]; + qemu_irq gpios[ASPEED_GPIO_MAX_NR_SETS][ASPEED_GPIOS_PER_SET]; /* Parallel GPIO Registers */ uint32_t debounce_regs[ASPEED_GPIO_NR_DEBOUNCE_REGS]; diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h index 565f8330662..4b9be09274c 100644 --- a/include/hw/i2c/aspeed_i2c.h +++ b/include/hw/i2c/aspeed_i2c.h @@ -36,7 +36,11 @@ OBJECT_DECLARE_TYPE(AspeedI2CState, AspeedI2CClass, ASPEED_I2C) struct AspeedI2CState; -typedef struct AspeedI2CBus { +#define TYPE_ASPEED_I2C_BUS "aspeed.i2c.bus" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedI2CBus, ASPEED_I2C_BUS) +struct AspeedI2CBus { + SysBusDevice parent_obj; + struct AspeedI2CState *controller; MemoryRegion mr; @@ -54,7 +58,7 @@ typedef struct AspeedI2CBus { uint32_t pool_ctrl; uint32_t dma_addr; uint32_t dma_len; -} AspeedI2CBus; +}; struct AspeedI2CState { SysBusDevice parent_obj; diff --git 
a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h index 277dd9f2d6d..5ca3b708c0b 100644 --- a/include/hw/i2c/i2c.h +++ b/include/hw/i2c/i2c.h @@ -16,6 +16,7 @@ enum i2c_event { I2C_NACK /* Masker NACKed a receive byte. */ }; +typedef struct I2CNodeList I2CNodeList; #define TYPE_I2C_SLAVE "i2c-slave" OBJECT_DECLARE_TYPE(I2CSlave, I2CSlaveClass, @@ -39,6 +40,16 @@ struct I2CSlaveClass { * return code is not used and should be zero. */ int (*event)(I2CSlave *s, enum i2c_event event); + + /* + * Check if this device matches the address provided. Returns bool of + * true if it matches (or broadcast), and updates the device list, false + * otherwise. + * + * If broadcast is true, match should add the device and return true. + */ + bool (*match_and_add)(I2CSlave *candidate, uint8_t address, bool broadcast, + I2CNodeList *current_devs); }; struct I2CSlave { @@ -58,22 +69,58 @@ struct I2CNode { QLIST_ENTRY(I2CNode) next; }; +typedef QLIST_HEAD(I2CNodeList, I2CNode) I2CNodeList; + struct I2CBus { BusState qbus; - QLIST_HEAD(, I2CNode) current_devs; + I2CNodeList current_devs; uint8_t saved_address; bool broadcast; }; I2CBus *i2c_init_bus(DeviceState *parent, const char *name); -void i2c_set_slave_address(I2CSlave *dev, uint8_t address); int i2c_bus_busy(I2CBus *bus); -int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv); + +/** + * i2c_start_transfer: start a transfer on an I2C bus. + * + * @bus: #I2CBus to be used + * @address: address of the slave + * @is_recv: indicates the transfer direction + * + * When @is_recv is a known boolean constant, use the + * i2c_start_recv() or i2c_start_send() helper instead. + * + * Returns: 0 on success, -1 on error + */ +int i2c_start_transfer(I2CBus *bus, uint8_t address, bool is_recv); + +/** + * i2c_start_recv: start a 'receive' transfer on an I2C bus. + * + * @bus: #I2CBus to be used + * @address: address of the slave + * + * Returns: 0 on success, -1 on error + */ +int i2c_start_recv(I2CBus *bus, uint8_t address); + +/** + * i2c_start_send: start a 'send' transfer on an I2C bus. + * + * @bus: #I2CBus to be used + * @address: address of the slave + * + * Returns: 0 on success, -1 on error + */ +int i2c_start_send(I2CBus *bus, uint8_t address); + void i2c_end_transfer(I2CBus *bus); void i2c_nack(I2CBus *bus); -int i2c_send_recv(I2CBus *bus, uint8_t *data, bool send); int i2c_send(I2CBus *bus, uint8_t data); uint8_t i2c_recv(I2CBus *bus); +bool i2c_scan_bus(I2CBus *bus, uint8_t address, bool broadcast, + I2CNodeList *current_devs); /** * Create an I2C slave device on the heap. @@ -127,8 +174,12 @@ I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr); */ bool i2c_slave_realize_and_unref(I2CSlave *dev, I2CBus *bus, Error **errp); -/* lm832x.c */ -void lm832x_key_event(DeviceState *dev, int key, int state); +/** + * Set the I2C bus address of a slave device + * @dev: I2C slave device + * @address: I2C address of the slave when put on a bus + */ +void i2c_slave_set_address(I2CSlave *dev, uint8_t address); extern const VMStateDescription vmstate_i2c_slave; diff --git a/include/hw/i2c/i2c_mux_pca954x.h b/include/hw/i2c/i2c_mux_pca954x.h new file mode 100644 index 00000000000..8aaf9bbc394 --- /dev/null +++ b/include/hw/i2c/i2c_mux_pca954x.h @@ -0,0 +1,19 @@ +#ifndef QEMU_I2C_MUX_PCA954X +#define QEMU_I2C_MUX_PCA954X + +#include "hw/i2c/i2c.h" + +#define TYPE_PCA9546 "pca9546" +#define TYPE_PCA9548 "pca9548" + +/** + * Retrieves the i2c bus associated with the specified channel on this i2c + * mux. + * @mux: an i2c mux device. 
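For illustration, here is a minimal master-side sketch built only from the helpers declared in the i2c.h hunk above (i2c_start_send(), i2c_start_recv(), i2c_send(), i2c_recv(), i2c_end_transfer()); the slave address, register index and wrapper name are hypothetical and not part of this patch:

    /* Hypothetical wrapper: read one register from a slave at dev_addr. */
    static int demo_i2c_read_reg(I2CBus *bus, uint8_t dev_addr,
                                 uint8_t reg, uint8_t *val)
    {
        if (i2c_start_send(bus, dev_addr)) {
            return -1;                        /* address not acknowledged */
        }
        i2c_send(bus, reg);                   /* select the register to read */
        if (i2c_start_recv(bus, dev_addr)) {  /* repeated START, read phase */
            i2c_end_transfer(bus);
            return -1;
        }
        *val = i2c_recv(bus);                 /* one data byte from the slave */
        i2c_end_transfer(bus);                /* STOP condition */
        return 0;
    }

When the transfer direction is only known at run time, i2c_start_transfer() with its bool is_recv argument remains the right entry point, as the comment above notes.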
+ * @channel: the i2c channel requested + * + * Returns: a pointer to the associated i2c bus. + */ +I2CBus *pca954x_i2c_get_bus(I2CSlave *mux, uint8_t channel); + +#endif diff --git a/include/hw/i2c/pmbus_device.h b/include/hw/i2c/pmbus_device.h new file mode 100644 index 00000000000..62bd38c83fb --- /dev/null +++ b/include/hw/i2c/pmbus_device.h @@ -0,0 +1,517 @@ +/* + * QEMU PMBus device emulation + * + * Copyright 2021 Google LLC + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_PMBUS_DEVICE_H +#define HW_PMBUS_DEVICE_H + +#include "qemu/bitops.h" +#include "hw/i2c/smbus_slave.h" + +enum pmbus_registers { + PMBUS_PAGE = 0x00, /* R/W byte */ + PMBUS_OPERATION = 0x01, /* R/W byte */ + PMBUS_ON_OFF_CONFIG = 0x02, /* R/W byte */ + PMBUS_CLEAR_FAULTS = 0x03, /* Send Byte */ + PMBUS_PHASE = 0x04, /* R/W byte */ + PMBUS_PAGE_PLUS_WRITE = 0x05, /* Block Write-only */ + PMBUS_PAGE_PLUS_READ = 0x06, /* Block Read-only */ + PMBUS_WRITE_PROTECT = 0x10, /* R/W byte */ + PMBUS_STORE_DEFAULT_ALL = 0x11, /* Send Byte */ + PMBUS_RESTORE_DEFAULT_ALL = 0x12, /* Send Byte */ + PMBUS_STORE_DEFAULT_CODE = 0x13, /* Write-only Byte */ + PMBUS_RESTORE_DEFAULT_CODE = 0x14, /* Write-only Byte */ + PMBUS_STORE_USER_ALL = 0x15, /* Send Byte */ + PMBUS_RESTORE_USER_ALL = 0x16, /* Send Byte */ + PMBUS_STORE_USER_CODE = 0x17, /* Write-only Byte */ + PMBUS_RESTORE_USER_CODE = 0x18, /* Write-only Byte */ + PMBUS_CAPABILITY = 0x19, /* Read-Only byte */ + PMBUS_QUERY = 0x1A, /* Write-Only */ + PMBUS_SMBALERT_MASK = 0x1B, /* Block read, Word write */ + PMBUS_VOUT_MODE = 0x20, /* R/W byte */ + PMBUS_VOUT_COMMAND = 0x21, /* R/W word */ + PMBUS_VOUT_TRIM = 0x22, /* R/W word */ + PMBUS_VOUT_CAL_OFFSET = 0x23, /* R/W word */ + PMBUS_VOUT_MAX = 0x24, /* R/W word */ + PMBUS_VOUT_MARGIN_HIGH = 0x25, /* R/W word */ + PMBUS_VOUT_MARGIN_LOW = 0x26, /* R/W word */ + PMBUS_VOUT_TRANSITION_RATE = 0x27, /* R/W word */ + PMBUS_VOUT_DROOP = 0x28, /* R/W word */ + PMBUS_VOUT_SCALE_LOOP = 0x29, /* R/W word */ + PMBUS_VOUT_SCALE_MONITOR = 0x2A, /* R/W word */ + PMBUS_COEFFICIENTS = 0x30, /* Read-only block 5 bytes */ + PMBUS_POUT_MAX = 0x31, /* R/W word */ + PMBUS_MAX_DUTY = 0x32, /* R/W word */ + PMBUS_FREQUENCY_SWITCH = 0x33, /* R/W word */ + PMBUS_VIN_ON = 0x35, /* R/W word */ + PMBUS_VIN_OFF = 0x36, /* R/W word */ + PMBUS_INTERLEAVE = 0x37, /* R/W word */ + PMBUS_IOUT_CAL_GAIN = 0x38, /* R/W word */ + PMBUS_IOUT_CAL_OFFSET = 0x39, /* R/W word */ + PMBUS_FAN_CONFIG_1_2 = 0x3A, /* R/W byte */ + PMBUS_FAN_COMMAND_1 = 0x3B, /* R/W word */ + PMBUS_FAN_COMMAND_2 = 0x3C, /* R/W word */ + PMBUS_FAN_CONFIG_3_4 = 0x3D, /* R/W byte */ + PMBUS_FAN_COMMAND_3 = 0x3E, /* R/W word */ + PMBUS_FAN_COMMAND_4 = 0x3F, /* R/W word */ + PMBUS_VOUT_OV_FAULT_LIMIT = 0x40, /* R/W word */ + PMBUS_VOUT_OV_FAULT_RESPONSE = 0x41, /* R/W byte */ + PMBUS_VOUT_OV_WARN_LIMIT = 0x42, /* R/W word */ + PMBUS_VOUT_UV_WARN_LIMIT = 0x43, /* R/W word */ + PMBUS_VOUT_UV_FAULT_LIMIT = 0x44, /* R/W word */ + PMBUS_VOUT_UV_FAULT_RESPONSE = 0x45, /* R/W byte */ + PMBUS_IOUT_OC_FAULT_LIMIT = 0x46, /* R/W word */ + PMBUS_IOUT_OC_FAULT_RESPONSE = 0x47, /* R/W byte */ + PMBUS_IOUT_OC_LV_FAULT_LIMIT = 0x48, /* R/W word */ + PMBUS_IOUT_OC_LV_FAULT_RESPONSE = 0x49, /* R/W byte */ + PMBUS_IOUT_OC_WARN_LIMIT = 0x4A, /* R/W word */ + PMBUS_IOUT_UC_FAULT_LIMIT = 0x4B, /* R/W word */ + PMBUS_IOUT_UC_FAULT_RESPONSE = 0x4C, /* R/W byte */ + PMBUS_OT_FAULT_LIMIT = 0x4F, /* R/W word */ + PMBUS_OT_FAULT_RESPONSE = 0x50, /* R/W byte */ + PMBUS_OT_WARN_LIMIT = 0x51, /* R/W word */ + 
PMBUS_UT_WARN_LIMIT = 0x52, /* R/W word */ + PMBUS_UT_FAULT_LIMIT = 0x53, /* R/W word */ + PMBUS_UT_FAULT_RESPONSE = 0x54, /* R/W byte */ + PMBUS_VIN_OV_FAULT_LIMIT = 0x55, /* R/W word */ + PMBUS_VIN_OV_FAULT_RESPONSE = 0x56, /* R/W byte */ + PMBUS_VIN_OV_WARN_LIMIT = 0x57, /* R/W word */ + PMBUS_VIN_UV_WARN_LIMIT = 0x58, /* R/W word */ + PMBUS_VIN_UV_FAULT_LIMIT = 0x59, /* R/W word */ + PMBUS_VIN_UV_FAULT_RESPONSE = 0x5A, /* R/W byte */ + PMBUS_IIN_OC_FAULT_LIMIT = 0x5B, /* R/W word */ + PMBUS_IIN_OC_FAULT_RESPONSE = 0x5C, /* R/W byte */ + PMBUS_IIN_OC_WARN_LIMIT = 0x5D, /* R/W word */ + PMBUS_POWER_GOOD_ON = 0x5E, /* R/W word */ + PMBUS_POWER_GOOD_OFF = 0x5F, /* R/W word */ + PMBUS_TON_DELAY = 0x60, /* R/W word */ + PMBUS_TON_RISE = 0x61, /* R/W word */ + PMBUS_TON_MAX_FAULT_LIMIT = 0x62, /* R/W word */ + PMBUS_TON_MAX_FAULT_RESPONSE = 0x63, /* R/W byte */ + PMBUS_TOFF_DELAY = 0x64, /* R/W word */ + PMBUS_TOFF_FALL = 0x65, /* R/W word */ + PMBUS_TOFF_MAX_WARN_LIMIT = 0x66, /* R/W word */ + PMBUS_POUT_OP_FAULT_LIMIT = 0x68, /* R/W word */ + PMBUS_POUT_OP_FAULT_RESPONSE = 0x69, /* R/W byte */ + PMBUS_POUT_OP_WARN_LIMIT = 0x6A, /* R/W word */ + PMBUS_PIN_OP_WARN_LIMIT = 0x6B, /* R/W word */ + PMBUS_STATUS_BYTE = 0x78, /* R/W byte */ + PMBUS_STATUS_WORD = 0x79, /* R/W word */ + PMBUS_STATUS_VOUT = 0x7A, /* R/W byte */ + PMBUS_STATUS_IOUT = 0x7B, /* R/W byte */ + PMBUS_STATUS_INPUT = 0x7C, /* R/W byte */ + PMBUS_STATUS_TEMPERATURE = 0x7D, /* R/W byte */ + PMBUS_STATUS_CML = 0x7E, /* R/W byte */ + PMBUS_STATUS_OTHER = 0x7F, /* R/W byte */ + PMBUS_STATUS_MFR_SPECIFIC = 0x80, /* R/W byte */ + PMBUS_STATUS_FANS_1_2 = 0x81, /* R/W byte */ + PMBUS_STATUS_FANS_3_4 = 0x82, /* R/W byte */ + PMBUS_READ_EIN = 0x86, /* Read-Only block 5 bytes */ + PMBUS_READ_EOUT = 0x87, /* Read-Only block 5 bytes */ + PMBUS_READ_VIN = 0x88, /* Read-Only word */ + PMBUS_READ_IIN = 0x89, /* Read-Only word */ + PMBUS_READ_VCAP = 0x8A, /* Read-Only word */ + PMBUS_READ_VOUT = 0x8B, /* Read-Only word */ + PMBUS_READ_IOUT = 0x8C, /* Read-Only word */ + PMBUS_READ_TEMPERATURE_1 = 0x8D, /* Read-Only word */ + PMBUS_READ_TEMPERATURE_2 = 0x8E, /* Read-Only word */ + PMBUS_READ_TEMPERATURE_3 = 0x8F, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_1 = 0x90, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_2 = 0x91, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_3 = 0x92, /* Read-Only word */ + PMBUS_READ_FAN_SPEED_4 = 0x93, /* Read-Only word */ + PMBUS_READ_DUTY_CYCLE = 0x94, /* Read-Only word */ + PMBUS_READ_FREQUENCY = 0x95, /* Read-Only word */ + PMBUS_READ_POUT = 0x96, /* Read-Only word */ + PMBUS_READ_PIN = 0x97, /* Read-Only word */ + PMBUS_REVISION = 0x98, /* Read-Only byte */ + PMBUS_MFR_ID = 0x99, /* R/W block */ + PMBUS_MFR_MODEL = 0x9A, /* R/W block */ + PMBUS_MFR_REVISION = 0x9B, /* R/W block */ + PMBUS_MFR_LOCATION = 0x9C, /* R/W block */ + PMBUS_MFR_DATE = 0x9D, /* R/W block */ + PMBUS_MFR_SERIAL = 0x9E, /* R/W block */ + PMBUS_APP_PROFILE_SUPPORT = 0x9F, /* Read-Only block-read */ + PMBUS_MFR_VIN_MIN = 0xA0, /* Read-Only word */ + PMBUS_MFR_VIN_MAX = 0xA1, /* Read-Only word */ + PMBUS_MFR_IIN_MAX = 0xA2, /* Read-Only word */ + PMBUS_MFR_PIN_MAX = 0xA3, /* Read-Only word */ + PMBUS_MFR_VOUT_MIN = 0xA4, /* Read-Only word */ + PMBUS_MFR_VOUT_MAX = 0xA5, /* Read-Only word */ + PMBUS_MFR_IOUT_MAX = 0xA6, /* Read-Only word */ + PMBUS_MFR_POUT_MAX = 0xA7, /* Read-Only word */ + PMBUS_MFR_TAMBIENT_MAX = 0xA8, /* Read-Only word */ + PMBUS_MFR_TAMBIENT_MIN = 0xA9, /* Read-Only word */ + PMBUS_MFR_EFFICIENCY_LL = 0xAA, /* Read-Only block 
14 bytes */ + PMBUS_MFR_EFFICIENCY_HL = 0xAB, /* Read-Only block 14 bytes */ + PMBUS_MFR_PIN_ACCURACY = 0xAC, /* Read-Only byte */ + PMBUS_IC_DEVICE_ID = 0xAD, /* Read-Only block-read */ + PMBUS_IC_DEVICE_REV = 0xAE, /* Read-Only block-read */ + PMBUS_MFR_MAX_TEMP_1 = 0xC0, /* R/W word */ + PMBUS_MFR_MAX_TEMP_2 = 0xC1, /* R/W word */ + PMBUS_MFR_MAX_TEMP_3 = 0xC2, /* R/W word */ +}; + +/* STATUS_WORD */ +#define PB_STATUS_VOUT BIT(15) +#define PB_STATUS_IOUT_POUT BIT(14) +#define PB_STATUS_INPUT BIT(13) +#define PB_STATUS_WORD_MFR BIT(12) +#define PB_STATUS_POWER_GOOD_N BIT(11) +#define PB_STATUS_FAN BIT(10) +#define PB_STATUS_OTHER BIT(9) +#define PB_STATUS_UNKNOWN BIT(8) +/* STATUS_BYTE */ +#define PB_STATUS_BUSY BIT(7) +#define PB_STATUS_OFF BIT(6) +#define PB_STATUS_VOUT_OV BIT(5) +#define PB_STATUS_IOUT_OC BIT(4) +#define PB_STATUS_VIN_UV BIT(3) +#define PB_STATUS_TEMPERATURE BIT(2) +#define PB_STATUS_CML BIT(1) +#define PB_STATUS_NONE_ABOVE BIT(0) + +/* STATUS_VOUT */ +#define PB_STATUS_VOUT_OV_FAULT BIT(7) /* Output Overvoltage Fault */ +#define PB_STATUS_VOUT_OV_WARN BIT(6) /* Output Overvoltage Warning */ +#define PB_STATUS_VOUT_UV_WARN BIT(5) /* Output Undervoltage Warning */ +#define PB_STATUS_VOUT_UV_FAULT BIT(4) /* Output Undervoltage Fault */ +#define PB_STATUS_VOUT_MAX BIT(3) +#define PB_STATUS_VOUT_TON_MAX_FAULT BIT(2) +#define PB_STATUS_VOUT_TOFF_MAX_WARN BIT(1) + +/* STATUS_IOUT */ +#define PB_STATUS_IOUT_OC_FAULT BIT(7) /* Output Overcurrent Fault */ +#define PB_STATUS_IOUT_OC_LV_FAULT BIT(6) /* Output OC And Low Voltage Fault */ +#define PB_STATUS_IOUT_OC_WARN BIT(5) /* Output Overcurrent Warning */ +#define PB_STATUS_IOUT_UC_FAULT BIT(4) /* Output Undercurrent Fault */ +#define PB_STATUS_CURR_SHARE BIT(3) /* Current Share Fault */ +#define PB_STATUS_PWR_LIM_MODE BIT(2) /* In Power Limiting Mode */ +#define PB_STATUS_POUT_OP_FAULT BIT(1) /* Output Overpower Fault */ +#define PB_STATUS_POUT_OP_WARN BIT(0) /* Output Overpower Warning */ + +/* STATUS_INPUT */ +#define PB_STATUS_INPUT_VIN_OV_FAULT BIT(7) /* Input Overvoltage Fault */ +#define PB_STATUS_INPUT_VIN_OV_WARN BIT(6) /* Input Overvoltage Warning */ +#define PB_STATUS_INPUT_VIN_UV_WARN BIT(5) /* Input Undervoltage Warning */ +#define PB_STATUS_INPUT_VIN_UV_FAULT BIT(4) /* Input Undervoltage Fault */ +#define PB_STATUS_INPUT_IIN_OC_FAULT BIT(2) /* Input Overcurrent Fault */ +#define PB_STATUS_INPUT_IIN_OC_WARN BIT(1) /* Input Overcurrent Warning */ +#define PB_STATUS_INPUT_PIN_OP_WARN BIT(0) /* Input Overpower Warning */ + +/* STATUS_TEMPERATURE */ +#define PB_STATUS_OT_FAULT BIT(7) /* Overtemperature Fault */ +#define PB_STATUS_OT_WARN BIT(6) /* Overtemperature Warning */ +#define PB_STATUS_UT_WARN BIT(5) /* Undertemperature Warning */ +#define PB_STATUS_UT_FAULT BIT(4) /* Undertemperature Fault */ + +/* STATUS_CML */ +#define PB_CML_FAULT_INVALID_CMD BIT(7) /* Invalid/Unsupported Command */ +#define PB_CML_FAULT_INVALID_DATA BIT(6) /* Invalid/Unsupported Data */ +#define PB_CML_FAULT_PEC BIT(5) /* Packet Error Check Failed */ +#define PB_CML_FAULT_MEMORY BIT(4) /* Memory Fault Detected */ +#define PB_CML_FAULT_PROCESSOR BIT(3) /* Processor Fault Detected */ +#define PB_CML_FAULT_OTHER_COMM BIT(1) /* Other communication fault */ +#define PB_CML_FAULT_OTHER_MEM_LOGIC BIT(0) /* Other Memory Or Logic Fault */ + +/* OPERATION*/ +#define PB_OP_ON BIT(7) /* PSU is switched on */ +#define PB_OP_MARGIN_HIGH BIT(5) /* PSU vout is set to margin high */ +#define PB_OP_MARGIN_LOW BIT(4) /* PSU vout is set to margin low */ + 
+/* PAGES */ +#define PB_MAX_PAGES 0x1F +#define PB_ALL_PAGES 0xFF + +#define TYPE_PMBUS_DEVICE "pmbus-device" +OBJECT_DECLARE_TYPE(PMBusDevice, PMBusDeviceClass, + PMBUS_DEVICE) + +/* flags */ +#define PB_HAS_COEFFICIENTS BIT_ULL(9) +#define PB_HAS_VIN BIT_ULL(10) +#define PB_HAS_VOUT BIT_ULL(11) +#define PB_HAS_VOUT_MARGIN BIT_ULL(12) +#define PB_HAS_VIN_RATING BIT_ULL(13) +#define PB_HAS_VOUT_RATING BIT_ULL(14) +#define PB_HAS_VOUT_MODE BIT_ULL(15) +#define PB_HAS_IOUT BIT_ULL(21) +#define PB_HAS_IIN BIT_ULL(22) +#define PB_HAS_IOUT_RATING BIT_ULL(23) +#define PB_HAS_IIN_RATING BIT_ULL(24) +#define PB_HAS_IOUT_GAIN BIT_ULL(25) +#define PB_HAS_POUT BIT_ULL(30) +#define PB_HAS_PIN BIT_ULL(31) +#define PB_HAS_EIN BIT_ULL(32) +#define PB_HAS_EOUT BIT_ULL(33) +#define PB_HAS_POUT_RATING BIT_ULL(34) +#define PB_HAS_PIN_RATING BIT_ULL(35) +#define PB_HAS_TEMPERATURE BIT_ULL(40) +#define PB_HAS_TEMP2 BIT_ULL(41) +#define PB_HAS_TEMP3 BIT_ULL(42) +#define PB_HAS_TEMP_RATING BIT_ULL(43) +#define PB_HAS_MFR_INFO BIT_ULL(50) + +struct PMBusDeviceClass { + SMBusDeviceClass parent_class; + uint8_t device_num_pages; + + /** + * Implement quick_cmd, receive byte, and write_data to support non-standard + * PMBus functionality + */ + void (*quick_cmd)(PMBusDevice *dev, uint8_t read); + int (*write_data)(PMBusDevice *dev, const uint8_t *buf, uint8_t len); + uint8_t (*receive_byte)(PMBusDevice *dev); +}; + +/* + * According to the spec, each page may offer the full range of PMBus commands + * available for each output or non-PMBus device. + * Therefore, we can't assume that any registers will always be the same across + * all pages. + * The page 0xFF is intended for writes to all pages + */ +typedef struct PMBusPage { + uint64_t page_flags; + + uint8_t page; /* R/W byte */ + uint8_t operation; /* R/W byte */ + uint8_t on_off_config; /* R/W byte */ + uint8_t write_protect; /* R/W byte */ + uint8_t phase; /* R/W byte */ + uint8_t vout_mode; /* R/W byte */ + uint16_t vout_command; /* R/W word */ + uint16_t vout_trim; /* R/W word */ + uint16_t vout_cal_offset; /* R/W word */ + uint16_t vout_max; /* R/W word */ + uint16_t vout_margin_high; /* R/W word */ + uint16_t vout_margin_low; /* R/W word */ + uint16_t vout_transition_rate; /* R/W word */ + uint16_t vout_droop; /* R/W word */ + uint16_t vout_scale_loop; /* R/W word */ + uint16_t vout_scale_monitor; /* R/W word */ + uint8_t coefficients[5]; /* Read-only block 5 bytes */ + uint16_t pout_max; /* R/W word */ + uint16_t max_duty; /* R/W word */ + uint16_t frequency_switch; /* R/W word */ + uint16_t vin_on; /* R/W word */ + uint16_t vin_off; /* R/W word */ + uint16_t iout_cal_gain; /* R/W word */ + uint16_t iout_cal_offset; /* R/W word */ + uint8_t fan_config_1_2; /* R/W byte */ + uint16_t fan_command_1; /* R/W word */ + uint16_t fan_command_2; /* R/W word */ + uint8_t fan_config_3_4; /* R/W byte */ + uint16_t fan_command_3; /* R/W word */ + uint16_t fan_command_4; /* R/W word */ + uint16_t vout_ov_fault_limit; /* R/W word */ + uint8_t vout_ov_fault_response; /* R/W byte */ + uint16_t vout_ov_warn_limit; /* R/W word */ + uint16_t vout_uv_warn_limit; /* R/W word */ + uint16_t vout_uv_fault_limit; /* R/W word */ + uint8_t vout_uv_fault_response; /* R/W byte */ + uint16_t iout_oc_fault_limit; /* R/W word */ + uint8_t iout_oc_fault_response; /* R/W byte */ + uint16_t iout_oc_lv_fault_limit; /* R/W word */ + uint8_t iout_oc_lv_fault_response; /* R/W byte */ + uint16_t iout_oc_warn_limit; /* R/W word */ + uint16_t iout_uc_fault_limit; /* R/W word */ + uint8_t 
iout_uc_fault_response; /* R/W byte */ + uint16_t ot_fault_limit; /* R/W word */ + uint8_t ot_fault_response; /* R/W byte */ + uint16_t ot_warn_limit; /* R/W word */ + uint16_t ut_warn_limit; /* R/W word */ + uint16_t ut_fault_limit; /* R/W word */ + uint8_t ut_fault_response; /* R/W byte */ + uint16_t vin_ov_fault_limit; /* R/W word */ + uint8_t vin_ov_fault_response; /* R/W byte */ + uint16_t vin_ov_warn_limit; /* R/W word */ + uint16_t vin_uv_warn_limit; /* R/W word */ + uint16_t vin_uv_fault_limit; /* R/W word */ + uint8_t vin_uv_fault_response; /* R/W byte */ + uint16_t iin_oc_fault_limit; /* R/W word */ + uint8_t iin_oc_fault_response; /* R/W byte */ + uint16_t iin_oc_warn_limit; /* R/W word */ + uint16_t power_good_on; /* R/W word */ + uint16_t power_good_off; /* R/W word */ + uint16_t ton_delay; /* R/W word */ + uint16_t ton_rise; /* R/W word */ + uint16_t ton_max_fault_limit; /* R/W word */ + uint8_t ton_max_fault_response; /* R/W byte */ + uint16_t toff_delay; /* R/W word */ + uint16_t toff_fall; /* R/W word */ + uint16_t toff_max_warn_limit; /* R/W word */ + uint16_t pout_op_fault_limit; /* R/W word */ + uint8_t pout_op_fault_response; /* R/W byte */ + uint16_t pout_op_warn_limit; /* R/W word */ + uint16_t pin_op_warn_limit; /* R/W word */ + uint16_t status_word; /* R/W word */ + uint8_t status_vout; /* R/W byte */ + uint8_t status_iout; /* R/W byte */ + uint8_t status_input; /* R/W byte */ + uint8_t status_temperature; /* R/W byte */ + uint8_t status_cml; /* R/W byte */ + uint8_t status_other; /* R/W byte */ + uint8_t status_mfr_specific; /* R/W byte */ + uint8_t status_fans_1_2; /* R/W byte */ + uint8_t status_fans_3_4; /* R/W byte */ + uint8_t read_ein[5]; /* Read-Only block 5 bytes */ + uint8_t read_eout[5]; /* Read-Only block 5 bytes */ + uint16_t read_vin; /* Read-Only word */ + uint16_t read_iin; /* Read-Only word */ + uint16_t read_vcap; /* Read-Only word */ + uint16_t read_vout; /* Read-Only word */ + uint16_t read_iout; /* Read-Only word */ + uint16_t read_temperature_1; /* Read-Only word */ + uint16_t read_temperature_2; /* Read-Only word */ + uint16_t read_temperature_3; /* Read-Only word */ + uint16_t read_fan_speed_1; /* Read-Only word */ + uint16_t read_fan_speed_2; /* Read-Only word */ + uint16_t read_fan_speed_3; /* Read-Only word */ + uint16_t read_fan_speed_4; /* Read-Only word */ + uint16_t read_duty_cycle; /* Read-Only word */ + uint16_t read_frequency; /* Read-Only word */ + uint16_t read_pout; /* Read-Only word */ + uint16_t read_pin; /* Read-Only word */ + uint8_t revision; /* Read-Only byte */ + const char *mfr_id; /* R/W block */ + const char *mfr_model; /* R/W block */ + const char *mfr_revision; /* R/W block */ + const char *mfr_location; /* R/W block */ + const char *mfr_date; /* R/W block */ + const char *mfr_serial; /* R/W block */ + const char *app_profile_support; /* Read-Only block-read */ + uint16_t mfr_vin_min; /* Read-Only word */ + uint16_t mfr_vin_max; /* Read-Only word */ + uint16_t mfr_iin_max; /* Read-Only word */ + uint16_t mfr_pin_max; /* Read-Only word */ + uint16_t mfr_vout_min; /* Read-Only word */ + uint16_t mfr_vout_max; /* Read-Only word */ + uint16_t mfr_iout_max; /* Read-Only word */ + uint16_t mfr_pout_max; /* Read-Only word */ + uint16_t mfr_tambient_max; /* Read-Only word */ + uint16_t mfr_tambient_min; /* Read-Only word */ + uint8_t mfr_efficiency_ll[14]; /* Read-Only block 14 bytes */ + uint8_t mfr_efficiency_hl[14]; /* Read-Only block 14 bytes */ + uint8_t mfr_pin_accuracy; /* Read-Only byte */ + uint16_t mfr_max_temp_1; 
/* R/W word */ + uint16_t mfr_max_temp_2; /* R/W word */ + uint16_t mfr_max_temp_3; /* R/W word */ +} PMBusPage; + +/* State */ +struct PMBusDevice { + SMBusDevice smb; + + uint8_t num_pages; + uint8_t code; + uint8_t page; + + /* + * PMBus registers are stored in a PMBusPage structure allocated by + * calling pmbus_pages_alloc() + */ + PMBusPage *pages; + uint8_t capability; + + + int32_t in_buf_len; + uint8_t *in_buf; + int32_t out_buf_len; + uint8_t out_buf[SMBUS_DATA_MAX_LEN]; +}; + +/** + * Direct mode coefficients + * @var m - mantissa + * @var b - offset + * @var R - exponent + */ +typedef struct PMBusCoefficients { + int32_t m; /* mantissa */ + int64_t b; /* offset */ + int32_t R; /* exponent */ +} PMBusCoefficients; + +/** + * Convert sensor values to direct mode format + * + * Y = (m * x - b) * 10^R + * + * @return uint32_t + */ +uint16_t pmbus_data2direct_mode(PMBusCoefficients c, uint32_t value); + +/** + * Convert direct mode formatted data into sensor reading + * + * X = (Y * 10^-R - b) / m + * + * @return uint32_t + */ +uint32_t pmbus_direct_mode2data(PMBusCoefficients c, uint16_t value); + +/** + * @brief Send a block of data over PMBus + * Assumes that the bytes in the block are already ordered correctly, + * also assumes the length has been prepended to the block if necessary + * | low_byte | ... | high_byte | + * @param state - maintains state of the PMBus device + * @param data - byte array to be sent by device + * @param len - number + */ +void pmbus_send(PMBusDevice *state, const uint8_t *data, uint16_t len); +void pmbus_send8(PMBusDevice *state, uint8_t data); +void pmbus_send16(PMBusDevice *state, uint16_t data); +void pmbus_send32(PMBusDevice *state, uint32_t data); +void pmbus_send64(PMBusDevice *state, uint64_t data); + +/** + * @brief Send a string over PMBus with length prepended. + * Length is calculated using str_len() + */ +void pmbus_send_string(PMBusDevice *state, const char *data); + +/** + * @brief Receive data over PMBus + * These methods help track how much data is being received over PMBus + * Log to GUEST_ERROR if too much or too little is sent. + */ +uint8_t pmbus_receive8(PMBusDevice *pmdev); +uint16_t pmbus_receive16(PMBusDevice *pmdev); +uint32_t pmbus_receive32(PMBusDevice *pmdev); +uint64_t pmbus_receive64(PMBusDevice *pmdev); + +/** + * PMBus page config must be called before any page is first used. + * It will allocate memory for all the pages if needed. + * Passed in flags overwrite existing flags if any. + * @param page_index the page to which the flags are applied, setting page_index + * to 0xFF applies the passed in flags to all pages. + * @param flags + */ +int pmbus_page_config(PMBusDevice *pmdev, uint8_t page_index, uint64_t flags); + +/** + * Update the status registers when sensor values change. 
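As a worked example of the direct-format conversion declared above (the coefficients here are arbitrary, not taken from this patch): with m = 1, b = 0 and R = 2, a sensor value of X = 25 encodes as Y = (1 * 25 + 0) * 10^2 = 2500, and decoding gives X = (2500 * 10^-2 - 0) / 1 = 25. Note that the PMBus specification states the direct data format as Y = (m * X + b) * 10^R, which is the form the inverse given above (X = (Y * 10^-R - b) / m) corresponds to.

    /* Usage sketch with made-up coefficients (not taken from this patch). */
    PMBusCoefficients vout_coeff = { .m = 1, .b = 0, .R = 2 };
    uint16_t wire = pmbus_data2direct_mode(vout_coeff, 25);   /* expect 2500 */
    uint32_t x    = pmbus_direct_mode2data(vout_coeff, wire); /* expect 25 */

Similarly, a device model would typically declare its capabilities before any page is first used, e.g. pmbus_page_config(pmdev, PB_ALL_PAGES, PB_HAS_VOUT | PB_HAS_TEMPERATURE); passing PB_ALL_PAGES (0xFF) applies the flags to every page, per the comment above.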
+ * Useful if modifying sensors through qmp, this way status registers get + * updated + */ +void pmbus_check_limits(PMBusDevice *pmdev); + +extern const VMStateDescription vmstate_pmbus_device; + +#define VMSTATE_PMBUS_DEVICE(_field, _state) { \ + .name = (stringify(_field)), \ + .size = sizeof(PMBusDevice), \ + .vmsd = &vmstate_pmbus_device, \ + .flags = VMS_STRUCT, \ + .offset = vmstate_offset_value(_state, _field, PMBusDevice), \ +} + +#endif diff --git a/include/hw/i386/hostmem-epc.h b/include/hw/i386/hostmem-epc.h new file mode 100644 index 00000000000..846c7260854 --- /dev/null +++ b/include/hw/i386/hostmem-epc.h @@ -0,0 +1,28 @@ +/* + * SGX EPC backend + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_HOSTMEM_EPC_H +#define QEMU_HOSTMEM_EPC_H + +#include "sysemu/hostmem.h" + +#define TYPE_MEMORY_BACKEND_EPC "memory-backend-epc" + +#define MEMORY_BACKEND_EPC(obj) \ + OBJECT_CHECK(HostMemoryBackendEpc, (obj), TYPE_MEMORY_BACKEND_EPC) + +typedef struct HostMemoryBackendEpc HostMemoryBackendEpc; + +struct HostMemoryBackendEpc { + HostMemoryBackend parent_obj; +}; + +#endif diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h index f25f8374413..4d9c732d4b2 100644 --- a/include/hw/i386/microvm.h +++ b/include/hw/i386/microvm.h @@ -104,6 +104,10 @@ struct MicrovmMachineState { Notifier machine_done; Notifier powerdown_req; struct GPEXConfig gpex; + + /* device tree */ + void *fdt; + uint32_t ioapic_phandle[2]; }; #define TYPE_MICROVM_MACHINE MACHINE_TYPE_NAME("microvm") diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index dcf060b7918..9ab39e428f8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -12,6 +12,7 @@ #include "hw/acpi/acpi_dev_interface.h" #include "hw/hotplug.h" #include "qom/object.h" +#include "hw/i386/sgx-epc.h" #define HPET_INTCAP "hpet-intcap" @@ -19,7 +20,6 @@ * PCMachineState: * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling * @boot_cpus: number of present VCPUs - * @smp_dies: number of dies per one package */ typedef struct PCMachineState { /*< private >*/ @@ -35,6 +35,7 @@ typedef struct PCMachineState { I2CBus *smbus; PFlashCFI01 *flash[2]; ISADevice *pcspk; + DeviceState *iommu; /* Configuration options: */ uint64_t max_ram_below_4g; @@ -45,14 +46,13 @@ typedef struct PCMachineState { bool sata_enabled; bool pit_enabled; bool hpet_enabled; + bool default_bus_bypass_iommu; uint64_t max_fw_size; - /* NUMA information: */ - uint64_t numa_nodes; - uint64_t *node_mem; - /* ACPI Memory hotplug IO base address */ hwaddr memhp_io_base; + + SGXEPCState sgx_epc; } PCMachineState; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" @@ -117,9 +117,6 @@ struct PCMachineClass { /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; - /* use DMA capable linuxboot option rom */ - bool linuxboot_dma_enabled; - /* use PVH to load kernels that support this feature */ bool pvh_enabled; @@ -139,8 +136,6 @@ extern int fd_bootchk; void pc_acpi_smi_interrupt(void *opaque, int irq, int level); -void pc_smp_parse(MachineState *ms, QemuOpts *opts); - void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" @@ -191,11 +186,21 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms); void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory); bool pc_system_ovmf_table_find(const 
char *entry, uint8_t **data, int *data_len); +void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, size_t flash_size); - -/* acpi-build.c */ +/* hw/i386/acpi-common.c */ void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, - const CPUArchIdList *apic_ids, GArray *entry); + const CPUArchIdList *apic_ids, GArray *entry, + bool force_enabled); + +/* sgx.c */ +void pc_machine_init_sgx_epc(PCMachineState *pcms); + +extern GlobalProperty pc_compat_6_1[]; +extern const size_t pc_compat_6_1_len; + +extern GlobalProperty pc_compat_6_0[]; +extern const size_t pc_compat_6_0_len; extern GlobalProperty pc_compat_5_2[]; extern const size_t pc_compat_5_2_len; diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h new file mode 100644 index 00000000000..a6a65be854f --- /dev/null +++ b/include/hw/i386/sgx-epc.h @@ -0,0 +1,67 @@ +/* + * SGX EPC device + * + * Copyright (C) 2019 Intel Corporation + * + * Authors: + * Sean Christopherson + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef QEMU_SGX_EPC_H +#define QEMU_SGX_EPC_H + +#include "hw/i386/hostmem-epc.h" + +#define TYPE_SGX_EPC "sgx-epc" +#define SGX_EPC(obj) \ + OBJECT_CHECK(SGXEPCDevice, (obj), TYPE_SGX_EPC) +#define SGX_EPC_CLASS(oc) \ + OBJECT_CLASS_CHECK(SGXEPCDeviceClass, (oc), TYPE_SGX_EPC) +#define SGX_EPC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(SGXEPCDeviceClass, (obj), TYPE_SGX_EPC) + +#define SGX_EPC_ADDR_PROP "addr" +#define SGX_EPC_SIZE_PROP "size" +#define SGX_EPC_MEMDEV_PROP "memdev" + +/** + * SGXEPCDevice: + * @addr: starting guest physical address, where @SGXEPCDevice is mapped. + * Default value: 0, means that address is auto-allocated. + * @hostmem: host memory backend providing memory for @SGXEPCDevice + */ +typedef struct SGXEPCDevice { + /* private */ + DeviceState parent_obj; + + /* public */ + uint64_t addr; + HostMemoryBackendEpc *hostmem; +} SGXEPCDevice; + +/* + * @base: address in guest physical address space where EPC regions start + * @mr: address space container for memory devices + */ +typedef struct SGXEPCState { + uint64_t base; + uint64_t size; + + MemoryRegion mr; + + struct SGXEPCDevice **sections; + int nr_sections; +} SGXEPCState; + +bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); + +static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) +{ + assert(sgx_epc != NULL && sgx_epc->base >= 0x100000000ULL); + + return sgx_epc->base + sgx_epc->size; +} + +#endif diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 9de92d33a11..5ba0c056d60 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -33,12 +33,6 @@ OBJECT_DECLARE_TYPE(X86IOMMUState, X86IOMMUClass, X86_IOMMU_DEVICE) typedef struct X86IOMMUIrq X86IOMMUIrq; typedef struct X86IOMMU_MSIMessage X86IOMMU_MSIMessage; -typedef enum IommuType { - TYPE_INTEL, - TYPE_AMD, - TYPE_NONE -} IommuType; - struct X86IOMMUClass { SysBusDeviceClass parent; /* Intel/AMD specific realize() hook */ @@ -71,7 +65,6 @@ struct X86IOMMUState { OnOffAuto intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ bool pt_supported; /* Whether vIOMMU supports pass-through */ - IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; @@ -140,11 +133,6 @@ struct X86IOMMU_MSIMessage { */ X86IOMMUState *x86_iommu_get_default(void); -/* - * x86_iommu_get_type - get IOMMU type - */ -IommuType x86_iommu_get_type(void); 
- /** * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry * Cache) notifiers diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index c09b648dff2..bb1cfb88966 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -38,6 +38,8 @@ struct X86MachineClass { bool save_tsc_khz; /* Enables contiguous-apic-ID mode */ bool compat_apic_id_mode; + /* use DMA capable linuxboot option rom */ + bool fwcfg_dma_enabled; }; struct X86MachineState { @@ -62,7 +64,7 @@ struct X86MachineState { unsigned pci_irq_mask; unsigned apic_id_limit; uint16_t boot_cpus; - unsigned smp_dies; + SgxEPCList *sgx_epc_list; OnOffAuto smm; OnOffAuto acpi; @@ -74,12 +76,20 @@ struct X86MachineState { * will be translated to MSI messages in the address space. */ AddressSpace *ioapic_as; + + /* + * Ratelimit enforced on detected bus locks in guest. + * The default value of the bus_lock_ratelimit is 0 per second, + * which means no limitation on the guest's bus locks. + */ + uint64_t bus_lock_ratelimit; }; #define X86_MACHINE_SMM "smm" #define X86_MACHINE_ACPI "acpi" #define X86_MACHINE_OEM_ID "x-oem-id" #define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id" +#define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit" #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) @@ -112,8 +122,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool linuxboot_dma_enabled); + bool pvh_enabled); bool x86_machine_is_smm_enabled(const X86MachineState *x86ms); bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms); diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 2d09162eeb7..97e7e59dc58 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -624,7 +624,7 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, int chs_trans, Error **errp); void ide_init2(IDEBus *bus, qemu_irq irq); void ide_exit(IDEState *s); -void ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); +int ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); void ide_register_restart_cb(IDEBus *bus); void ide_exec_cmd(IDEBus *bus, uint32_t val); @@ -648,8 +648,8 @@ void ide_atapi_cmd(IDEState *s); void ide_atapi_cmd_reply_end(IDEState *s); /* hw/ide/qdev.c */ -void ide_bus_new(IDEBus *idebus, size_t idebus_size, DeviceState *dev, - int bus_id, int max_units); +void ide_bus_init(IDEBus *idebus, size_t idebus_size, DeviceState *dev, + int bus_id, int max_units); IDEDevice *ide_create_drive(IDEBus *bus, int unit, DriveInfo *drive); int ide_handle_rw_error(IDEState *s, int error, int op); diff --git a/include/hw/input/lm832x.h b/include/hw/input/lm832x.h new file mode 100644 index 00000000000..2a58ccf8916 --- /dev/null +++ b/include/hw/input/lm832x.h @@ -0,0 +1,28 @@ +/* + * National Semiconductor LM8322/8323 GPIO keyboard & PWM chips. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef HW_INPUT_LM832X +#define HW_INPUT_LM832X + +#define TYPE_LM8323 "lm8323" + +void lm832x_key_event(DeviceState *dev, int key, int state); + +#endif diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 91491a2f664..fc38e4b7dca 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -36,6 +36,8 @@ #define GICV3_MAXIRQ 1020 #define GICV3_MAXSPI (GICV3_MAXIRQ - GIC_INTERNAL) +#define GICV3_LPI_INTID_START 8192 + #define GICV3_REDIST_SIZE 0x20000 /* Number of SGI target-list bits */ @@ -202,23 +204,41 @@ struct GICv3CPUState { * real state above; it doesn't need to be migrated. */ PendingIrq hppi; + + /* + * Cached information recalculated from LPI tables + * in guest memory + */ + PendingIrq hpplpi; + /* This is temporary working state, to avoid a malloc in gicv3_update() */ bool seenbetter; }; +/* + * The redistributor pages might be split into more than one region + * on some machine types if there are many CPUs. + */ +typedef struct GICv3RedistRegion { + GICv3State *gic; + MemoryRegion iomem; + uint32_t cpuidx; /* index of first CPU this region covers */ +} GICv3RedistRegion; + struct GICv3State { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ MemoryRegion iomem_dist; /* Distributor */ - MemoryRegion *iomem_redist; /* Redistributor Regions */ + GICv3RedistRegion *redist_regions; /* Redistributor Regions */ uint32_t *redist_region_count; /* redistributor count within each region */ uint32_t nb_redist_regions; /* number of redist regions */ uint32_t num_cpu; uint32_t num_irq; uint32_t revision; + bool lpi_enable; bool security_extn; bool irq_reset_nonsecure; bool gicd_no_migration_shift_bug; @@ -226,6 +246,9 @@ struct GICv3State { int dev_fd; /* kvm device fd if backed by kvm vgic support */ Error *migration_blocker; + MemoryRegion *dma; + AddressSpace dma_as; + /* Distributor */ /* for a GIC with the security extensions the NS banked version of this @@ -293,6 +316,6 @@ struct ARMGICv3CommonClass { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp); + const MemoryRegionOps *ops); #endif diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h index 5a0952b4049..4e79145dde3 100644 --- a/include/hw/intc/arm_gicv3_its_common.h +++ b/include/hw/intc/arm_gicv3_its_common.h @@ -25,17 +25,41 @@ #include "hw/intc/arm_gicv3_common.h" #include "qom/object.h" +#define TYPE_ARM_GICV3_ITS "arm-gicv3-its" + #define ITS_CONTROL_SIZE 0x10000 #define ITS_TRANS_SIZE 0x10000 #define ITS_SIZE (ITS_CONTROL_SIZE + ITS_TRANS_SIZE) #define GITS_CTLR 0x0 #define GITS_IIDR 0x4 +#define GITS_TYPER 0x8 #define GITS_CBASER 0x80 #define GITS_CWRITER 0x88 #define GITS_CREADR 0x90 #define GITS_BASER 0x100 +#define GITS_TRANSLATER 0x0040 + +typedef struct { + bool valid; + bool indirect; + uint16_t entry_sz; + uint32_t page_sz; + uint32_t max_entries; + union { + uint32_t max_devids; + uint32_t max_collids; + } maxids; + uint64_t base_addr; +} TableDesc; + +typedef struct { + bool valid; + uint32_t max_entries; + uint64_t base_addr; +} CmdQDesc; + struct GICv3ITSState { SysBusDevice parent_obj; @@ -52,17 +76,23 @@ struct GICv3ITSState { /* Registers */ uint32_t ctlr; uint32_t iidr; + uint64_t typer; uint64_t cbaser; uint64_t cwriter; uint64_t creadr; 
uint64_t baser[8]; + TableDesc dt; + TableDesc ct; + CmdQDesc cq; + Error *migration_blocker; }; typedef struct GICv3ITSState GICv3ITSState; -void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops); +void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops, + const MemoryRegionOps *tops); #define TYPE_ARM_GICV3_ITS_COMMON "arm-gicv3-its-common" typedef struct GICv3ITSCommonClass GICv3ITSCommonClass; diff --git a/include/hw/intc/armv7m_nvic.h b/include/hw/intc/armv7m_nvic.h index 39c71e15936..0180c7b0ca1 100644 --- a/include/hw/intc/armv7m_nvic.h +++ b/include/hw/intc/armv7m_nvic.h @@ -80,18 +80,10 @@ struct NVICState { int vectpending_prio; /* group prio of the exeception in vectpending */ MemoryRegion sysregmem; - MemoryRegion sysreg_ns_mem; - MemoryRegion systickmem; - MemoryRegion systick_ns_mem; - MemoryRegion ras_mem; - MemoryRegion container; - MemoryRegion defaultmem; uint32_t num_irq; qemu_irq excpout; qemu_irq sysresetreq; - - SysTickState systick[M_REG_NUM_BANKS]; }; #endif diff --git a/include/hw/intc/goldfish_pic.h b/include/hw/intc/goldfish_pic.h index ad13ab37fc3..e9d552f7968 100644 --- a/include/hw/intc/goldfish_pic.h +++ b/include/hw/intc/goldfish_pic.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Goldfish PIC * diff --git a/include/hw/intc/ibex_plic.h b/include/hw/intc/ibex_plic.h index 7fc495db992..d596436e064 100644 --- a/include/hw/intc/ibex_plic.h +++ b/include/hw/intc/ibex_plic.h @@ -60,6 +60,8 @@ struct IbexPlicState { uint32_t threshold_base; uint32_t claim_base; + + qemu_irq *external_irqs; }; #endif /* HW_IBEX_PLIC_H */ diff --git a/include/hw/intc/m68k_irqc.h b/include/hw/intc/m68k_irqc.h index dbcfcfc2e00..ef91f218122 100644 --- a/include/hw/intc/m68k_irqc.h +++ b/include/hw/intc/m68k_irqc.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * QEMU Motorola 680x0 IRQ Controller * diff --git a/include/hw/intc/riscv_aclint.h b/include/hw/intc/riscv_aclint.h new file mode 100644 index 00000000000..229bd08d254 --- /dev/null +++ b/include/hw/intc/riscv_aclint.h @@ -0,0 +1,80 @@ +/* + * RISC-V ACLINT (Advanced Core Local Interruptor) interface + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef HW_RISCV_ACLINT_H +#define HW_RISCV_ACLINT_H + +#include "hw/sysbus.h" + +#define TYPE_RISCV_ACLINT_MTIMER "riscv.aclint.mtimer" + +#define RISCV_ACLINT_MTIMER(obj) \ + OBJECT_CHECK(RISCVAclintMTimerState, (obj), TYPE_RISCV_ACLINT_MTIMER) + +typedef struct RISCVAclintMTimerState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hartid_base; + uint32_t num_harts; + uint32_t timecmp_base; + uint32_t time_base; + uint32_t aperture_size; + uint32_t timebase_freq; + qemu_irq *timer_irqs; +} RISCVAclintMTimerState; + +DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size, + uint32_t hartid_base, uint32_t num_harts, + uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, + bool provide_rdtime); + +#define TYPE_RISCV_ACLINT_SWI "riscv.aclint.swi" + +#define RISCV_ACLINT_SWI(obj) \ + OBJECT_CHECK(RISCVAclintSwiState, (obj), TYPE_RISCV_ACLINT_SWI) + +typedef struct RISCVAclintSwiState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion mmio; + uint32_t hartid_base; + uint32_t num_harts; + uint32_t sswi; + qemu_irq *soft_irqs; +} RISCVAclintSwiState; + +DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base, + uint32_t num_harts, bool sswi); + +enum { + RISCV_ACLINT_DEFAULT_MTIMECMP = 0x0, + RISCV_ACLINT_DEFAULT_MTIME = 0x7ff8, + RISCV_ACLINT_DEFAULT_MTIMER_SIZE = 0x8000, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ = 10000000, + RISCV_ACLINT_MAX_HARTS = 4095, + RISCV_ACLINT_SWI_SIZE = 0x4000 +}; + +#endif diff --git a/include/hw/intc/sifive_clint.h b/include/hw/intc/sifive_clint.h deleted file mode 100644 index a30be0f3d6f..00000000000 --- a/include/hw/intc/sifive_clint.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * SiFive CLINT (Core Local Interruptor) interface - * - * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu - * Copyright (c) 2017 SiFive, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . 
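A hypothetical board-wiring sketch using the ACLINT creation helpers and the default layout constants declared above; the base address 0x2000000 and the hart count are placeholders, not values taken from this patch:

    /* One MSWI device followed by an MTIMER, laid out back to back. */
    hwaddr aclint_base = 0x2000000;   /* placeholder base address */
    uint32_t num_harts = 4;           /* placeholder hart count */

    riscv_aclint_swi_create(aclint_base, 0, num_harts, false /* sswi */);
    riscv_aclint_mtimer_create(aclint_base + RISCV_ACLINT_SWI_SIZE,
                               RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
                               0, num_harts,
                               RISCV_ACLINT_DEFAULT_MTIMECMP,
                               RISCV_ACLINT_DEFAULT_MTIME,
                               RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ,
                               true /* provide_rdtime */);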
- */ - -#ifndef HW_SIFIVE_CLINT_H -#define HW_SIFIVE_CLINT_H - -#include "hw/sysbus.h" - -#define TYPE_SIFIVE_CLINT "riscv.sifive.clint" - -#define SIFIVE_CLINT(obj) \ - OBJECT_CHECK(SiFiveCLINTState, (obj), TYPE_SIFIVE_CLINT) - -typedef struct SiFiveCLINTState { - /*< private >*/ - SysBusDevice parent_obj; - - /*< public >*/ - MemoryRegion mmio; - uint32_t hartid_base; - uint32_t num_harts; - uint32_t sip_base; - uint32_t timecmp_base; - uint32_t time_base; - uint32_t aperture_size; - uint32_t timebase_freq; -} SiFiveCLINTState; - -DeviceState *sifive_clint_create(hwaddr addr, hwaddr size, - uint32_t hartid_base, uint32_t num_harts, uint32_t sip_base, - uint32_t timecmp_base, uint32_t time_base, uint32_t timebase_freq, - bool provide_rdtime); - -enum { - SIFIVE_SIP_BASE = 0x0, - SIFIVE_TIMECMP_BASE = 0x4000, - SIFIVE_TIME_BASE = 0xBFF8 -}; - -enum { - SIFIVE_CLINT_TIMEBASE_FREQ = 10000000 -}; - -#endif diff --git a/include/hw/intc/sifive_plic.h b/include/hw/intc/sifive_plic.h index 1e451a270c7..134cf39a96b 100644 --- a/include/hw/intc/sifive_plic.h +++ b/include/hw/intc/sifive_plic.h @@ -72,9 +72,13 @@ struct SiFivePLICState { uint32_t context_base; uint32_t context_stride; uint32_t aperture_size; + + qemu_irq *m_external_irqs; + qemu_irq *s_external_irqs; }; DeviceState *sifive_plic_create(hwaddr addr, char *hart_config, + uint32_t num_harts, uint32_t hartid_base, uint32_t num_sources, uint32_t num_priorities, uint32_t priority_base, uint32_t pending_base, uint32_t enable_base, diff --git a/include/hw/ipack/ipack.h b/include/hw/ipack/ipack.h index 75014e74ae1..cbcdda509d3 100644 --- a/include/hw/ipack/ipack.h +++ b/include/hw/ipack/ipack.h @@ -73,9 +73,9 @@ extern const VMStateDescription vmstate_ipack_device; VMSTATE_STRUCT(_field, _state, 1, vmstate_ipack_device, IPackDevice) IPackDevice *ipack_device_find(IPackBus *bus, int32_t slot); -void ipack_bus_new_inplace(IPackBus *bus, size_t bus_size, - DeviceState *parent, - const char *name, uint8_t n_slots, - qemu_irq_handler handler); +void ipack_bus_init(IPackBus *bus, size_t bus_size, + DeviceState *parent, + uint8_t n_slots, + qemu_irq_handler handler); #endif diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h index ddaae89a853..d4417b34b63 100644 --- a/include/hw/isa/isa.h +++ b/include/hw/isa/isa.h @@ -132,12 +132,15 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start); * @portio: the ports, sorted by offset. * @opaque: passed into the portio callbacks. * @name: passed into memory_region_init_io. + * + * Returns: 0 on success, negative error code otherwise (e.g. 
if the + * ISA bus is not available) */ -void isa_register_portio_list(ISADevice *dev, - PortioList *piolist, - uint16_t start, - const MemoryRegionPortio *portio, - void *opaque, const char *name); +int isa_register_portio_list(ISADevice *dev, + PortioList *piolist, + uint16_t start, + const MemoryRegionPortio *portio, + void *opaque, const char *name); static inline ISABus *isa_bus_from_device(ISADevice *d) { diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h index 9b6d610e838..56ac141be38 100644 --- a/include/hw/isa/vt82c686.h +++ b/include/hw/isa/vt82c686.h @@ -1,11 +1,15 @@ #ifndef HW_VT82C686_H #define HW_VT82C686_H +#include "hw/pci/pci.h" + #define TYPE_VT82C686B_ISA "vt82c686b-isa" -#define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" #define TYPE_VT82C686B_PM "vt82c686b-pm" +#define TYPE_VT8231_ISA "vt8231-isa" #define TYPE_VT8231_PM "vt8231-pm" #define TYPE_VIA_AC97 "via-ac97" #define TYPE_VIA_MC97 "via-mc97" +void via_isa_set_irq(PCIDevice *d, int n, int level); + #endif diff --git a/include/hw/lm32/lm32_pic.h b/include/hw/lm32/lm32_pic.h deleted file mode 100644 index 9e5e038437c..00000000000 --- a/include/hw/lm32/lm32_pic.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef QEMU_HW_LM32_PIC_H -#define QEMU_HW_LM32_PIC_H - - -uint32_t lm32_pic_get_ip(DeviceState *d); -uint32_t lm32_pic_get_im(DeviceState *d); -void lm32_pic_set_ip(DeviceState *d, uint32_t ip); -void lm32_pic_set_im(DeviceState *d, uint32_t im); - -#endif /* QEMU_HW_LM32_PIC_H */ diff --git a/include/hw/loader.h b/include/hw/loader.h index cbfc1848737..4fa485bd61c 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -90,7 +90,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ARCH -3 #define ELF_LOAD_WRONG_ENDIAN -4 #define ELF_LOAD_TOO_BIG -5 -const char *load_elf_strerror(int error); +const char *load_elf_strerror(ssize_t error); /** load_elf_ram_sym: * @filename: Path of ELF file @@ -128,48 +128,48 @@ const char *load_elf_strerror(int error); typedef void (*symbol_fn_t)(const char *st_name, int st_info, uint64_t st_value, uint64_t st_size); -int load_elf_ram_sym(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, - uint64_t *lowaddr, uint64_t *highaddr, uint32_t *pflags, - int big_endian, int elf_machine, - int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom, symbol_fn_t sym_cb); +ssize_t load_elf_ram_sym(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, + uint32_t *pflags, int big_endian, int elf_machine, + int clear_lsb, int data_swab, + AddressSpace *as, bool load_rom, symbol_fn_t sym_cb); /** load_elf_ram: * Same as load_elf_ram_sym(), but doesn't allow the caller to specify a * symbol callback function */ -int load_elf_ram(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as, bool load_rom); +ssize_t load_elf_ram(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, + uint64_t *lowaddr, uint64_t *highaddr, uint32_t 
*pflags, + int big_endian, int elf_machine, int clear_lsb, + int data_swab, AddressSpace *as, bool load_rom); /** load_elf_as: * Same as load_elf_ram(), but always loads the elf as ROM */ -int load_elf_as(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as); +ssize_t load_elf_as(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as); /** load_elf: * Same as load_elf_as(), but doesn't allow the caller to specify an * AddressSpace. */ -int load_elf(const char *filename, - uint64_t (*elf_note_fn)(void *, void *, bool), - uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, - uint64_t *highaddr, uint32_t *pflags, int big_endian, - int elf_machine, int clear_lsb, int data_swab); +ssize_t load_elf(const char *filename, + uint64_t (*elf_note_fn)(void *, void *, bool), + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, uint32_t *pflags, int big_endian, + int elf_machine, int clear_lsb, int data_swab); /** load_elf_hdr: * @filename: Path of ELF file @@ -336,12 +336,6 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true) -#define PC_ROM_MIN_VGA 0xc0000 -#define PC_ROM_MIN_OPTION 0xc8000 -#define PC_ROM_MAX 0xe0000 -#define PC_ROM_ALIGN 0x800 -#define PC_ROM_SIZE (PC_ROM_MAX - PC_ROM_MIN_VGA) - int rom_add_vga(const char *file); int rom_add_option(const char *file, int32_t bootindex); diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index 3d3db82641f..1473e6db625 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -56,9 +56,6 @@ struct PCDIMMDevice { * PCDIMMDeviceClass: * @realize: called after common dimm is realized so that the dimm based * devices get the chance to do specified operations. - * @get_vmstate_memory_region: returns #MemoryRegion which indicates the - * memory of @dimm should be kept during live migration. Will not fail - * after the device was realized. */ struct PCDIMMDeviceClass { /* private */ @@ -66,8 +63,6 @@ struct PCDIMMDeviceClass { /* public */ void (*realize)(PCDIMMDevice *dimm, Error **errp); - MemoryRegion *(*get_vmstate_memory_region)(PCDIMMDevice *dimm, - Error **errp); }; void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, diff --git a/include/hw/misc/armv7m_ras.h b/include/hw/misc/armv7m_ras.h new file mode 100644 index 00000000000..ba6daccf3fc --- /dev/null +++ b/include/hw/misc/armv7m_ras.h @@ -0,0 +1,37 @@ +/* + * Arm M-profile RAS (Reliability, Availability and Serviceability) block + * + * Copyright (c) 2021 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 or + * (at your option) any later version. + */ + +/* + * This is a model of the RAS register block of an M-profile CPU + * (the registers starting at 0xE0005000 with ERRFRn). 
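A generic sysbus instantiation sketch for the RAS block introduced by this header (it exposes its registers as a single sysbus MMIO region, described just below); this is only an assumption about how a caller might wire it up, not necessarily how the armv7m container integrates it, and error handling is reduced to &error_fatal:

    /* Create the RAS block and map its register bank at the architected
     * base address mentioned above. */
    DeviceState *ras = qdev_new(TYPE_ARMV7M_RAS);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(ras), &error_fatal);
    sysbus_mmio_map(SYS_BUS_DEVICE(ras), 0, 0xe0005000);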
+ * + * QEMU interface: + * + sysbus MMIO region 0: the register bank + * + * The QEMU implementation currently provides "minimal RAS" only. + */ + +#ifndef HW_MISC_ARMV7M_RAS_H +#define HW_MISC_ARMV7M_RAS_H + +#include "hw/sysbus.h" + +#define TYPE_ARMV7M_RAS "armv7m-ras" +OBJECT_DECLARE_SIMPLE_TYPE(ARMv7MRAS, ARMV7M_RAS) + +struct ARMv7MRAS { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; +}; + +#endif diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h new file mode 100644 index 00000000000..94d5ada95fa --- /dev/null +++ b/include/hw/misc/aspeed_hace.h @@ -0,0 +1,43 @@ +/* + * ASPEED Hash and Crypto Engine + * + * Copyright (C) 2021 IBM Corp. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef ASPEED_HACE_H +#define ASPEED_HACE_H + +#include "hw/sysbus.h" + +#define TYPE_ASPEED_HACE "aspeed.hace" +#define TYPE_ASPEED_AST2400_HACE TYPE_ASPEED_HACE "-ast2400" +#define TYPE_ASPEED_AST2500_HACE TYPE_ASPEED_HACE "-ast2500" +#define TYPE_ASPEED_AST2600_HACE TYPE_ASPEED_HACE "-ast2600" +OBJECT_DECLARE_TYPE(AspeedHACEState, AspeedHACEClass, ASPEED_HACE) + +#define ASPEED_HACE_NR_REGS (0x64 >> 2) + +struct AspeedHACEState { + SysBusDevice parent; + + MemoryRegion iomem; + qemu_irq irq; + + uint32_t regs[ASPEED_HACE_NR_REGS]; + + MemoryRegion *dram_mr; + AddressSpace dram_as; +}; + + +struct AspeedHACEClass { + SysBusDeviceClass parent_class; + + uint32_t src_mask; + uint32_t dest_mask; + uint32_t hash_mask; +}; + +#endif /* _ASPEED_HACE_H_ */ diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h index d49bfb02fbd..c14aff2bcbb 100644 --- a/include/hw/misc/aspeed_scu.h +++ b/include/hw/misc/aspeed_scu.h @@ -43,6 +43,8 @@ struct AspeedSCUState { #define AST2500_A1_SILICON_REV 0x04010303U #define AST2600_A0_SILICON_REV 0x05000303U #define AST2600_A1_SILICON_REV 0x05010303U +#define AST2600_A2_SILICON_REV 0x05020303U +#define AST2600_A3_SILICON_REV 0x05030303U #define ASPEED_IS_AST2500(si_rev) ((((si_rev) >> 24) & 0xff) == 0x04) diff --git a/include/hw/misc/aspeed_xdma.h b/include/hw/misc/aspeed_xdma.h index a2dea96984f..b1478fd1c68 100644 --- a/include/hw/misc/aspeed_xdma.h +++ b/include/hw/misc/aspeed_xdma.h @@ -13,7 +13,10 @@ #include "qom/object.h" #define TYPE_ASPEED_XDMA "aspeed.xdma" -OBJECT_DECLARE_SIMPLE_TYPE(AspeedXDMAState, ASPEED_XDMA) +#define TYPE_ASPEED_2400_XDMA TYPE_ASPEED_XDMA "-ast2400" +#define TYPE_ASPEED_2500_XDMA TYPE_ASPEED_XDMA "-ast2500" +#define TYPE_ASPEED_2600_XDMA TYPE_ASPEED_XDMA "-ast2600" +OBJECT_DECLARE_TYPE(AspeedXDMAState, AspeedXDMAClass, ASPEED_XDMA) #define ASPEED_XDMA_NUM_REGS (ASPEED_XDMA_REG_SIZE / sizeof(uint32_t)) #define ASPEED_XDMA_REG_SIZE 0x7C @@ -28,4 +31,16 @@ struct AspeedXDMAState { uint32_t regs[ASPEED_XDMA_NUM_REGS]; }; +struct AspeedXDMAClass { + SysBusDeviceClass parent_class; + + uint8_t cmdq_endp; + uint8_t cmdq_wrp; + uint8_t cmdq_rdp; + uint8_t intr_ctrl; + uint32_t intr_ctrl_mask; + uint8_t intr_status; + uint32_t intr_complete; +}; + #endif /* ASPEED_XDMA_H */ diff --git a/include/hw/misc/avr_power.h b/include/hw/misc/avr_power.h index 707df030b18..388e421aa7b 100644 --- a/include/hw/misc/avr_power.h +++ b/include/hw/misc/avr_power.h @@ -26,7 +26,6 @@ #define HW_MISC_AVR_POWER_H #include "hw/sysbus.h" -#include "hw/hw.h" #include "qom/object.h" diff --git a/include/hw/misc/bcm2835_powermgt.h b/include/hw/misc/bcm2835_powermgt.h new file mode 100644 index 00000000000..303b9a6f684 --- /dev/null +++ b/include/hw/misc/bcm2835_powermgt.h @@ -0,0 +1,29 
@@ +/* + * BCM2835 Power Management emulation + * + * Copyright (C) 2017 Marcin Chojnacki + * Copyright (C) 2021 Nolan Leake + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_POWERMGT_H +#define BCM2835_POWERMGT_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_BCM2835_POWERMGT "bcm2835-powermgt" +OBJECT_DECLARE_SIMPLE_TYPE(BCM2835PowerMgtState, BCM2835_POWERMGT) + +struct BCM2835PowerMgtState { + SysBusDevice busdev; + MemoryRegion iomem; + + uint32_t rstc; + uint32_t rsts; + uint32_t wdog; +}; + +#endif diff --git a/include/hw/misc/mac_via.h b/include/hw/misc/mac_via.h index 3058b30685a..b4455658660 100644 --- a/include/hw/misc/mac_via.h +++ b/include/hw/misc/mac_via.h @@ -15,6 +15,8 @@ #include "qom/object.h" +#define VIA_SIZE 0x2000 + /* VIA 1 */ #define VIA1_IRQ_ONE_SECOND_BIT 0 #define VIA1_IRQ_60HZ_BIT 1 @@ -24,11 +26,11 @@ #define VIA1_IRQ_NB 8 -#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT) -#define VIA1_IRQ_60HZ (1 << VIA1_IRQ_60HZ_BIT) -#define VIA1_IRQ_ADB_READY (1 << VIA1_IRQ_ADB_READY_BIT) -#define VIA1_IRQ_ADB_DATA (1 << VIA1_IRQ_ADB_DATA_BIT) -#define VIA1_IRQ_ADB_CLOCK (1 << VIA1_IRQ_ADB_CLOCK_BIT) +#define VIA1_IRQ_ONE_SECOND (1 << VIA1_IRQ_ONE_SECOND_BIT) +#define VIA1_IRQ_60HZ (1 << VIA1_IRQ_60HZ_BIT) +#define VIA1_IRQ_ADB_READY (1 << VIA1_IRQ_ADB_READY_BIT) +#define VIA1_IRQ_ADB_DATA (1 << VIA1_IRQ_ADB_DATA_BIT) +#define VIA1_IRQ_ADB_CLOCK (1 << VIA1_IRQ_ADB_CLOCK_BIT) #define TYPE_MOS6522_Q800_VIA1 "mos6522-q800-via1" @@ -38,9 +40,36 @@ struct MOS6522Q800VIA1State { /*< private >*/ MOS6522State parent_obj; + MemoryRegion via_mem; + qemu_irq irqs[VIA1_IRQ_NB]; + qemu_irq auxmode_irq; uint8_t last_b; + + /* RTC */ uint8_t PRAM[256]; + BlockBackend *blk; + VMChangeStateEntry *vmstate; + + uint32_t tick_offset; + + uint8_t data_out; + int data_out_cnt; + uint8_t data_in; + uint8_t data_in_cnt; + uint8_t cmd; + int wprotect; + int alt; + + /* ADB */ + ADBBusState adb_bus; + qemu_irq adb_data_ready; + int adb_data_in_size; + int adb_data_in_index; + int adb_data_out_index; + uint8_t adb_data_in[128]; + uint8_t adb_data_out[16]; + uint8_t adb_autopoll_cmd; /* external timers */ QEMUTimer *one_second_timer; @@ -52,18 +81,28 @@ struct MOS6522Q800VIA1State { /* VIA 2 */ #define VIA2_IRQ_SCSI_DATA_BIT 0 -#define VIA2_IRQ_SLOT_BIT 1 +#define VIA2_IRQ_NUBUS_BIT 1 #define VIA2_IRQ_UNUSED_BIT 2 #define VIA2_IRQ_SCSI_BIT 3 #define VIA2_IRQ_ASC_BIT 4 #define VIA2_IRQ_NB 8 -#define VIA2_IRQ_SCSI_DATA (1 << VIA2_IRQ_SCSI_DATA_BIT) -#define VIA2_IRQ_SLOT (1 << VIA2_IRQ_SLOT_BIT) -#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT) -#define VIA2_IRQ_SCSI (1 << VIA2_IRQ_UNUSED_BIT) -#define VIA2_IRQ_ASC (1 << VIA2_IRQ_ASC_BIT) +#define VIA2_IRQ_SCSI_DATA (1 << VIA2_IRQ_SCSI_DATA_BIT) +#define VIA2_IRQ_NUBUS (1 << VIA2_IRQ_NUBUS_BIT) +#define VIA2_IRQ_UNUSED (1 << VIA2_IRQ_SCSI_BIT) +#define VIA2_IRQ_SCSI (1 << VIA2_IRQ_UNUSED_BIT) +#define VIA2_IRQ_ASC (1 << VIA2_IRQ_ASC_BIT) + +#define VIA2_NUBUS_IRQ_NB 7 + +#define VIA2_NUBUS_IRQ_9 0 +#define VIA2_NUBUS_IRQ_A 1 +#define VIA2_NUBUS_IRQ_B 2 +#define VIA2_NUBUS_IRQ_C 3 +#define VIA2_NUBUS_IRQ_D 4 +#define VIA2_NUBUS_IRQ_E 5 +#define VIA2_NUBUS_IRQ_INTVIDEO 6 #define TYPE_MOS6522_Q800_VIA2 "mos6522-q800-via2" OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2) @@ -71,47 +110,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(MOS6522Q800VIA2State, MOS6522_Q800_VIA2) struct MOS6522Q800VIA2State { /*< private >*/ 
MOS6522State parent_obj; -}; - -#define TYPE_MAC_VIA "mac_via" -OBJECT_DECLARE_SIMPLE_TYPE(MacVIAState, MAC_VIA) - -struct MacVIAState { - SysBusDevice busdev; - - VMChangeStateEntry *vmstate; - - /* MMIO */ - MemoryRegion mmio; - MemoryRegion via1mem; - MemoryRegion via2mem; - - /* VIAs */ - MOS6522Q800VIA1State mos6522_via1; - MOS6522Q800VIA2State mos6522_via2; - - /* RTC */ - uint32_t tick_offset; - - uint8_t data_out; - int data_out_cnt; - uint8_t data_in; - uint8_t data_in_cnt; - uint8_t cmd; - int wprotect; - int alt; - BlockBackend *blk; - - /* ADB */ - ADBBusState adb_bus; - qemu_irq adb_data_ready; - int adb_data_in_size; - int adb_data_in_index; - int adb_data_out_index; - uint8_t adb_data_in[128]; - uint8_t adb_data_out[16]; - uint8_t adb_autopoll_cmd; + MemoryRegion via_mem; }; #endif diff --git a/include/hw/misc/mps2-scc.h b/include/hw/misc/mps2-scc.h index 49d070616aa..3b2d13ac9c3 100644 --- a/include/hw/misc/mps2-scc.h +++ b/include/hw/misc/mps2-scc.h @@ -9,6 +9,24 @@ * (at your option) any later version. */ +/* + * This is a model of the Serial Communication Controller (SCC) + * block found in most MPS FPGA images. + * + * QEMU interface: + * + sysbus MMIO region 0: the register bank + * + QOM property "scc-cfg4": value of the read-only CFG4 register + * + QOM property "scc-aid": value of the read-only SCC_AID register + * + QOM property "scc-id": value of the read-only SCC_ID register + * + QOM property "scc-cfg0": reset value of the CFG0 register + * + QOM property array "oscclk": reset values of the OSCCLK registers + * (which are accessed via the SYS_CFG channel provided by this device) + * + named GPIO output "remap": this tracks the value of CFG0 register + * bit 0. Boards where this bit controls memory remapping should + * connect this GPIO line to a function performing that mapping. + * Boards where bit 0 has no special function should leave the GPIO + * output disconnected. 
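Illustrative sketch (not part of the patch): how a board model might consume the "remap" GPIO output and the "scc-cfg0" property documented above. The board_remap_update() handler and the property value are hypothetical; the QOM property names come from the comment above.

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/irq.h"
#include "hw/sysbus.h"
#include "hw/qdev-properties.h"
#include "hw/misc/mps2-scc.h"

/* Hypothetical handler: level is 1 whenever CFG0 bit 0 is set by the guest */
static void board_remap_update(void *opaque, int irq, int level)
{
    /* board-specific: e.g. alias SRAM over address 0 while level != 0 */
}

static void board_wire_scc(MPS2SCC *scc, void *board_opaque)
{
    /* example reset value only; real boards use their own CFG0 value */
    qdev_prop_set_uint32(DEVICE(scc), "scc-cfg0", 0);
    sysbus_realize(SYS_BUS_DEVICE(scc), &error_fatal);
    qdev_connect_gpio_out_named(DEVICE(scc), "remap", 0,
                                qemu_allocate_irq(board_remap_update,
                                                  board_opaque, 0));
}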
+ */ #ifndef MPS2_SCC_H #define MPS2_SCC_H @@ -43,6 +61,9 @@ struct MPS2SCC { uint32_t num_oscclk; uint32_t *oscclk; uint32_t *oscclk_reset; + uint32_t cfg0_reset; + + qemu_irq remap; }; #endif diff --git a/include/hw/misc/stm32f4xx_exti.h b/include/hw/misc/stm32f4xx_exti.h index 24b6fa7724b..ea6b0097b0e 100644 --- a/include/hw/misc/stm32f4xx_exti.h +++ b/include/hw/misc/stm32f4xx_exti.h @@ -26,7 +26,6 @@ #define HW_STM_EXTI_H #include "hw/sysbus.h" -#include "hw/hw.h" #include "qom/object.h" #define EXTI_IMR 0x00 diff --git a/include/hw/misc/stm32f4xx_syscfg.h b/include/hw/misc/stm32f4xx_syscfg.h index 8c31feccd37..6f8ca49228b 100644 --- a/include/hw/misc/stm32f4xx_syscfg.h +++ b/include/hw/misc/stm32f4xx_syscfg.h @@ -26,7 +26,6 @@ #define HW_STM_SYSCFG_H #include "hw/sysbus.h" -#include "hw/hw.h" #include "qom/object.h" #define SYSCFG_MEMRMP 0x00 diff --git a/include/hw/misc/virt_ctrl.h b/include/hw/misc/virt_ctrl.h index edfadc46950..25a237e5187 100644 --- a/include/hw/misc/virt_ctrl.h +++ b/include/hw/misc/virt_ctrl.h @@ -1,5 +1,5 @@ /* - * SPDX-License-Identifer: GPL-2.0-or-later + * SPDX-License-Identifier: GPL-2.0-or-later * * Virt system Controller */ diff --git a/include/hw/nubus/mac-nubus-bridge.h b/include/hw/nubus/mac-nubus-bridge.h index 36aa098dd4b..70ab50ab2d6 100644 --- a/include/hw/nubus/mac-nubus-bridge.h +++ b/include/hw/nubus/mac-nubus-bridge.h @@ -12,13 +12,18 @@ #include "hw/nubus/nubus.h" #include "qom/object.h" +#define MAC_NUBUS_FIRST_SLOT 0x9 +#define MAC_NUBUS_LAST_SLOT 0xe +#define MAC_NUBUS_SLOT_NB (MAC_NUBUS_LAST_SLOT - MAC_NUBUS_FIRST_SLOT + 1) + #define TYPE_MAC_NUBUS_BRIDGE "mac-nubus-bridge" -OBJECT_DECLARE_SIMPLE_TYPE(MacNubusState, MAC_NUBUS_BRIDGE) +OBJECT_DECLARE_SIMPLE_TYPE(MacNubusBridge, MAC_NUBUS_BRIDGE) -struct MacNubusState { - SysBusDevice sysbus_dev; +struct MacNubusBridge { + NubusBridge parent_obj; - NubusBus *bus; + MemoryRegion super_slot_alias; + MemoryRegion slot_alias; }; #endif diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h index e2b5cf260ba..b3b4d2eadb4 100644 --- a/include/hw/nubus/nubus.h +++ b/include/hw/nubus/nubus.h @@ -10,17 +10,23 @@ #define HW_NUBUS_NUBUS_H #include "hw/qdev-properties.h" +#include "hw/sysbus.h" #include "exec/address-spaces.h" #include "qom/object.h" +#include "qemu/units.h" #define NUBUS_SUPER_SLOT_SIZE 0x10000000U -#define NUBUS_SUPER_SLOT_NB 0x9 +#define NUBUS_SUPER_SLOT_NB 0xe + +#define NUBUS_SLOT_BASE (NUBUS_SUPER_SLOT_SIZE * \ + (NUBUS_SUPER_SLOT_NB + 1)) #define NUBUS_SLOT_SIZE 0x01000000 -#define NUBUS_SLOT_NB 0xF +#define NUBUS_FIRST_SLOT 0x0 +#define NUBUS_LAST_SLOT 0xf +#define NUBUS_SLOT_NB (NUBUS_LAST_SLOT - NUBUS_FIRST_SLOT + 1) -#define NUBUS_FIRST_SLOT 0x9 -#define NUBUS_LAST_SLOT 0xF +#define NUBUS_IRQS 16 #define TYPE_NUBUS_DEVICE "nubus-device" OBJECT_DECLARE_SIMPLE_TYPE(NubusDevice, NUBUS_DEVICE) @@ -29,40 +35,41 @@ OBJECT_DECLARE_SIMPLE_TYPE(NubusDevice, NUBUS_DEVICE) OBJECT_DECLARE_SIMPLE_TYPE(NubusBus, NUBUS_BUS) #define TYPE_NUBUS_BRIDGE "nubus-bridge" +OBJECT_DECLARE_SIMPLE_TYPE(NubusBridge, NUBUS_BRIDGE); struct NubusBus { BusState qbus; + AddressSpace nubus_as; + MemoryRegion nubus_mr; + MemoryRegion super_slot_io; MemoryRegion slot_io; - int current_slot; + uint16_t slot_available_mask; + + qemu_irq irqs[NUBUS_IRQS]; }; +#define NUBUS_DECL_ROM_MAX_SIZE (128 * KiB) + struct NubusDevice { DeviceState qdev; - int slot_nb; + int32_t slot; + MemoryRegion super_slot_mem; MemoryRegion slot_mem; - /* Format Block */ - - MemoryRegion fblock_io; + char *romfile; + 
MemoryRegion decl_rom; +}; - uint32_t rom_length; - uint32_t rom_crc; - uint8_t rom_rev; - uint8_t rom_format; - uint8_t byte_lanes; - int32_t directory_offset; +void nubus_set_irq(NubusDevice *nd, int level); - /* ROM */ +struct NubusBridge { + SysBusDevice parent_obj; - MemoryRegion rom_io; - const uint8_t *rom; + NubusBus bus; }; -void nubus_register_rom(NubusDevice *dev, const uint8_t *rom, uint32_t size, - int revision, int format, uint8_t byte_lanes); - #endif diff --git a/include/hw/nvram/xlnx-bbram.h b/include/hw/nvram/xlnx-bbram.h new file mode 100644 index 00000000000..87d59ef3c0c --- /dev/null +++ b/include/hw/nvram/xlnx-bbram.h @@ -0,0 +1,54 @@ +/* + * QEMU model of the Xilinx BBRAM Battery Backed RAM + * + * Copyright (c) 2015-2021 Xilinx Inc. + * + * Written by Edgar E. Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef XLNX_BBRAM_H +#define XLNX_BBRAM_H + +#include "sysemu/block-backend.h" +#include "hw/qdev-core.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "hw/register.h" + +#define RMAX_XLNX_BBRAM ((0x4c / 4) + 1) + +#define TYPE_XLNX_BBRAM "xlnx,bbram-ctrl" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxBBRam, XLNX_BBRAM); + +struct XlnxBBRam { + SysBusDevice parent_obj; + qemu_irq irq_bbram; + + BlockBackend *blk; + + uint32_t crc_zpads; + bool bbram8_wo; + bool blk_ro; + + uint32_t regs[RMAX_XLNX_BBRAM]; + RegisterInfo regs_info[RMAX_XLNX_BBRAM]; +}; + +#endif diff --git a/include/hw/nvram/xlnx-efuse.h b/include/hw/nvram/xlnx-efuse.h new file mode 100644 index 00000000000..58414e468b5 --- /dev/null +++ b/include/hw/nvram/xlnx-efuse.h @@ -0,0 +1,132 @@ +/* + * QEMU model of the Xilinx eFuse core + * + * Copyright (c) 2015 Xilinx Inc. + * + * Written by Edgar E. Iglesias + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
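Illustrative sketch (not part of the patch): how a Nubus card model might drive its slot interrupt through the nubus_set_irq() helper declared in the hw/nubus/nubus.h hunk above. The card model itself is hypothetical.

#include "qemu/osdep.h"
#include "hw/nubus/nubus.h"

/* Hypothetical card model: assert the slot IRQ while work is pending */
static void my_card_set_pending(NubusDevice *nd, bool pending)
{
    /* the bridge routes this to the VIA2 Nubus interrupt for the card's slot */
    nubus_set_irq(nd, pending ? 1 : 0);
}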
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef XLNX_EFUSE_H
+#define XLNX_EFUSE_H
+
+#include "sysemu/block-backend.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_XLNX_EFUSE "xlnx,efuse"
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxEFuse, XLNX_EFUSE);
+
+struct XlnxEFuse {
+    DeviceState parent_obj;
+    BlockBackend *blk;
+    bool blk_ro;
+    uint32_t *fuse32;
+
+    DeviceState *dev;
+
+    bool init_tbits;
+
+    uint8_t efuse_nr;
+    uint32_t efuse_size;
+
+    uint32_t *ro_bits;
+    uint32_t ro_bits_cnt;
+};
+
+/**
+ * xlnx_efuse_calc_crc:
+ * @data: an array of 32-bit words for which the CRC should be computed
+ * @u32_cnt: the array size in number of 32-bit words
+ * @zpads: the number of 32-bit zeros prepended to @data before computation
+ *
+ * This function is used to compute the CRC for an array of 32-bit words,
+ * using Xilinx-specific data padding.
+ *
+ * Returns: the computed 32-bit CRC
+ */
+uint32_t xlnx_efuse_calc_crc(const uint32_t *data, unsigned u32_cnt,
+                             unsigned zpads);
+
+/**
+ * xlnx_efuse_get_bit:
+ * @s: the efuse object
+ * @bit: the efuse bit-address to read
+ *
+ * Returns: the bit, 0 or 1, at @bit of object @s
+ */
+bool xlnx_efuse_get_bit(XlnxEFuse *s, unsigned int bit);
+
+/**
+ * xlnx_efuse_set_bit:
+ * @s: the efuse object
+ * @bit: the efuse bit-address to be set to 1
+ *
+ * Returns: true on success, false on failure
+ */
+bool xlnx_efuse_set_bit(XlnxEFuse *s, unsigned int bit);
+
+/**
+ * xlnx_efuse_k256_check:
+ * @s: the efuse object
+ * @crc: the 32-bit CRC to be compared with
+ * @start: the efuse bit-address (which must be a multiple of 32) of the
+ *         start of a 256-bit array
+ *
+ * This function computes the CRC of a 256-bit array starting at @start
+ * and then compares it to the given @crc
+ *
+ * Returns: true if @crc matches the computed CRC, false otherwise
+ */
+bool xlnx_efuse_k256_check(XlnxEFuse *s, uint32_t crc, unsigned start);
+
+/**
+ * xlnx_efuse_tbits_check:
+ * @s: the efuse object
+ *
+ * This function inspects a number of efuse bits at specific addresses
+ * to see if they match a validation pattern. Each pattern is a group
+ * of 4 bits, and there are 3 groups.
+ *
+ * Returns: a 3-bit mask, where a bit of '1' means the corresponding
+ * group has a valid pattern.
+ */
+uint32_t xlnx_efuse_tbits_check(XlnxEFuse *s);
+
+/**
+ * xlnx_efuse_get_row:
+ * @s: the efuse object
+ * @bit: the efuse bit address for which a 32-bit value is read
+ *
+ * Returns: the entire 32 bits of the efuse row starting at a bit
+ * address that is a multiple of 32 and contains the bit at @bit
+ */
+static inline uint32_t xlnx_efuse_get_row(XlnxEFuse *s, unsigned int bit)
+{
+    if (!(s->fuse32)) {
+        return 0;
+    } else {
+        unsigned int row_idx = bit / 32;
+
+        assert(row_idx < (s->efuse_size * s->efuse_nr / 32));
+        return s->fuse32[row_idx];
+    }
+}
+
+#endif
diff --git a/include/hw/nvram/xlnx-versal-efuse.h b/include/hw/nvram/xlnx-versal-efuse.h
new file mode 100644
index 00000000000..a873dc5cb01
--- /dev/null
+++ b/include/hw/nvram/xlnx-versal-efuse.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef XLNX_VERSAL_EFUSE_H
+#define XLNX_VERSAL_EFUSE_H
+
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "hw/nvram/xlnx-efuse.h"
+
+#define XLNX_VERSAL_EFUSE_CTRL_R_MAX ((0x100 / 4) + 1)
+
+#define TYPE_XLNX_VERSAL_EFUSE_CTRL "xlnx,versal-efuse"
+#define TYPE_XLNX_VERSAL_EFUSE_CACHE "xlnx,pmc-efuse-cache"
+
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalEFuseCtrl, XLNX_VERSAL_EFUSE_CTRL);
+OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalEFuseCache, XLNX_VERSAL_EFUSE_CACHE);
+
+struct XlnxVersalEFuseCtrl {
+    SysBusDevice parent_obj;
+    qemu_irq irq_efuse_imr;
+
+    XlnxEFuse *efuse;
+
+    void *extra_pg0_lock_spec; /* Opaque property */
+    uint32_t extra_pg0_lock_n16;
+
+    uint32_t regs[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
+    RegisterInfo regs_info[XLNX_VERSAL_EFUSE_CTRL_R_MAX];
+};
+
+struct XlnxVersalEFuseCache {
+    SysBusDevice parent_obj;
+    MemoryRegion iomem;
+
+    XlnxEFuse *efuse;
+};
+
+/**
+ * xlnx_versal_efuse_read_row:
+ * @s: the efuse object
+ * @bit: the bit-address within the 32-bit row to be read
+ * @denied: if non-NULL, set to true if the row is write-only
+ *
+ * Returns: the 32-bit word containing address @bit; 0 if @denied is true
+ */
+uint32_t xlnx_versal_efuse_read_row(XlnxEFuse *s, uint32_t bit, bool *denied);
+
+#endif
diff --git a/include/hw/nvram/xlnx-zynqmp-efuse.h b/include/hw/nvram/xlnx-zynqmp-efuse.h
new file mode 100644
index 00000000000..6b051ec4f15
--- /dev/null
+++ b/include/hw/nvram/xlnx-zynqmp-efuse.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef XLNX_ZYNQMP_EFUSE_H +#define XLNX_ZYNQMP_EFUSE_H + +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "hw/register.h" +#include "hw/nvram/xlnx-efuse.h" + +#define XLNX_ZYNQMP_EFUSE_R_MAX ((0x10fc / 4) + 1) + +#define TYPE_XLNX_ZYNQMP_EFUSE "xlnx,zynqmp-efuse" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPEFuse, XLNX_ZYNQMP_EFUSE); + +struct XlnxZynqMPEFuse { + SysBusDevice parent_obj; + qemu_irq irq; + + XlnxEFuse *efuse; + uint32_t regs[XLNX_ZYNQMP_EFUSE_R_MAX]; + RegisterInfo regs_info[XLNX_ZYNQMP_EFUSE_R_MAX]; +}; + +#endif diff --git a/include/hw/pci-host/gpex.h b/include/hw/pci-host/gpex.h index d48a020a952..fcf8b638200 100644 --- a/include/hw/pci-host/gpex.h +++ b/include/hw/pci-host/gpex.h @@ -49,8 +49,12 @@ struct GPEXHost { MemoryRegion io_ioport; MemoryRegion io_mmio; + MemoryRegion io_ioport_window; + MemoryRegion io_mmio_window; qemu_irq irq[GPEX_NUM_IRQS]; int irq_num[GPEX_NUM_IRQS]; + + bool allow_unmapped_accesses; }; struct GPEXConfig { diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 24fd53942ca..f068aaba8fd 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -11,7 +11,6 @@ #ifndef HW_PCI_I440FX_H #define HW_PCI_I440FX_H -#include "hw/hw.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/pam.h" #include "qom/object.h" @@ -46,6 +45,5 @@ PCIBus *i440fx_init(const char *host_type, const char *pci_type, MemoryRegion *pci_memory, MemoryRegion *ram_memory); -PCIBus *find_i440fx(void); #endif diff --git a/include/hw/pci-host/mv64361.h b/include/hw/pci-host/mv64361.h new file mode 100644 index 00000000000..9cdb35cb3cf --- /dev/null +++ b/include/hw/pci-host/mv64361.h @@ -0,0 +1,8 @@ +#ifndef MV64361_H +#define MV64361_H + +#define TYPE_MV64361 "mv64361" + +PCIBus *mv64361_get_pci_bus(DeviceState *dev, int n); + +#endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6be4e0c460c..e7cdf2d5ec5 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; + bool has_power; /* PCI config space */ uint8_t *config; @@ -401,13 +402,17 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); OBJECT_DECLARE_TYPE(PCIBus, PCIBusClass, PCI_BUS) #define TYPE_PCIE_BUS "PCIE" +typedef void (*pci_bus_dev_fn)(PCIBus *b, PCIDevice *d, void *opaque); +typedef void (*pci_bus_fn)(PCIBus *b, void *opaque); +typedef void *(*pci_bus_ret_fn)(PCIBus *b, void *opaque); + bool pci_bus_is_express(PCIBus *bus); -void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, - const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min, const char *typename); +void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *address_space_mem, + MemoryRegion *address_space_io, + uint8_t devfn_min, const char *typename); PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, @@ -450,6 +455,7 @@ static inline PCIBus *pci_get_bus(const PCIDevice *dev) return PCI_BUS(qdev_get_parent_bus(DEVICE(dev))); } int pci_bus_num(PCIBus *s); +void 
pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus); static inline int pci_dev_bus_num(const PCIDevice *dev) { return pci_bus_num(pci_get_bus(dev)); @@ -457,29 +463,30 @@ static inline int pci_dev_bus_num(const PCIDevice *dev) int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), + pci_bus_dev_fn fn, void *opaque); void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *bus, PCIDevice *d, - void *opaque), + pci_bus_dev_fn fn, void *opaque); -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state); +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque); +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque); +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state); PCIDevice *pci_get_function_0(PCIDevice *pci_dev); /* Use this wrapper when specific scan order is not required. */ static inline -void pci_for_each_bus(PCIBus *bus, - void (*fn)(PCIBus *bus, void *opaque), - void *opaque) +void pci_for_each_bus(PCIBus *bus, pci_bus_fn fn, void *opaque) { pci_for_each_bus_depth_first(bus, NULL, fn, opaque); } PCIBus *pci_device_root_bus(const PCIDevice *d); const char *pci_root_bus_path(PCIDevice *dev); +bool pci_bus_bypass_iommu(PCIBus *bus); PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn); int pci_qdev_find_device(const char *id, PCIDevice **pdev); void pci_bus_get_w64_range(PCIBus *bus, Range *range); @@ -902,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device; } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_power(PCIDevice *pci_dev, bool state); #endif diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h index 52e038c0196..c6f4eb45851 100644 --- a/include/hw/pci/pci_host.h +++ b/include/hw/pci/pci_host.h @@ -43,6 +43,7 @@ struct PCIHostState { uint32_t config_reg; bool mig_enabled; PCIBus *bus; + bool bypass_iommu; QLIST_ENTRY(PCIHostState) next; }; diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index ea28dcc850b..11abe22d460 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -204,15 +204,17 @@ #define PCI_VENDOR_ID_XILINX 0x10ee #define PCI_VENDOR_ID_VIA 0x1106 -#define PCI_DEVICE_ID_VIA_ISA_BRIDGE 0x0686 +#define PCI_DEVICE_ID_VIA_82C686B_ISA 0x0686 #define PCI_DEVICE_ID_VIA_IDE 0x0571 #define PCI_DEVICE_ID_VIA_UHCI 0x3038 #define PCI_DEVICE_ID_VIA_82C686B_PM 0x3057 #define PCI_DEVICE_ID_VIA_AC97 0x3058 #define PCI_DEVICE_ID_VIA_MC97 0x3068 +#define PCI_DEVICE_ID_VIA_8231_ISA 0x8231 #define PCI_DEVICE_ID_VIA_8231_PM 0x8235 #define PCI_VENDOR_ID_MARVELL 0x11ab +#define PCI_DEVICE_ID_MARVELL_MV6436X 0x6460 #define PCI_VENDOR_ID_SILICON_MOTION 0x126f #define PCI_DEVICE_ID_SM501 0x0501 @@ -225,6 +227,9 @@ #define PCI_VENDOR_ID_FREESCALE 0x1957 #define PCI_DEVICE_ID_MPC8533E 0x0030 +#define PCI_VENDOR_ID_BAIDU 0x1d22 +#define PCI_DEVICE_ID_KUNLUN_VF 0x3685 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_82378 0x0484 #define PCI_DEVICE_ID_INTEL_82441 0x1237 diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index bea8ecad0fd..e25b289ce84 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -57,8 +57,11 @@ struct PCIESlot { /* Disable ACS (really for a pcie_root_port) */ bool disable_acs; - /* 
Indicates whether hot-plug is enabled on the slot */ + /* Indicates whether any type of hot-plug is allowed on the slot */ bool hotplug; + + bool native_hotplug; + QLIST_ENTRY(PCIESlot) next; }; diff --git a/include/hw/ppc/openpic.h b/include/hw/ppc/openpic.h index 74ff44bff0a..ebdaf8a4932 100644 --- a/include/hw/ppc/openpic.h +++ b/include/hw/ppc/openpic.h @@ -21,7 +21,6 @@ enum { typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; -#define OPENPIC_MODEL_RAVEN 0 #define OPENPIC_MODEL_FSL_MPIC_20 1 #define OPENPIC_MODEL_FSL_MPIC_42 2 #define OPENPIC_MODEL_KEYLARGO 3 @@ -32,13 +31,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; #define OPENPIC_MAX_IRQ (OPENPIC_MAX_SRC + OPENPIC_MAX_IPI + \ OPENPIC_MAX_TMR) -/* Raven */ -#define RAVEN_MAX_CPU 2 -#define RAVEN_MAX_EXT 48 -#define RAVEN_MAX_IRQ 64 -#define RAVEN_MAX_TMR OPENPIC_MAX_TMR -#define RAVEN_MAX_IPI OPENPIC_MAX_IPI - /* KeyLargo */ #define KEYLARGO_MAX_CPU 4 #define KEYLARGO_MAX_EXT 64 @@ -49,14 +41,6 @@ typedef struct IrqLines { qemu_irq irq[OPENPIC_OUTPUT_NB]; } IrqLines; /* Timers don't exist but this makes the code happy... */ #define KEYLARGO_TMR_IRQ (KEYLARGO_IPI_IRQ + KEYLARGO_MAX_IPI) -/* Interrupt definitions */ -#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ -#define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */ -#define RAVEN_TMR_IRQ (RAVEN_MAX_EXT + 2) /* First timer IRQ */ -#define RAVEN_IPI_IRQ (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ -/* First doorbell IRQ */ -#define RAVEN_DBL_IRQ (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) - typedef struct FslMpicInfo { int max_ext; } FslMpicInfo; @@ -67,7 +51,8 @@ typedef enum IRQType { IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ } IRQType; -/* Round up to the nearest 64 IRQs so that the queue length +/* + * Round up to the nearest 64 IRQs so that the queue length * won't change when moving between 32 and 64 bit hosts. */ #define IRQQUEUE_SIZE_BITS ((OPENPIC_MAX_IRQ + 63) & ~63) @@ -117,8 +102,10 @@ typedef struct OpenPICTimer { bool qemu_timer_active; /* Is the qemu_timer is running? */ struct QEMUTimer *qemu_timer; struct OpenPICState *opp; /* Device timer is part of. */ - /* The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last - current_count written or read, only defined if qemu_timer_active. */ + /* + * The QEMU_CLOCK_VIRTUAL time (in ns) corresponding to the last + * current_count written or read, only defined if qemu_timer_active. + */ uint64_t origin_time; } OpenPICTimer; diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h index d69cee17b23..aa08d79d24d 100644 --- a/include/hw/ppc/pnv.h +++ b/include/hw/ppc/pnv.h @@ -170,29 +170,10 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER8NVL, DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER9, TYPE_PNV_CHIP_POWER9) -#define TYPE_PNV_CHIP_POWER10 PNV_CHIP_TYPE_NAME("power10_v1.0") +#define TYPE_PNV_CHIP_POWER10 PNV_CHIP_TYPE_NAME("power10_v2.0") DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10, TYPE_PNV_CHIP_POWER10) -/* - * This generates a HW chip id depending on an index, as found on a - * two socket system with dual chip modules : - * - * 0x0, 0x1, 0x10, 0x11 - * - * 4 chips should be the maximum - * - * TODO: use a machine property to define the chip ids - */ -#define PNV_CHIP_HWID(i) ((((i) & 0x3e) << 3) | ((i) & 0x1)) - -/* - * Converts back a HW chip id to an index. This is useful to calculate - * the MMIO addresses of some controllers which depend on the chip id. 
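Illustrative sketch (not part of the patch): the pci_bus_dev_fn callback style used by the new pci_for_each_device_under_bus() helper declared in the hw/pci/pci.h hunk above. The counting callback is hypothetical.

#include "qemu/osdep.h"
#include "hw/pci/pci.h"

/* Hypothetical pci_bus_dev_fn: count the devices present on one bus */
static void count_dev(PCIBus *bus, PCIDevice *d, void *opaque)
{
    (*(int *)opaque)++;
}

static int count_devices_on_bus(PCIBus *bus)
{
    int count = 0;

    /* unlike pci_for_each_device(), no bus-number lookup is performed */
    pci_for_each_device_under_bus(bus, count_dev, &count);
    return count;
}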
- */ -#define PNV_CHIP_INDEX(chip) \ - (((chip)->chip_id >> 2) * 2 + ((chip)->chip_id & 0x3)) - PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir); #define TYPE_PNV_MACHINE MACHINE_TYPE_NAME("powernv") @@ -256,11 +237,11 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); #define PNV_OCC_COMMON_AREA_SIZE 0x0000000000800000ull #define PNV_OCC_COMMON_AREA_BASE 0x7fff800000ull #define PNV_OCC_SENSOR_BASE(chip) (PNV_OCC_COMMON_AREA_BASE + \ - PNV_OCC_SENSOR_DATA_BLOCK_BASE(PNV_CHIP_INDEX(chip))) + PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id)) #define PNV_HOMER_SIZE 0x0000000000400000ull #define PNV_HOMER_BASE(chip) \ - (0x7ffd800000ull + ((uint64_t)PNV_CHIP_INDEX(chip)) * PNV_HOMER_SIZE) + (0x7ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV_HOMER_SIZE) /* @@ -279,16 +260,16 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); */ #define PNV_ICP_SIZE 0x0000000000100000ull #define PNV_ICP_BASE(chip) \ - (0x0003ffff80000000ull + (uint64_t) PNV_CHIP_INDEX(chip) * PNV_ICP_SIZE) + (0x0003ffff80000000ull + (uint64_t) (chip)->chip_id * PNV_ICP_SIZE) #define PNV_PSIHB_SIZE 0x0000000000100000ull #define PNV_PSIHB_BASE(chip) \ - (0x0003fffe80000000ull + (uint64_t)PNV_CHIP_INDEX(chip) * PNV_PSIHB_SIZE) + (0x0003fffe80000000ull + (uint64_t)(chip)->chip_id * PNV_PSIHB_SIZE) #define PNV_PSIHB_FSP_SIZE 0x0000000100000000ull #define PNV_PSIHB_FSP_BASE(chip) \ - (0x0003ffe000000000ull + (uint64_t)PNV_CHIP_INDEX(chip) * \ + (0x0003ffe000000000ull + (uint64_t)(chip)->chip_id * \ PNV_PSIHB_FSP_SIZE) /* @@ -324,11 +305,11 @@ void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor); #define PNV9_OCC_COMMON_AREA_SIZE 0x0000000000800000ull #define PNV9_OCC_COMMON_AREA_BASE 0x203fff800000ull #define PNV9_OCC_SENSOR_BASE(chip) (PNV9_OCC_COMMON_AREA_BASE + \ - PNV_OCC_SENSOR_DATA_BLOCK_BASE(PNV_CHIP_INDEX(chip))) + PNV_OCC_SENSOR_DATA_BLOCK_BASE((chip)->chip_id)) #define PNV9_HOMER_SIZE 0x0000000000400000ull #define PNV9_HOMER_BASE(chip) \ - (0x203ffd800000ull + ((uint64_t)PNV_CHIP_INDEX(chip)) * PNV9_HOMER_SIZE) + (0x203ffd800000ull + ((uint64_t)(chip)->chip_id) * PNV9_HOMER_SIZE) /* * POWER10 MMIO base addresses - 16TB stride per chip diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h index 6ecee98a76e..c22eab2e1f6 100644 --- a/include/hw/ppc/pnv_core.h +++ b/include/hw/ppc/pnv_core.h @@ -67,7 +67,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvQuad, PNV_QUAD) struct PnvQuad { DeviceState parent_obj; - uint32_t id; + uint32_t quad_id; MemoryRegion xscom_regs; }; #endif /* PPC_PNV_CORE_H */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index bf7cab7a2ce..ee7504b9760 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -12,6 +12,7 @@ #include "hw/ppc/spapr_xive.h" /* For SpaprXive */ #include "hw/ppc/xics.h" /* For ICSState */ #include "hw/ppc/spapr_tpm_proxy.h" +#include "hw/ppc/vof.h" struct SpaprVioBus; struct SpaprPhbState; @@ -74,8 +75,10 @@ typedef enum { #define SPAPR_CAP_CCF_ASSIST 0x09 /* Implements PAPR FWNMI option */ #define SPAPR_CAP_FWNMI 0x0A +/* Support H_RPT_INVALIDATE */ +#define SPAPR_CAP_RPT_INVALIDATE 0x0B /* Num Caps */ -#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI + 1) +#define SPAPR_CAP_NUM (SPAPR_CAP_RPT_INVALIDATE + 1) /* * Capability Values @@ -95,25 +98,32 @@ typedef enum { #define SPAPR_CAP_FIXED_CCD 0x03 #define SPAPR_CAP_FIXED_NA 0x10 /* Lets leave a bit of a gap... 
*/ -#define FDT_MAX_SIZE 0x100000 +#define FDT_MAX_SIZE 0x200000 + +/* Max number of GPUs per system */ +#define NVGPU_MAX_NUM 6 + +/* Max number of NUMA nodes */ +#define NUMA_NODES_MAX_NUM (MAX_NODES + NVGPU_MAX_NUM) /* - * NUMA related macros. MAX_DISTANCE_REF_POINTS was taken - * from Linux kernel arch/powerpc/mm/numa.h. It represents the - * amount of associativity domains for non-CPU resources. + * NUMA FORM1 macros. FORM1_DIST_REF_POINTS was taken from + * MAX_DISTANCE_REF_POINTS in arch/powerpc/mm/numa.h from Linux + * kernel source. It represents the amount of associativity domains + * for non-CPU resources. * - * NUMA_ASSOC_SIZE is the base array size of an ibm,associativity + * FORM1_NUMA_ASSOC_SIZE is the base array size of an ibm,associativity * array for any non-CPU resource. - * - * VCPU_ASSOC_SIZE represents the size of ibm,associativity array - * for CPUs, which has an extra element (vcpu_id) in the end. */ -#define MAX_DISTANCE_REF_POINTS 4 -#define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1) -#define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1) +#define FORM1_DIST_REF_POINTS 4 +#define FORM1_NUMA_ASSOC_SIZE (FORM1_DIST_REF_POINTS + 1) -/* Max number of these GPUsper a physical box */ -#define NVGPU_MAX_NUM 6 +/* + * FORM2 NUMA affinity has a single associativity domain, giving + * us a assoc size of 2. + */ +#define FORM2_DIST_REF_POINTS 1 +#define FORM2_NUMA_ASSOC_SIZE (FORM2_DIST_REF_POINTS + 1) typedef struct SpaprCapabilities SpaprCapabilities; struct SpaprCapabilities { @@ -142,6 +152,7 @@ struct SpaprMachineClass { hwaddr rma_limit; /* clamp the RMA to this size */ bool pre_5_1_assoc_refpoints; bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, @@ -180,6 +191,7 @@ struct SpaprMachineState { uint64_t kernel_addr; uint32_t initrd_base; long initrd_size; + Vof *vof; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; @@ -223,6 +235,9 @@ struct SpaprMachineState { int fwnmi_machine_check_interlock; QemuCond fwnmi_machine_check_interlock_cond; + /* Set by -boot */ + char *boot_device; + /*< public >*/ char *kvm_type; char *host_model; @@ -242,7 +257,8 @@ struct SpaprMachineState { unsigned gpu_numa_id; SpaprTpmProxy *tpm_proxy; - uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE]; + uint32_t FORM1_assoc_array[NUMA_NODES_MAX_NUM][FORM1_NUMA_ASSOC_SIZE]; + uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE]; Error *fwnmi_migration_blocker; }; @@ -363,7 +379,7 @@ struct SpaprMachineState { /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 -#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_SET_DAWR0 2 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 @@ -395,10 +411,13 @@ struct SpaprMachineState { #define H_CPU_CHAR_THR_RECONF_TRIG PPC_BIT(6) #define H_CPU_CHAR_CACHE_COUNT_DIS PPC_BIT(7) #define H_CPU_CHAR_BCCTR_FLUSH_ASSIST PPC_BIT(9) + #define H_CPU_BEHAV_FAVOUR_SECURITY PPC_BIT(0) #define H_CPU_BEHAV_L1D_FLUSH_PR PPC_BIT(1) #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR PPC_BIT(2) #define H_CPU_BEHAV_FLUSH_COUNT_CACHE PPC_BIT(5) +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY PPC_BIT(7) +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS PPC_BIT(8) /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 @@ -538,8 +557,10 @@ struct SpaprMachineState { #define H_SCM_BIND_MEM 0x3EC #define H_SCM_UNBIND_MEM 
0x3F0 #define H_SCM_UNBIND_ALL 0x3FC +#define H_SCM_HEALTH 0x400 +#define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_SCM_UNBIND_ALL +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE /* The hcalls above are standardized in PAPR and implemented by pHyp * as well. @@ -554,7 +575,9 @@ struct SpaprMachineState { /* Client Architecture support */ #define KVMPPC_H_CAS (KVMPPC_HCALL_BASE + 0x2) #define KVMPPC_H_UPDATE_DT (KVMPPC_HCALL_BASE + 0x3) -#define KVMPPC_HCALL_MAX KVMPPC_H_UPDATE_DT +/* 0x4 was used for KVMPPC_H_UPDATE_PHANDLE in SLOF */ +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) +#define KVMPPC_HCALL_MAX KVMPPC_H_VOF_CLIENT /* * The hcall range 0xEF00 to 0xEF80 is reserved for use in facilitating @@ -581,6 +604,12 @@ typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, SpaprMachineState *sm, void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args); +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong shift); +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong flags, target_ulong shift); +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr); +void push_sregs_to_kvm_pr(SpaprMachineState *spapr); /* Virtual Processor Area structure constants */ #define VPA_MIN_SIZE 640 @@ -760,7 +789,7 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr); #define SPAPR_IS_PCI_LIOBN(liobn) (!!((liobn) & 0x80000000)) #define SPAPR_PCI_DMA_WINDOW_NUM(liobn) ((liobn) & 0xff) -#define RTAS_SIZE 2048 +#define RTAS_MIN_SIZE 20 /* hv_rtas_size in SLOF */ #define RTAS_ERROR_LOG_MAX 2048 /* Offset from rtas-base where error log is placed */ @@ -820,6 +849,7 @@ void spapr_dt_events(SpaprMachineState *sm, void *fdt); void close_htab_fd(SpaprMachineState *spapr); void spapr_setup_hpt(SpaprMachineState *spapr); void spapr_free_hpt(SpaprMachineState *spapr); +void spapr_check_mmu_mode(bool guest_radix); SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); void spapr_tce_table_enable(SpaprTceTable *tcet, uint32_t page_shift, uint64_t bus_offset, @@ -921,6 +951,7 @@ extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv; extern const VMStateDescription vmstate_spapr_cap_large_decr; extern const VMStateDescription vmstate_spapr_cap_ccf_assist; extern const VMStateDescription vmstate_spapr_cap_fwnmi; +extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate; static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap) { @@ -945,4 +976,16 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); hwaddr spapr_get_rtas_addr(void); bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp); +void spapr_vof_quiesce(MachineState *ms); +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen); +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args); +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr); +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt); + #endif /* HW_SPAPR_H */ diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h index 6f9f02d3de2..7cb33674006 100644 --- 
a/include/hw/ppc/spapr_numa.h +++ b/include/hw/ppc/spapr_numa.h @@ -24,6 +24,7 @@ */ void spapr_numa_associativity_init(SpaprMachineState *spapr, MachineState *machine); +void spapr_numa_associativity_check(SpaprMachineState *spapr); void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas); void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, int offset, int nodeid); diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h index 73be250e2ac..764f999f547 100644 --- a/include/hw/ppc/spapr_nvdimm.h +++ b/include/hw/ppc/spapr_nvdimm.h @@ -11,19 +11,9 @@ #define HW_SPAPR_NVDIMM_H #include "hw/mem/nvdimm.h" -#include "hw/ppc/spapr.h" -/* - * The nvdimm size should be aligned to SCM block size. - * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE - * inorder to have SCM regions not to overlap with dimm memory regions. - * The SCM devices can have variable block sizes. For now, fixing the - * block size to the minimum value. - */ -#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE - -/* Have an explicit check for alignment */ -QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); +typedef struct SpaprDrc SpaprDrc; +typedef struct SpaprMachineState SpaprMachineState; int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, void *fdt, int *fdt_start_offset, Error **errp); diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h index 48b716a060c..c3e8b98e7e2 100644 --- a/include/hw/ppc/spapr_ovec.h +++ b/include/hw/ppc/spapr_ovec.h @@ -49,6 +49,7 @@ typedef struct SpaprOptionVector SpaprOptionVector; /* option vector 5 */ #define OV5_DRCONF_MEMORY OV_BIT(2, 2) #define OV5_FORM1_AFFINITY OV_BIT(5, 0) +#define OV5_FORM2_AFFINITY OV_BIT(5, 2) #define OV5_HP_EVT OV_BIT(6, 5) #define OV5_HPT_RESIZE OV_BIT(6, 7) #define OV5_DRMEM_V2 OV_BIT(22, 0) diff --git a/include/hw/ppc/vof.h b/include/hw/ppc/vof.h new file mode 100644 index 00000000000..97fdef758bf --- /dev/null +++ b/include/hw/ppc/vof.h @@ -0,0 +1,60 @@ +/* + * Virtual Open Firmware + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef HW_VOF_H +#define HW_VOF_H + +typedef struct Vof { + uint64_t top_addr; /* copied from rma_size */ + GArray *claimed; /* array of SpaprOfClaimed */ + uint64_t claimed_base; + GHashTable *of_instances; /* ihandle -> SpaprOfInstance */ + uint32_t of_instance_last; + char *bootargs; + long fw_size; +} Vof; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real); +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, uint64_t align); +void vof_init(Vof *vof, uint64_t top_addr, Error **errp); +void vof_cleanup(Vof *vof); +void vof_build_dt(void *fdt, Vof *vof); +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path); + +#define TYPE_VOF_MACHINE_IF "vof-machine-if" + +typedef struct VofMachineIfClass VofMachineIfClass; +DECLARE_CLASS_CHECKERS(VofMachineIfClass, VOF_MACHINE, TYPE_VOF_MACHINE_IF) + +struct VofMachineIfClass { + InterfaceClass parent; + target_ulong (*client_architecture_support)(MachineState *ms, CPUState *cs, + target_ulong vec); + void (*quiesce)(MachineState *ms); + bool (*setprop)(MachineState *ms, const char *path, const char *propname, + void *val, int vallen); +}; + +/* + * Initial stack size is from + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#REF27292 + * + * "Client programs shall be invoked with a valid stack pointer (r1) with + * 
at least 32K bytes of memory available for stack growth". + */ +#define VOF_STACK_SIZE 0x8000 + +#define VOF_MEM_READ(pa, buf, size) \ + address_space_read(&address_space_memory, \ + (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size)) +#define VOF_MEM_WRITE(pa, buf, size) \ + address_space_write(&address_space_memory, \ + (pa), MEMTXATTRS_UNSPECIFIED, (buf), (size)) + +#define PROM_ERROR (~0U) + +#endif /* HW_VOF_H */ diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 445eccfe6b7..b8ab0bf7490 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -261,6 +261,10 @@ static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) #define XIVE_ESB_QUEUED (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q) #define XIVE_ESB_OFF XIVE_ESB_VAL_Q +bool xive_esb_trigger(uint8_t *pq); +bool xive_esb_eoi(uint8_t *pq); +uint8_t xive_esb_set(uint8_t *pq, uint8_t value); + /* * "magic" Event State Buffer (ESB) MMIO offsets. * @@ -282,6 +286,30 @@ static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno) uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno); uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq); +/* + * Source status helpers + */ +static inline void xive_source_set_status(XiveSource *xsrc, uint32_t srcno, + uint8_t status, bool enable) +{ + if (enable) { + xsrc->status[srcno] |= status; + } else { + xsrc->status[srcno] &= ~status; + } +} + +static inline void xive_source_set_asserted(XiveSource *xsrc, uint32_t srcno, + bool enable) +{ + xive_source_set_status(xsrc, srcno, XIVE_STATUS_ASSERTED, enable); +} + +static inline bool xive_source_is_asserted(XiveSource *xsrc, uint32_t srcno) +{ + return xsrc->status[srcno] & XIVE_STATUS_ASSERTED; +} + void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon); @@ -331,6 +359,11 @@ struct XiveTCTX { XivePresenter *xptr; }; +static inline uint32_t xive_tctx_word2(uint8_t *ring) +{ + return *((uint32_t *) &ring[TM_WORD2]); +} + /* * XIVE Router */ @@ -404,6 +437,10 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx, uint8_t format, uint8_t nvt_blk, uint32_t nvt_idx, bool cam_ignore, uint32_t logic_serv); +bool xive_presenter_notify(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv); /* * XIVE Fabric (Interface between Interrupt Controller and Machine) @@ -450,6 +487,17 @@ struct XiveENDSource { */ #define XIVE_PRIORITY_MAX 7 +/* + * Convert a priority number to an Interrupt Pending Buffer (IPB) + * register, which indicates a pending interrupt at the priority + * corresponding to the bit number + */ +static inline uint8_t xive_priority_to_ipb(uint8_t priority) +{ + return priority > XIVE_PRIORITY_MAX ? 
+ 0 : 1 << (XIVE_PRIORITY_MAX - priority); +} + /* * XIVE Thread Interrupt Management Aera (TIMA) * diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index bafc311bfa1..20d3066595e 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -26,6 +26,7 @@ typedef enum DeviceCategory { DEVICE_CATEGORY_SOUND, DEVICE_CATEGORY_MISC, DEVICE_CATEGORY_CPU, + DEVICE_CATEGORY_WATCHDOG, DEVICE_CATEGORY_MAX } DeviceCategory; @@ -176,11 +177,12 @@ struct DeviceState { Object parent_obj; /*< public >*/ - const char *id; + char *id; char *canonical_path; bool realized; bool pending_deleted_event; - QemuOpts *opts; + int64_t pending_deleted_expires_ms; + QDict *opts; int hotplugged; bool allow_unplug_during_migration; BusState *parent_bus; @@ -201,8 +203,12 @@ struct DeviceListener { * informs qdev if a device should be visible or hidden. We can * hide a failover device depending for example on the device * opts. + * + * On errors, it returns false and errp is set. Device creation + * should fail in this case. */ - bool (*hide_device)(DeviceListener *listener, QemuOpts *device_opts); + bool (*hide_device)(DeviceListener *listener, const QDict *device_opts, + bool from_json, Error **errp); QTAILQ_ENTRY(DeviceListener) link; }; @@ -264,6 +270,7 @@ struct BusState { HotplugHandler *hotplug_handler; int max_index; bool realized; + bool full; int num_children; /* @@ -597,6 +604,10 @@ void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n); * * See qdev_connect_gpio_out() for how code that uses such a device * can connect to one of its output GPIO lines. + * + * There is no need to release the @pins allocated array because it + * will be automatically released when @dev calls its instance_finalize() + * handler. */ void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n); /** @@ -673,9 +684,9 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id); typedef int (qbus_walkerfn)(BusState *bus, void *opaque); typedef int (qdev_walkerfn)(DeviceState *dev, void *opaque); -void qbus_create_inplace(void *bus, size_t size, const char *typename, - DeviceState *parent, const char *name); -BusState *qbus_create(const char *typename, DeviceState *parent, const char *name); +void qbus_init(void *bus, size_t size, const char *typename, + DeviceState *parent, const char *name); +BusState *qbus_new(const char *typename, DeviceState *parent, const char *name); bool qbus_realize(BusState *bus, Error **errp); void qbus_unrealize(BusState *bus); @@ -798,18 +809,43 @@ static inline bool qbus_is_hotpluggable(BusState *bus) return bus->hotplug_handler; } +/** + * qbus_mark_full: Mark this bus as full, so no more devices can be attached + * @bus: Bus to mark as full + * + * By default, QEMU will allow devices to be plugged into a bus up + * to the bus class's device count limit. Calling this function + * marks a particular bus as full, so that no more devices can be + * plugged into it. In particular this means that the bus will not + * be considered as a candidate for plugging in devices created by + * the user on the commandline or via the monitor. + * If a machine has multiple buses of a given type, such as I2C, + * where some of those buses in the real hardware are used only for + * internal devices and some are exposed via expansion ports, you + * can use this function to mark the internal-only buses as full + * after you have created all their internal devices. Then user + * created devices will appear on the expansion-port bus where + * guest software expects them. 
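Illustrative sketch (not part of the patch): the usage pattern the qbus_mark_full() comment above describes, assuming a hypothetical SoC with an internal-only I2C bus carrying a TMP105 sensor.

#include "qemu/osdep.h"
#include "hw/i2c/i2c.h"
#include "hw/qdev-core.h"

/* Hypothetical SoC init: keep the management I2C bus off-limits to -device */
static void soc_create_internal_i2c(DeviceState *soc)
{
    I2CBus *bus = i2c_init_bus(soc, "i2c-internal");

    /* create the internal devices first... */
    i2c_slave_create_simple(bus, "tmp105", 0x48);

    /* ...then mark the bus full so user-created devices go elsewhere */
    qbus_mark_full(BUS(bus));
}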
+ */ +static inline void qbus_mark_full(BusState *bus) +{ + bus->full = true; +} + void device_listener_register(DeviceListener *listener); void device_listener_unregister(DeviceListener *listener); /** * @qdev_should_hide_device: - * @opts: QemuOpts as passed on cmdline. + * @opts: options QDict + * @from_json: true if @opts entries are typed, false for all strings + * @errp: pointer to error object * * Check if a device should be added. * When a device is added via qdev_device_add() this will be called, * and return if the device should be added now or not. */ -bool qdev_should_hide_device(QemuOpts *opts); +bool qdev_should_hide_device(const QDict *opts, bool from_json, Error **errp); typedef enum MachineInitPhase { /* current_machine is NULL. */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 0ef97d60ce4..f7925f67d03 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -32,6 +32,7 @@ struct PropertyInfo { const char *name; const char *description; const QEnumLookup *enum_table; + bool realized_set_allowed; /* allow setting property on realized device */ int (*print)(Object *obj, Property *prop, char *dest, size_t len); void (*set_default_value)(ObjectProperty *op, const Property *prop); ObjectProperty *(*create)(ObjectClass *oc, const char *name, diff --git a/include/hw/rdma/rdma.h b/include/hw/rdma/rdma.h index e77e43a1709..80b2e531c47 100644 --- a/include/hw/rdma/rdma.h +++ b/include/hw/rdma/rdma.h @@ -31,7 +31,7 @@ typedef struct RdmaProvider RdmaProvider; struct RdmaProviderClass { InterfaceClass parent; - void (*print_statistics)(Monitor *mon, RdmaProvider *obj); + void (*format_statistics)(RdmaProvider *obj, GString *buf); }; #endif diff --git a/include/hw/register.h b/include/hw/register.h index b480e3882cd..6a076cfcdf0 100644 --- a/include/hw/register.h +++ b/include/hw/register.h @@ -204,6 +204,14 @@ RegisterInfoArray *register_init_block32(DeviceState *owner, bool debug_enabled, uint64_t memory_size); +RegisterInfoArray *register_init_block64(DeviceState *owner, + const RegisterAccessInfo *rae, + int num, RegisterInfo *ri, + uint64_t *data, + const MemoryRegionOps *ops, + bool debug_enabled, + uint64_t memory_size); + /** * This function should be called to cleanup the registers that were initialized * when calling register_init_block32(). This function should only be called diff --git a/include/hw/registerfields.h b/include/hw/registerfields.h index 93fa4a84c22..f2a3c9c41f7 100644 --- a/include/hw/registerfields.h +++ b/include/hw/registerfields.h @@ -30,6 +30,10 @@ enum { A_ ## reg = (addr) }; \ enum { R_ ## reg = (addr) / 2 }; +#define REG64(reg, addr) \ + enum { A_ ## reg = (addr) }; \ + enum { R_ ## reg = (addr) / 8 }; + /* Define SHIFT, LENGTH and MASK constants for a field within a register */ /* This macro will define R_FOO_BAR_MASK, R_FOO_BAR_SHIFT and R_FOO_BAR_LENGTH @@ -58,6 +62,8 @@ /* Extract a field from an array of registers */ #define ARRAY_FIELD_EX32(regs, reg, field) \ FIELD_EX32((regs)[R_ ## reg], reg, field) +#define ARRAY_FIELD_EX64(regs, reg, field) \ + FIELD_EX64((regs)[R_ ## reg], reg, field) /* Deposit a register field. 
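Illustrative sketch (not part of the patch): the new 64-bit register-field helpers added above, applied to a hypothetical CTRL register. REG64() maps the byte offset to an index into a uint64_t register array, and FIELD() is the existing helper from this header.

#include "qemu/osdep.h"
#include "hw/registerfields.h"

/* Hypothetical 64-bit register at byte offset 0 with two fields */
REG64(CTRL, 0x0)
    FIELD(CTRL, ENABLE, 0, 1)
    FIELD(CTRL, THRESHOLD, 8, 16)

static uint64_t regs[4];

static unsigned get_threshold(void)
{
    /* R_CTRL == 0x0 / 8, so it indexes the uint64_t array directly */
    return ARRAY_FIELD_EX64(regs, CTRL, THRESHOLD);
}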
* Assigning values larger then the target field will result in @@ -89,7 +95,7 @@ _d; }) #define FIELD_DP64(storage, reg, field, val) ({ \ struct { \ - unsigned int v:R_ ## reg ## _ ## field ## _LENGTH; \ + uint64_t v:R_ ## reg ## _ ## field ## _LENGTH; \ } _v = { .v = val }; \ uint64_t _d; \ _d = deposit64((storage), R_ ## reg ## _ ## field ## _SHIFT, \ @@ -99,5 +105,7 @@ /* Deposit a field to array of registers. */ #define ARRAY_FIELD_DP32(regs, reg, field, val) \ (regs)[R_ ## reg] = FIELD_DP32((regs)[R_ ## reg], reg, field, val); +#define ARRAY_FIELD_DP64(regs, reg, field, val) \ + (regs)[R_ ## reg] = FIELD_DP64((regs)[R_ ## reg], reg, field, val); #endif diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h index 182322d7ebf..81f5445b21f 100644 --- a/include/hw/riscv/boot.h +++ b/include/hw/riscv/boot.h @@ -24,8 +24,15 @@ #include "hw/loader.h" #include "hw/riscv/riscv_hart.h" +#define RISCV32_BIOS_BIN "opensbi-riscv32-generic-fw_dynamic.bin" +#define RISCV32_BIOS_ELF "opensbi-riscv32-generic-fw_dynamic.elf" +#define RISCV64_BIOS_BIN "opensbi-riscv64-generic-fw_dynamic.bin" +#define RISCV64_BIOS_ELF "opensbi-riscv64-generic-fw_dynamic.elf" + bool riscv_is_32bit(RISCVHartArrayState *harts); +char *riscv_plic_hart_config_string(int hart_count); + target_ulong riscv_calc_kernel_start_addr(RISCVHartArrayState *harts, target_ulong firmware_end_addr); QEMU_WARN_UNUSED_RESULT diff --git a/include/hw/riscv/microchip_pfsoc.h b/include/hw/riscv/microchip_pfsoc.h index d30916f45d4..a0673f5f59c 100644 --- a/include/hw/riscv/microchip_pfsoc.h +++ b/include/hw/riscv/microchip_pfsoc.h @@ -138,7 +138,6 @@ enum { #define MICROCHIP_PFSOC_MANAGEMENT_CPU_COUNT 1 #define MICROCHIP_PFSOC_COMPUTE_CPU_COUNT 4 -#define MICROCHIP_PFSOC_PLIC_HART_CONFIG "MS" #define MICROCHIP_PFSOC_PLIC_NUM_SOURCES 185 #define MICROCHIP_PFSOC_PLIC_NUM_PRIORITIES 7 #define MICROCHIP_PFSOC_PLIC_PRIORITY_BASE 0x04 diff --git a/include/hw/riscv/opentitan.h b/include/hw/riscv/opentitan.h index a5ea3a5e4e5..eac35ef5905 100644 --- a/include/hw/riscv/opentitan.h +++ b/include/hw/riscv/opentitan.h @@ -20,8 +20,9 @@ #define HW_OPENTITAN_H #include "hw/riscv/riscv_hart.h" -#include "hw/intc/ibex_plic.h" +#include "hw/intc/sifive_plic.h" #include "hw/char/ibex_uart.h" +#include "hw/timer/ibex_timer.h" #include "qom/object.h" #define TYPE_RISCV_IBEX_SOC "riscv.lowrisc.ibex.soc" @@ -33,11 +34,13 @@ struct LowRISCIbexSoCState { /*< public >*/ RISCVHartArrayState cpus; - IbexPlicState plic; + SiFivePLICState plic; IbexUartState uart; + IbexTimerState timer; MemoryRegion flash_mem; MemoryRegion rom; + MemoryRegion flash_alias; }; typedef struct OpenTitanState { @@ -52,12 +55,13 @@ enum { IBEX_DEV_ROM, IBEX_DEV_RAM, IBEX_DEV_FLASH, + IBEX_DEV_FLASH_VIRTUAL, IBEX_DEV_UART, IBEX_DEV_GPIO, IBEX_DEV_SPI, IBEX_DEV_I2C, IBEX_DEV_PATTGEN, - IBEX_DEV_RV_TIMER, + IBEX_DEV_TIMER, IBEX_DEV_SENSOR_CTRL, IBEX_DEV_OTP_CTRL, IBEX_DEV_PWRMGR, @@ -79,17 +83,19 @@ enum { IBEX_DEV_ALERT_HANDLER, IBEX_DEV_NMI_GEN, IBEX_DEV_OTBN, + IBEX_DEV_PERI, }; enum { - IBEX_UART_RX_PARITY_ERR_IRQ = 0x28, - IBEX_UART_RX_TIMEOUT_IRQ = 0x27, - IBEX_UART_RX_BREAK_ERR_IRQ = 0x26, - IBEX_UART_RX_FRAME_ERR_IRQ = 0x25, - IBEX_UART_RX_OVERFLOW_IRQ = 0x24, - IBEX_UART_TX_EMPTY_IRQ = 0x23, - IBEX_UART_RX_WATERMARK_IRQ = 0x22, - IBEX_UART_TX_WATERMARK_IRQ = 0x21, + IBEX_TIMER_TIMEREXPIRED0_0 = 126, + IBEX_UART0_RX_PARITY_ERR_IRQ = 8, + IBEX_UART0_RX_TIMEOUT_IRQ = 7, + IBEX_UART0_RX_BREAK_ERR_IRQ = 6, + IBEX_UART0_RX_FRAME_ERR_IRQ = 5, + IBEX_UART0_RX_OVERFLOW_IRQ = 4, + 
IBEX_UART0_TX_EMPTY_IRQ = 3, + IBEX_UART0_RX_WATERMARK_IRQ = 2, + IBEX_UART0_TX_WATERMARK_IRQ = 1, }; #endif diff --git a/include/hw/riscv/shakti_c.h b/include/hw/riscv/shakti_c.h new file mode 100644 index 00000000000..50a2b790860 --- /dev/null +++ b/include/hw/riscv/shakti_c.h @@ -0,0 +1,75 @@ +/* + * Shakti C-class SoC emulation + * + * Copyright (c) 2021 Vijai Kumar K + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef HW_SHAKTI_H +#define HW_SHAKTI_H + +#include "hw/riscv/riscv_hart.h" +#include "hw/boards.h" +#include "hw/char/shakti_uart.h" + +#define TYPE_RISCV_SHAKTI_SOC "riscv.shakti.cclass.soc" +#define RISCV_SHAKTI_SOC(obj) \ + OBJECT_CHECK(ShaktiCSoCState, (obj), TYPE_RISCV_SHAKTI_SOC) + +typedef struct ShaktiCSoCState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + RISCVHartArrayState cpus; + DeviceState *plic; + ShaktiUartState uart; + MemoryRegion rom; + +} ShaktiCSoCState; + +#define TYPE_RISCV_SHAKTI_MACHINE MACHINE_TYPE_NAME("shakti_c") +#define RISCV_SHAKTI_MACHINE(obj) \ + OBJECT_CHECK(ShaktiCMachineState, (obj), TYPE_RISCV_SHAKTI_MACHINE) +typedef struct ShaktiCMachineState { + /*< private >*/ + MachineState parent_obj; + + /*< public >*/ + ShaktiCSoCState soc; +} ShaktiCMachineState; + +enum { + SHAKTI_C_ROM, + SHAKTI_C_RAM, + SHAKTI_C_UART, + SHAKTI_C_GPIO, + SHAKTI_C_PLIC, + SHAKTI_C_CLINT, + SHAKTI_C_I2C, +}; + +#define SHAKTI_C_PLIC_HART_CONFIG "MS" +/* Including Interrupt ID 0 (no interrupt)*/ +#define SHAKTI_C_PLIC_NUM_SOURCES 28 +/* Excluding Priority 0 */ +#define SHAKTI_C_PLIC_NUM_PRIORITIES 2 +#define SHAKTI_C_PLIC_PRIORITY_BASE 0x04 +#define SHAKTI_C_PLIC_PENDING_BASE 0x1000 +#define SHAKTI_C_PLIC_ENABLE_BASE 0x2000 +#define SHAKTI_C_PLIC_ENABLE_STRIDE 0x80 +#define SHAKTI_C_PLIC_CONTEXT_BASE 0x200000 +#define SHAKTI_C_PLIC_CONTEXT_STRIDE 0x1000 + +#endif diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h index 2656b39808a..8f63a183c47 100644 --- a/include/hw/riscv/sifive_u.h +++ b/include/hw/riscv/sifive_u.h @@ -27,6 +27,7 @@ #include "hw/misc/sifive_u_otp.h" #include "hw/misc/sifive_u_prci.h" #include "hw/ssi/sifive_spi.h" +#include "hw/timer/sifive_pwm.h" #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc" #define RISCV_U_SOC(obj) \ @@ -49,6 +50,7 @@ typedef struct SiFiveUSoCState { SiFiveSPIState spi0; SiFiveSPIState spi2; CadenceGEMState gem; + SiFivePwmState pwm[2]; uint32_t serial; char *cpu_type; @@ -92,7 +94,9 @@ enum { SIFIVE_U_DEV_FLASH0, SIFIVE_U_DEV_DRAM, SIFIVE_U_DEV_GEM, - SIFIVE_U_DEV_GEM_MGMT + SIFIVE_U_DEV_GEM_MGMT, + SIFIVE_U_DEV_PWM0, + SIFIVE_U_DEV_PWM1 }; enum { @@ -126,6 +130,14 @@ enum { SIFIVE_U_PDMA_IRQ5 = 28, SIFIVE_U_PDMA_IRQ6 = 29, SIFIVE_U_PDMA_IRQ7 = 30, + SIFIVE_U_PWM0_IRQ0 = 42, + SIFIVE_U_PWM0_IRQ1 = 43, + SIFIVE_U_PWM0_IRQ2 = 44, + SIFIVE_U_PWM0_IRQ3 = 45, + SIFIVE_U_PWM1_IRQ0 = 46, + SIFIVE_U_PWM1_IRQ1 = 47, + SIFIVE_U_PWM1_IRQ2 = 48, + SIFIVE_U_PWM1_IRQ3 = 49, SIFIVE_U_QSPI0_IRQ = 51, SIFIVE_U_GEM_IRQ = 53 }; @@ -144,7 +156,6 @@ enum { #define 
SIFIVE_U_MANAGEMENT_CPU_COUNT 1 #define SIFIVE_U_COMPUTE_CPU_COUNT 4 -#define SIFIVE_U_PLIC_HART_CONFIG "MS" #define SIFIVE_U_PLIC_NUM_SOURCES 54 #define SIFIVE_U_PLIC_NUM_PRIORITIES 7 #define SIFIVE_U_PLIC_PRIORITY_BASE 0x04 diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h index 349fee1f897..b8ef99f3489 100644 --- a/include/hw/riscv/virt.h +++ b/include/hw/riscv/virt.h @@ -43,6 +43,7 @@ struct RISCVVirtState { FWCfgState *fw_cfg; int fdt_size; + bool have_aclint; }; enum { @@ -51,6 +52,7 @@ enum { VIRT_TEST, VIRT_RTC, VIRT_CLINT, + VIRT_ACLINT_SSWI, VIRT_PLIC, VIRT_UART0, VIRT_VIRTIO, @@ -71,7 +73,6 @@ enum { VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */ }; -#define VIRT_PLIC_HART_CONFIG "MS" #define VIRT_PLIC_NUM_SOURCES 127 #define VIRT_PLIC_NUM_PRIORITIES 7 #define VIRT_PLIC_PRIORITY_BASE 0x04 diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index bba7593d2ea..75e53816135 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -138,13 +138,16 @@ struct SubchDev { int (*ccw_cb) (SubchDev *, CCW1); void (*disable_cb)(SubchDev *); IOInstEnding (*do_subchannel_work) (SubchDev *); + void (*irb_cb)(SubchDev *, IRB *); SenseId id; void *driver_data; + ESW esw; }; static inline void sch_gen_unit_exception(SubchDev *sch) { - sch->curr_status.scsw.ctrl &= ~SCSW_ACTL_START_PEND; + sch->curr_status.scsw.ctrl &= ~(SCSW_ACTL_DEVICE_ACTIVE | + SCSW_ACTL_SUBCH_ACTIVE); sch->curr_status.scsw.ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY | SCSW_STCTL_ALERT | @@ -201,6 +204,7 @@ int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); unsigned int css_find_free_chpid(uint8_t cssid); uint16_t css_build_subchannel_id(SubchDev *sch); void copy_scsw_to_guest(SCSW *dest, const SCSW *src); +void copy_esw_to_guest(ESW *dest, const ESW *src); void css_inject_io_interrupt(SubchDev *sch); void css_reset(void); void css_reset_sch(SubchDev *sch); @@ -215,6 +219,8 @@ void css_clear_sei_pending(void); IOInstEnding s390_ccw_cmd_request(SubchDev *sch); IOInstEnding do_subchannel_work_virtual(SubchDev *sub); IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); +void build_irb_passthrough(SubchDev *sch, IRB *irb); +void build_irb_virtual(SubchDev *sch, IRB *irb); int s390_ccw_halt(SubchDev *sch); int s390_ccw_clear(SubchDev *sch); diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h index c6737a30d44..3771fff9d44 100644 --- a/include/hw/s390x/ioinst.h +++ b/include/hw/s390x/ioinst.h @@ -123,10 +123,20 @@ typedef struct SCHIB { uint8_t mda[4]; } QEMU_PACKED SCHIB; +/* format-0 extended-status word */ +typedef struct ESW { + uint32_t word0; /* subchannel logout for format 0 */ + uint32_t erw; + uint64_t word2; /* failing-storage address for format 0 */ + uint32_t word4; /* secondary-CCW address for format 0 */ +} QEMU_PACKED ESW; + +#define ESW_ERW_SENSE 0x01000000 + /* interruption response block */ typedef struct IRB { SCSW scsw; - uint32_t esw[5]; + ESW esw; uint32_t ecw[8]; uint32_t emw[8]; } IRB; diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h index 49ae9f03d31..aa891c178dd 100644 --- a/include/hw/s390x/s390-pci-bus.h +++ b/include/hw/s390x/s390-pci-bus.h @@ -81,9 +81,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(S390PCIIOMMU, S390_PCI_IOMMU) #define ZPCI_SDMA_ADDR 0x100000000ULL #define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) @@ -137,7 +134,7 @@ enum 
ZpciIoatDtype { #define ZPCI_TABLE_BITS 11 #define ZPCI_PT_BITS 8 -#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + PAGE_SHIFT) +#define ZPCI_ST_SHIFT (ZPCI_PT_BITS + TARGET_PAGE_BITS) #define ZPCI_RT_SHIFT (ZPCI_ST_SHIFT + ZPCI_TABLE_BITS) #define ZPCI_RTE_FLAG_MASK 0x3fffULL diff --git a/include/hw/s390x/storage-keys.h b/include/hw/s390x/storage-keys.h index 2888d42d0b4..aa2ec2aae50 100644 --- a/include/hw/s390x/storage-keys.h +++ b/include/hw/s390x/storage-keys.h @@ -28,9 +28,72 @@ struct S390SKeysState { struct S390SKeysClass { DeviceClass parent_class; - int (*skeys_enabled)(S390SKeysState *ks); + + /** + * @skeys_are_enabled: + * + * Check whether storage keys are enabled. If not enabled, they were not + * enabled lazily either by the guest via a storage key instruction or + * by the host during migration. + * + * If disabled, everything not explicitly triggered by the guest, + * such as outgoing migration or dirty/change tracking, should not touch + * storage keys and should not lazily enable it. + * + * @ks: the #S390SKeysState + * + * Returns false if not enabled and true if enabled. + */ + bool (*skeys_are_enabled)(S390SKeysState *ks); + + /** + * @enable_skeys: + * + * Lazily enable storage keys. If this function is not implemented, + * setting a storage key will lazily enable storage keys implicitly + * instead. TCG guests have to make sure to flush the TLB of all CPUs + * if storage keys were not enabled before this call. + * + * @ks: the #S390SKeysState + * + * Returns false if not enabled before this call, and true if already + * enabled. + */ + bool (*enable_skeys)(S390SKeysState *ks); + + /** + * @get_skeys: + * + * Get storage keys for the given PFN range. This call will fail if + * storage keys have not been lazily enabled yet. + * + * Callers have to validate that a GFN is valid before this call. + * + * @ks: the #S390SKeysState + * @start_gfn: the start GFN to get storage keys for + * @count: the number of storage keys to get + * @keys: the byte array where storage keys will be stored to + * + * Returns 0 on success, returns an error if getting a storage key failed. + */ int (*get_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count, uint8_t *keys); + /** + * @set_skeys: + * + * Set storage keys for the given PFN range. This call will fail if + * storage keys have not been lazily enabled yet and implicit + * enablement is not supported. + * + * Callers have to validate that a GFN is valid before this call. + * + * @ks: the #S390SKeysState + * @start_gfn: the start GFN to set storage keys for + * @count: the number of storage keys to set + * @keys: the byte array where storage keys will be read from + * + * Returns 0 on success, returns an error if setting a storage key failed. 
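+ *
+ * A caller-side sketch (hedged and purely illustrative; @ss and an
+ * already-validated @gfn are assumed to exist in the caller):
+ *
+ *     S390SKeysClass *skc = S390_SKEYS_GET_CLASS(ss);
+ *     uint8_t key = 0;    /* example key value */
+ *
+ *     if (skc->set_skeys(ss, gfn, 1, &key)) {
+ *         /* setting the key failed, handle the error */
+ *     }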
+ */ int (*set_skeys)(S390SKeysState *ks, uint64_t start_gfn, uint64_t count, uint8_t *keys); }; diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h index ff3195a4bf8..0935e850891 100644 --- a/include/hw/s390x/tod.h +++ b/include/hw/s390x/tod.h @@ -12,7 +12,7 @@ #define HW_S390_TOD_H #include "hw/qdev-core.h" -#include "target/s390x/s390-tod.h" +#include "tcg/s390-tod.h" #include "qom/object.h" typedef struct S390TOD { diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h index aada3680b75..b1ec27612f6 100644 --- a/include/hw/scsi/esp.h +++ b/include/hw/scsi/esp.h @@ -37,6 +37,7 @@ struct ESPState { SCSIRequest *current_req; Fifo8 cmdfifo; uint8_t cmdfifo_cdb_offset; + uint8_t lun; uint32_t do_cmd; bool data_in_ready; diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index 0b726bc78c6..a567a5ed86b 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -146,8 +146,34 @@ struct SCSIBus { const SCSIBusInfo *info; }; -void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, - const SCSIBusInfo *info, const char *bus_name); +/** + * scsi_bus_init_named: Initialize a SCSI bus with the specified name + * @bus: SCSIBus object to initialize + * @bus_size: size of @bus object + * @host: Device which owns the bus (generally the SCSI controller) + * @info: structure defining callbacks etc for the controller + * @bus_name: Name to use for this bus + * + * This in-place initializes @bus as a new SCSI bus with a name + * provided by the caller. It is the caller's responsibility to make + * sure that name does not clash with the name of any other bus in the + * system. Unless you need the new bus to have a specific name, you + * should use scsi_bus_new() instead. + */ +void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + const SCSIBusInfo *info, const char *bus_name); + +/** + * scsi_bus_init: Initialize a SCSI bus + * + * This in-place-initializes @bus as a new SCSI bus and gives it + * an automatically generated unique name. + */ +static inline void scsi_bus_init(SCSIBus *bus, size_t bus_size, + DeviceState *host, const SCSIBusInfo *info) +{ + scsi_bus_init_named(bus, bus_size, host, info, NULL); +} static inline SCSIBus *scsi_bus_from_device(SCSIDevice *d) { diff --git a/include/hw/sd/npcm7xx_sdhci.h b/include/hw/sd/npcm7xx_sdhci.h new file mode 100644 index 00000000000..d728f0a40de --- /dev/null +++ b/include/hw/sd/npcm7xx_sdhci.h @@ -0,0 +1,65 @@ +/* + * NPCM7xx SD-3.0 / eMMC-4.51 Host Controller + * + * Copyright (c) 2021 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */ + +#ifndef NPCM7XX_SDHCI_H +#define NPCM7XX_SDHCI_H + +#include "hw/sd/sdhci.h" +#include "qom/object.h" + +#define TYPE_NPCM7XX_SDHCI "npcm7xx.sdhci" +#define NPCM7XX_PRSTVALS_SIZE 6 +#define NPCM7XX_PRSTVALS 0x60 +#define NPCM7XX_PRSTVALS_0 0x0 +#define NPCM7XX_PRSTVALS_1 0x2 +#define NPCM7XX_PRSTVALS_2 0x4 +#define NPCM7XX_PRSTVALS_3 0x6 +#define NPCM7XX_PRSTVALS_4 0x8 +#define NPCM7XX_PRSTVALS_5 0xA +#define NPCM7XX_BOOTTOCTRL 0x10 +#define NPCM7XX_SDHCI_REGSIZE 0x20 + +#define NPCM7XX_PRSNTS_RESET 0x04A00000 +#define NPCM7XX_BLKGAP_RESET 0x80 +#define NPCM7XX_CAPAB_RESET 0x0100200161EE0399 +#define NPCM7XX_MAXCURR_RESET 0x0000000000000005 +#define NPCM7XX_HCVER_RESET 0x1002 + +#define NPCM7XX_PRSTVALS_0_RESET 0x0040 +#define NPCM7XX_PRSTVALS_1_RESET 0x0001 +#define NPCM7XX_PRSTVALS_3_RESET 0x0001 + +OBJECT_DECLARE_SIMPLE_TYPE(NPCM7xxSDHCIState, NPCM7XX_SDHCI) + +typedef struct NPCM7xxRegs { + /* Preset Values Register Field, read-only */ + uint16_t prstvals[NPCM7XX_PRSTVALS_SIZE]; + /* Boot Timeout Control Register, read-write */ + uint32_t boottoctrl; +} NPCM7xxRegisters; + +typedef struct NPCM7xxSDHCIState { + SysBusDevice parent; + + MemoryRegion container; + MemoryRegion iomem; + BusState *bus; + NPCM7xxRegisters regs; + + SDHCIState sdhci; +} NPCM7xxSDHCIState; + +#endif /* NPCM7XX_SDHCI_H */ diff --git a/include/hw/misc/emc141x_regs.h b/include/hw/sensor/emc141x_regs.h similarity index 100% rename from include/hw/misc/emc141x_regs.h rename to include/hw/sensor/emc141x_regs.h diff --git a/hw/misc/tmp105.h b/include/hw/sensor/tmp105.h similarity index 97% rename from hw/misc/tmp105.h rename to include/hw/sensor/tmp105.h index 7c97071ad75..244e2989feb 100644 --- a/hw/misc/tmp105.h +++ b/include/hw/sensor/tmp105.h @@ -15,7 +15,7 @@ #define QEMU_TMP105_H #include "hw/i2c/i2c.h" -#include "hw/misc/tmp105_regs.h" +#include "hw/sensor/tmp105_regs.h" #include "qom/object.h" #define TYPE_TMP105 "tmp105" diff --git a/include/hw/misc/tmp105_regs.h b/include/hw/sensor/tmp105_regs.h similarity index 100% rename from include/hw/misc/tmp105_regs.h rename to include/hw/sensor/tmp105_regs.h diff --git a/include/hw/sh4/sh.h b/include/hw/sh4/sh.h index becb5969790..ec716cdd458 100644 --- a/include/hw/sh4/sh.h +++ b/include/hw/sh4/sh.h @@ -44,25 +44,18 @@ typedef struct { uint16_t portbmask_trigger; /* Return 0 if no action was taken */ int (*port_change_cb) (uint16_t porta, uint16_t portb, - uint16_t * periph_pdtra, - uint16_t * periph_portdira, - uint16_t * periph_pdtrb, - uint16_t * periph_portdirb); + uint16_t *periph_pdtra, + uint16_t *periph_portdira, + uint16_t *periph_pdtrb, + uint16_t *periph_portdirb); } sh7750_io_device; int sh7750_register_io_device(struct SH7750State *s, - sh7750_io_device * device); + sh7750_io_device *device); /* sh_serial.c */ +#define TYPE_SH_SERIAL "sh-serial" #define SH_SERIAL_FEAT_SCIF (1 << 0) -void sh_serial_init(MemoryRegion *sysmem, - hwaddr base, int feat, - uint32_t freq, Chardev *chr, - qemu_irq eri_source, - qemu_irq rxi_source, - qemu_irq txi_source, - qemu_irq tei_source, - qemu_irq bri_source); /* sh7750.c */ qemu_irq sh7750_irl(struct SH7750State *s); diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h index 65f34250572..f62d5c5e136 100644 --- a/include/hw/sh4/sh_intc.h +++ b/include/hw/sh4/sh_intc.h @@ -58,7 +58,7 @@ struct intc_desc { }; int sh_intc_get_pending_vector(struct intc_desc *desc, int imask); -struct intc_source *sh_intc_source(struct intc_desc *desc, intc_enum id); + void sh_intc_toggle_source(struct intc_source 
*source, int enable_adj, int assert_adj); diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h index 16c03fe64f3..e2655558199 100644 --- a/include/hw/ssi/aspeed_smc.h +++ b/include/hw/ssi/aspeed_smc.h @@ -29,64 +29,33 @@ #include "hw/sysbus.h" #include "qom/object.h" -typedef struct AspeedSegments { - hwaddr addr; - uint32_t size; -} AspeedSegments; - struct AspeedSMCState; -typedef struct AspeedSMCController { - const char *name; - uint8_t r_conf; - uint8_t r_ce_ctrl; - uint8_t r_ctrl0; - uint8_t r_timings; - uint8_t nregs_timings; - uint8_t conf_enable_w0; - uint8_t max_peripherals; - const AspeedSegments *segments; - hwaddr flash_window_base; - uint32_t flash_window_size; - bool has_dma; - hwaddr dma_flash_mask; - hwaddr dma_dram_mask; - uint32_t nregs; - uint32_t (*segment_to_reg)(const struct AspeedSMCState *s, - const AspeedSegments *seg); - void (*reg_to_segment)(const struct AspeedSMCState *s, uint32_t reg, - AspeedSegments *seg); -} AspeedSMCController; -typedef struct AspeedSMCFlash { - struct AspeedSMCState *controller; +#define TYPE_ASPEED_SMC_FLASH "aspeed.smc.flash" +OBJECT_DECLARE_SIMPLE_TYPE(AspeedSMCFlash, ASPEED_SMC_FLASH) +struct AspeedSMCFlash { + SysBusDevice parent_obj; - uint8_t id; - uint32_t size; + struct AspeedSMCState *controller; + uint8_t cs; MemoryRegion mmio; - DeviceState *flash; -} AspeedSMCFlash; +}; #define TYPE_ASPEED_SMC "aspeed.smc" OBJECT_DECLARE_TYPE(AspeedSMCState, AspeedSMCClass, ASPEED_SMC) -struct AspeedSMCClass { - SysBusDevice parent_obj; - const AspeedSMCController *ctrl; -}; - #define ASPEED_SMC_R_MAX (0x100 / 4) +#define ASPEED_SMC_CS_MAX 5 struct AspeedSMCState { SysBusDevice parent_obj; - const AspeedSMCController *ctrl; - MemoryRegion mmio; + MemoryRegion mmio_flash_container; MemoryRegion mmio_flash; qemu_irq irq; - int irqline; uint32_t num_cs; qemu_irq *cs_lines; @@ -103,17 +72,45 @@ struct AspeedSMCState { uint8_t r_timings; uint8_t conf_enable_w0; - /* for DMA support */ - uint64_t sdram_base; - AddressSpace flash_as; MemoryRegion *dram_mr; AddressSpace dram_as; - AspeedSMCFlash *flashes; + AspeedSMCFlash flashes[ASPEED_SMC_CS_MAX]; uint8_t snoop_index; uint8_t snoop_dummies; }; +typedef struct AspeedSegments { + hwaddr addr; + uint32_t size; +} AspeedSegments; + +struct AspeedSMCClass { + SysBusDeviceClass parent_obj; + + uint8_t r_conf; + uint8_t r_ce_ctrl; + uint8_t r_ctrl0; + uint8_t r_timings; + uint8_t nregs_timings; + uint8_t conf_enable_w0; + uint8_t max_peripherals; + const uint32_t *resets; + const AspeedSegments *segments; + hwaddr flash_window_base; + uint32_t flash_window_size; + uint32_t features; + hwaddr dma_flash_mask; + hwaddr dma_dram_mask; + uint32_t nregs; + uint32_t (*segment_to_reg)(const AspeedSMCState *s, + const AspeedSegments *seg); + void (*reg_to_segment)(const AspeedSMCState *s, uint32_t reg, + AspeedSegments *seg); + void (*dma_ctrl)(AspeedSMCState *s, uint32_t value); + int (*addr_width)(const AspeedSMCState *s); +}; + #endif /* ASPEED_SMC_H */ diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h index 84496faaf96..ee09b138810 100644 --- a/include/hw/timer/armv7m_systick.h +++ b/include/hw/timer/armv7m_systick.h @@ -15,11 +15,23 @@ #include "hw/sysbus.h" #include "qom/object.h" #include "hw/ptimer.h" +#include "hw/clock.h" #define TYPE_SYSTICK "armv7m_systick" OBJECT_DECLARE_SIMPLE_TYPE(SysTickState, SYSTICK) +/* + * QEMU interface: + * + sysbus MMIO region 0 is the register interface (covering + * the registers which are mapped at address 0xE000E010) + 
* + sysbus IRQ 0 is the interrupt line to the NVIC + * + Clock input "refclk" is the external reference clock + * (used when SYST_CSR.CLKSOURCE == 0) + * + Clock input "cpuclk" is the main CPU clock + * (used when SYST_CSR.CLKSOURCE == 1) + */ + struct SysTickState { /*< private >*/ SysBusDevice parent_obj; @@ -31,28 +43,8 @@ struct SysTickState { ptimer_state *ptimer; MemoryRegion iomem; qemu_irq irq; + Clock *refclk; + Clock *cpuclk; }; -/* - * Multiplication factor to convert from system clock ticks to qemu timer - * ticks. This should be set (by board code, usually) to a value - * equal to NANOSECONDS_PER_SECOND / frq, where frq is the clock frequency - * in Hz of the CPU. - * - * This value is used by the systick device when it is running in - * its "use the CPU clock" mode (ie when SYST_CSR.CLKSOURCE == 1) to - * set how fast the timer should tick. - * - * TODO: we should refactor this so that rather than using a global - * we use a device property or something similar. This is complicated - * because (a) the property would need to be plumbed through from the - * board code down through various layers to the systick device - * and (b) the property needs to be modifiable after realize, because - * the stellaris board uses this to implement the behaviour where the - * guest can reprogram the PLL registers to downclock the CPU, and the - * systick device needs to react accordingly. Possibly this should - * be deferred until we have a good API for modelling clock trees. - */ -extern int system_clock_scale; - #endif diff --git a/include/hw/timer/avr_timer16.h b/include/hw/timer/avr_timer16.h index 05362543378..a1a032a24dc 100644 --- a/include/hw/timer/avr_timer16.h +++ b/include/hw/timer/avr_timer16.h @@ -30,7 +30,6 @@ #include "hw/sysbus.h" #include "qemu/timer.h" -#include "hw/hw.h" #include "qom/object.h" enum NextInterrupt { diff --git a/include/hw/timer/ibex_timer.h b/include/hw/timer/ibex_timer.h new file mode 100644 index 00000000000..b6f69b38ee7 --- /dev/null +++ b/include/hw/timer/ibex_timer.h @@ -0,0 +1,54 @@ +/* + * QEMU lowRISC Ibex Timer device + * + * Copyright (c) 2021 Western Digital + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef HW_IBEX_TIMER_H +#define HW_IBEX_TIMER_H + +#include "hw/sysbus.h" + +#define TYPE_IBEX_TIMER "ibex-timer" +OBJECT_DECLARE_SIMPLE_TYPE(IbexTimerState, IBEX_TIMER) + +struct IbexTimerState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t timer_ctrl; + uint32_t timer_cfg0; + uint32_t timer_compare_lower0; + uint32_t timer_compare_upper0; + uint32_t timer_intr_enable; + uint32_t timer_intr_state; + uint32_t timer_intr_test; + + uint32_t timebase_freq; + + qemu_irq irq; + + qemu_irq m_timer_irq; +}; +#endif /* HW_IBEX_TIMER_H */ diff --git a/include/hw/timer/sifive_pwm.h b/include/hw/timer/sifive_pwm.h new file mode 100644 index 00000000000..6a8cf7b29e4 --- /dev/null +++ b/include/hw/timer/sifive_pwm.h @@ -0,0 +1,62 @@ +/* + * SiFive PWM + * + * Copyright (c) 2020 Western Digital + * + * Author: Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_SIFIVE_PWM_H +#define HW_SIFIVE_PWM_H + +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qom/object.h" + +#define TYPE_SIFIVE_PWM "sifive-pwm" + +#define SIFIVE_PWM(obj) \ + OBJECT_CHECK(SiFivePwmState, (obj), TYPE_SIFIVE_PWM) + +#define SIFIVE_PWM_CHANS 4 +#define SIFIVE_PWM_IRQS SIFIVE_PWM_CHANS + +typedef struct SiFivePwmState { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + QEMUTimer timer[SIFIVE_PWM_CHANS]; + /* + * if en bit(s) set, is the number of ticks when pwmcount was 0 + * if en bit(s) not set, is the number of ticks in pwmcount + */ + uint64_t tick_offset; + uint64_t freq_hz; + + uint32_t pwmcfg; + uint32_t pwmcmp[SIFIVE_PWM_CHANS]; + + qemu_irq irqs[SIFIVE_PWM_IRQS]; +} SiFivePwmState; + +#endif /* HW_SIFIVE_PWM_H */ diff --git a/include/hw/timer/stellaris-gptm.h b/include/hw/timer/stellaris-gptm.h new file mode 100644 index 00000000000..fde1fc6f0c7 --- /dev/null +++ b/include/hw/timer/stellaris-gptm.h @@ -0,0 +1,51 @@ +/* + * Luminary Micro Stellaris General Purpose Timer Module + * + * Copyright (c) 2006 CodeSourcery. + * Written by Paul Brook + * + * This code is licensed under the GPL. 
+ */ + +#ifndef HW_TIMER_STELLARIS_GPTM_H +#define HW_TIMER_STELLARIS_GPTM_H + +#include "qom/object.h" +#include "hw/sysbus.h" +#include "hw/irq.h" +#include "hw/clock.h" + +#define TYPE_STELLARIS_GPTM "stellaris-gptm" +OBJECT_DECLARE_SIMPLE_TYPE(gptm_state, STELLARIS_GPTM) + +/* + * QEMU interface: + * + sysbus MMIO region 0: register bank + * + sysbus IRQ 0: timer interrupt + * + unnamed GPIO output 0: trigger output for the ADC + * + Clock input "clk": the 32-bit countdown timer runs at this speed + */ +struct gptm_state { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t config; + uint32_t mode[2]; + uint32_t control; + uint32_t state; + uint32_t mask; + uint32_t load[2]; + uint32_t match[2]; + uint32_t prescale[2]; + uint32_t match_prescale[2]; + uint32_t rtc; + int64_t tick[2]; + struct gptm_state *opaque[2]; + QEMUTimer *timer[2]; + /* The timers have an alternate output used to trigger the ADC. */ + qemu_irq trigger; + qemu_irq irq; + Clock *clk; +}; + +#endif diff --git a/include/hw/tricore/tricore_testdevice.h b/include/hw/tricore/tricore_testdevice.h new file mode 100644 index 00000000000..2c56c51bcb8 --- /dev/null +++ b/include/hw/tricore/tricore_testdevice.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018-2021 Bastian Koppelmann Paderborn University + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + + +#ifndef HW_TRICORE_TESTDEV_H +#define HW_TRICORE_TESTDEV_H + +#include "hw/sysbus.h" +#include "hw/hw.h" + +#define TYPE_TRICORE_TESTDEVICE "tricore_testdevice" +#define TRICORE_TESTDEVICE(obj) \ + OBJECT_CHECK(TriCoreTestDeviceState, (obj), TYPE_TRICORE_TESTDEVICE) + +typedef struct { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion iomem; + +} TriCoreTestDeviceState; + +#endif diff --git a/include/hw/unicore32/puv3.h b/include/hw/unicore32/puv3.h deleted file mode 100644 index f587a1f6228..00000000000 --- a/include/hw/unicore32/puv3.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Misc PKUnity SoC declarations - * - * Copyright (C) 2010-2012 Guan Xuetao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation, or any later version. - * See the COPYING file in the top-level directory. - */ - -#ifndef QEMU_HW_PUV3_H -#define QEMU_HW_PUV3_H - -#define PUV3_REGS_OFFSET (0x1000) /* 4K is reasonable */ - -/* Hardware interrupts */ -#define PUV3_IRQS_NR (32) - -#define PUV3_IRQS_GPIOLOW0 (0) -#define PUV3_IRQS_GPIOLOW1 (1) -#define PUV3_IRQS_GPIOLOW2 (2) -#define PUV3_IRQS_GPIOLOW3 (3) -#define PUV3_IRQS_GPIOLOW4 (4) -#define PUV3_IRQS_GPIOLOW5 (5) -#define PUV3_IRQS_GPIOLOW6 (6) -#define PUV3_IRQS_GPIOLOW7 (7) -#define PUV3_IRQS_GPIOHIGH (8) -#define PUV3_IRQS_PS2_KBD (22) -#define PUV3_IRQS_PS2_AUX (23) -#define PUV3_IRQS_OST0 (26) - -/* All puv3_*.c use DPRINTF for debug. */ -#ifdef DEBUG_PUV3 -#define DPRINTF(fmt, ...) 
printf("%s: " fmt , __func__, ## __VA_ARGS__) -#else -#define DPRINTF(fmt, ...) do {} while (0) -#endif - -#endif /* QEMU_HW_PUV3_H */ diff --git a/include/hw/usb.h b/include/hw/usb.h index 436e07b3040..33668dd0a99 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -219,6 +219,7 @@ enum USBDeviceFlags { USB_DEV_FLAG_IS_HOST, USB_DEV_FLAG_MSOS_DESC_ENABLE, USB_DEV_FLAG_MSOS_DESC_IN_USE, + USB_DEV_FLAG_IS_SCSI_STORAGE, }; /* definition of a USB device */ @@ -465,7 +466,6 @@ void usb_generic_async_ctrl_complete(USBDevice *s, USBPacket *p); /* usb-linux.c */ void hmp_info_usbhost(Monitor *mon, const QDict *qdict); -bool usb_host_dev_is_scsi_storage(USBDevice *usbdev); /* usb ports of the VM */ @@ -561,6 +561,11 @@ const char *usb_device_get_product_desc(USBDevice *dev); const USBDesc *usb_device_get_usb_desc(USBDevice *dev); +static inline bool usb_device_is_scsi_storage(USBDevice *dev) +{ + return dev->flags & (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); +} + /* quirks.c */ /* In bulk endpoints are streaming data sources (iow behave like isoc eps) */ diff --git a/include/hw/usb/dwc2-regs.h b/include/hw/usb/dwc2-regs.h index 40af23a0bad..a7eb5314854 100644 --- a/include/hw/usb/dwc2-regs.h +++ b/include/hw/usb/dwc2-regs.h @@ -39,8 +39,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __DWC2_HW_H__ -#define __DWC2_HW_H__ +#ifndef DWC2_HW_H +#define DWC2_HW_H #define HSOTG_REG(x) (x) diff --git a/include/hw/usb/msd.h b/include/hw/usb/msd.h index 7538c54569b..54e9f38bda4 100644 --- a/include/hw/usb/msd.h +++ b/include/hw/usb/msd.h @@ -17,7 +17,7 @@ enum USBMSDMode { USB_MSDM_CSW /* Command Status. */ }; -struct usb_msd_csw { +struct QEMU_PACKED usb_msd_csw { uint32_t sig; uint32_t tag; uint32_t residue; diff --git a/include/hw/usb/xlnx-usb-subsystem.h b/include/hw/usb/xlnx-usb-subsystem.h index 739bef7f451..999e423951a 100644 --- a/include/hw/usb/xlnx-usb-subsystem.h +++ b/include/hw/usb/xlnx-usb-subsystem.h @@ -22,8 +22,8 @@ * THE SOFTWARE. */ -#ifndef _XLNX_VERSAL_USB_SUBSYSTEM_H_ -#define _XLNX_VERSAL_USB_SUBSYSTEM_H_ +#ifndef XLNX_VERSAL_USB_SUBSYSTEM_H +#define XLNX_VERSAL_USB_SUBSYSTEM_H #include "hw/usb/xlnx-versal-usb2-ctrl-regs.h" #include "hw/usb/hcd-dwc3.h" diff --git a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h index 975a717627a..b76dce04195 100644 --- a/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h +++ b/include/hw/usb/xlnx-versal-usb2-ctrl-regs.h @@ -23,8 +23,8 @@ * THE SOFTWARE. 
*/ -#ifndef _XLNX_USB2_REGS_H_ -#define _XLNX_USB2_REGS_H_ +#ifndef XLNX_USB2_REGS_H +#define XLNX_USB2_REGS_H #define TYPE_XILINX_VERSAL_USB2_CTRL_REGS "xlnx.versal-usb2-ctrl-regs" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 6141162d7ae..8af11b0a769 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -88,9 +88,11 @@ typedef struct VFIOContainer { uint64_t dirty_pgsizes; uint64_t max_dirty_bitmap_size; unsigned long pgsizes; + unsigned int dma_max_mappings; QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; QLIST_ENTRY(VFIOContainer) next; } VFIOContainer; @@ -102,6 +104,16 @@ typedef struct VFIOGuestIOMMU { QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; } VFIOGuestIOMMU; +typedef struct VFIORamDiscardListener { + VFIOContainer *container; + MemoryRegion *mr; + hwaddr offset_within_address_space; + hwaddr size; + uint64_t granularity; + RamDiscardListener listener; + QLIST_ENTRY(VFIORamDiscardListener) next; +} VFIORamDiscardListener; + typedef struct VFIOHostDMAWindow { hwaddr min_iova; hwaddr max_iova; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 8a6f8e2a7a3..81bf3109f83 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -37,7 +37,8 @@ struct vhost_scsi_target; struct vhost_iotlb_msg; struct vhost_virtqueue; -typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); +typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque, + Error **errp); typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); typedef int (*vhost_backend_memslots_limit)(struct vhost_dev *dev); @@ -97,7 +98,7 @@ typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags); typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len); + uint32_t config_len, Error **errp); typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev, void *session_info, @@ -172,12 +173,6 @@ typedef struct VhostOps { vhost_force_iommu_op vhost_force_iommu; } VhostOps; -extern const VhostOps user_ops; -extern const VhostOps vdpa_ops; - -int vhost_set_backend_type(struct vhost_dev *dev, - VhostBackendType backend_type); - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, uint64_t iova, uint64_t uaddr, uint64_t len, diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h new file mode 100644 index 00000000000..deae47a76d5 --- /dev/null +++ b/include/hw/virtio/vhost-user-i2c.h @@ -0,0 +1,28 @@ +/* + * Vhost-user i2c virtio device + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_I2C_H +#define _QEMU_VHOST_USER_I2C_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" + +#define TYPE_VHOST_USER_I2C "vhost-user-i2c-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserI2C, VHOST_USER_I2C) + +struct VHostUserI2C { + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *vq; + bool connected; +}; + +#endif /* _QEMU_VHOST_USER_I2C_H */ diff --git a/include/hw/virtio/vhost-user-rng.h b/include/hw/virtio/vhost-user-rng.h new file mode 100644 index 00000000000..071539996d1 --- /dev/null +++ b/include/hw/virtio/vhost-user-rng.h @@ -0,0 
+1,33 @@ +/* + * Vhost-user RNG virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_RNG_H +#define _QEMU_VHOST_USER_RNG_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" + +#define TYPE_VHOST_USER_RNG "vhost-user-rng" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserRNG, VHOST_USER_RNG) + +struct VHostUserRNG { + /*< private >*/ + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *req_vq; + bool connected; + + /*< public >*/ +}; + +#endif /* _QEMU_VHOST_USER_RNG_H */ diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 9b81a409da8..3ce79a646df 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -13,15 +13,22 @@ #define HW_VIRTIO_VHOST_VDPA_H #include "hw/virtio/virtio.h" +#include "standard-headers/linux/vhost_types.h" + +typedef struct VhostVDPAHostNotifier { + MemoryRegion mr; + void *addr; +} VhostVDPAHostNotifier; typedef struct vhost_vdpa { int device_fd; + int index; uint32_t msg_type; + bool iotlb_batch_begin_sent; MemoryListener listener; + struct vhost_vdpa_iova_range iova_range; struct vhost_dev *dev; + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; } VhostVDPA; -extern AddressSpace address_space_memory; -extern int vhost_vdpa_get_device_id(struct vhost_dev *dev, - uint32_t *device_id); #endif diff --git a/include/hw/virtio/vhost-vsock-common.h b/include/hw/virtio/vhost-vsock-common.h index e412b5ee982..d8b565b4dac 100644 --- a/include/hw/virtio/vhost-vsock-common.h +++ b/include/hw/virtio/vhost-vsock-common.h @@ -35,6 +35,9 @@ struct VHostVSockCommon { VirtQueue *trans_vq; QEMUTimer *post_load_timer; + + /* features */ + OnOffAuto seqpacket; }; int vhost_vsock_common_start(VirtIODevice *vdev); @@ -43,5 +46,7 @@ int vhost_vsock_common_pre_save(void *opaque); int vhost_vsock_common_post_load(void *opaque, int version_id); void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name); void vhost_vsock_common_unrealize(VirtIODevice *vdev); +uint64_t vhost_vsock_common_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp); #endif /* _QEMU_VHOST_VSOCK_COMMON_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 4a8bc75415f..58a73e7b7a1 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -71,9 +71,13 @@ struct vhost_dev { int n_tmp_sections; MemoryRegionSection *tmp_sections; struct vhost_virtqueue *vqs; - int nvqs; + unsigned int nvqs; /* the first virtqueue which would be used by this vhost dev */ int vq_index; + /* one past the last vq index for the virtio device (not vhost) */ + int vq_index_end; + /* if non-zero, minimum required value for max_queues */ + int num_queues; uint64_t features; uint64_t acked_features; uint64_t backend_features; @@ -93,6 +97,10 @@ struct vhost_dev { const VhostDevConfigOps *config_ops; }; +extern const VhostOps kernel_ops; +extern const VhostOps user_ops; +extern const VhostOps vdpa_ops; + struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[2]; @@ -102,7 +110,7 @@ struct vhost_net { int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, - uint32_t busyloop_timeout); + uint32_t busyloop_timeout, Error **errp); void vhost_dev_cleanup(struct vhost_dev *hdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); 
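+/*
+ * vhost_dev_init() now reports failures through @errp in addition to its
+ * errno-style return value. A hedged caller-side sketch (the device state
+ * @s, @opaque and @busyloop_timeout are assumptions, not part of this patch):
+ *
+ *     Error *local_err = NULL;
+ *
+ *     if (vhost_dev_init(&s->dev, opaque, VHOST_BACKEND_TYPE_KERNEL,
+ *                        busyloop_timeout, &local_err) < 0) {
+ *         error_propagate(errp, local_err);
+ *         return;
+ *     }
+ */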
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); @@ -128,8 +136,8 @@ int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); -int vhost_dev_get_config(struct vhost_dev *dev, uint8_t *config, - uint32_t config_len); +int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, + uint32_t config_len, Error **errp); int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, uint32_t offset, uint32_t size, uint32_t flags); /* notifier callback in case vhost device config space changed diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index ef8abe49c5a..7ab8c9dab05 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -93,6 +93,7 @@ struct VirtioBusClass { */ bool has_variable_vring_alignment; AddressSpace *(*get_dma_as)(DeviceState *d); + bool (*iommu_enabled)(DeviceState *d); }; struct VirtioBusState { @@ -154,5 +155,6 @@ void virtio_bus_release_ioeventfd(VirtioBusState *bus); int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); /* Tell the bus that the ioeventfd handler is no longer required. */ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n); - +/* Whether the IOMMU is enabled for this device */ +bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev); #endif /* VIRTIO_BUS_H */ diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h index 203f9e17189..e2bee8f5955 100644 --- a/include/hw/virtio/virtio-gpu-bswap.h +++ b/include/hw/virtio/virtio-gpu-bswap.h @@ -59,4 +59,20 @@ virtio_gpu_t2d_bswap(struct virtio_gpu_transfer_to_host_2d *t2d) le32_to_cpus(&t2d->padding); } +static inline void +virtio_gpu_create_blob_bswap(struct virtio_gpu_resource_create_blob *cblob) +{ + virtio_gpu_ctrl_hdr_bswap(&cblob->hdr); + le32_to_cpus(&cblob->resource_id); + le32_to_cpus(&cblob->blob_flags); + le64_to_cpus(&cblob->size); +} + +static inline void +virtio_gpu_scanout_blob_bswap(struct virtio_gpu_set_scanout_blob *ssb) +{ + virtio_gpu_bswap_32(ssb, sizeof(*ssb) - sizeof(ssb->offsets[3])); + le32_to_cpus(&ssb->offsets[3]); +} + #endif diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index fae149235c5..acfba7c76c1 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -29,7 +29,10 @@ OBJECT_DECLARE_TYPE(VirtIOGPUBase, VirtIOGPUBaseClass, VIRTIO_GPU_BASE) #define TYPE_VIRTIO_GPU "virtio-gpu-device" -OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPU, VIRTIO_GPU) +OBJECT_DECLARE_TYPE(VirtIOGPU, VirtIOGPUClass, VIRTIO_GPU) + +#define TYPE_VIRTIO_GPU_GL "virtio-gpu-gl-device" +OBJECT_DECLARE_SIMPLE_TYPE(VirtIOGPUGL, VIRTIO_GPU_GL) #define TYPE_VHOST_USER_GPU "vhost-user-gpu" OBJECT_DECLARE_SIMPLE_TYPE(VhostUserGPU, VHOST_USER_GPU) @@ -47,9 +50,23 @@ struct virtio_gpu_simple_resource { uint32_t scanout_bitmask; pixman_image_t *image; uint64_t hostmem; + + uint64_t blob_size; + void *blob; + int dmabuf_fd; + uint8_t *remapped; + QTAILQ_ENTRY(virtio_gpu_simple_resource) next; }; +struct virtio_gpu_framebuffer { + pixman_format_code_t format; + uint32_t bytes_pp; + uint32_t width, height; + uint32_t stride; + uint32_t offset; +}; + struct virtio_gpu_scanout { QemuConsole *con; DisplaySurface *ds; @@ -72,6 +89,7 @@ enum virtio_gpu_base_conf_flags { VIRTIO_GPU_FLAG_STATS_ENABLED, VIRTIO_GPU_FLAG_EDID_ENABLED, VIRTIO_GPU_FLAG_DMABUF_ENABLED, + VIRTIO_GPU_FLAG_BLOB_ENABLED, }; #define virtio_gpu_virgl_enabled(_cfg) \ 
@@ -82,6 +100,8 @@ enum virtio_gpu_base_conf_flags { (_cfg.flags & (1 << VIRTIO_GPU_FLAG_EDID_ENABLED)) #define virtio_gpu_dmabuf_enabled(_cfg) \ (_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED)) +#define virtio_gpu_blob_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED)) struct virtio_gpu_base_conf { uint32_t max_outputs; @@ -108,7 +128,6 @@ struct VirtIOGPUBase { struct virtio_gpu_config virtio_config; const GraphicHwOps *hw_ops; - bool use_virgl_renderer; int renderer_blocked; int enable; @@ -131,6 +150,12 @@ struct VirtIOGPUBaseClass { DEFINE_PROP_UINT32("xres", _state, _conf.xres, 1024), \ DEFINE_PROP_UINT32("yres", _state, _conf.yres, 768) +typedef struct VGPUDMABuf { + QemuDmaBuf buf; + uint32_t scanout_id; + QTAILQ_ENTRY(VGPUDMABuf) next; +} VGPUDMABuf; + struct VirtIOGPU { VirtIOGPUBase parent_obj; @@ -149,8 +174,6 @@ struct VirtIOGPU { uint64_t hostmem; bool processing_cmdq; - bool renderer_inited; - bool renderer_reset; QEMUTimer *fence_poll; QEMUTimer *print_stats; @@ -161,6 +184,28 @@ struct VirtIOGPU { uint32_t req_3d; uint32_t bytes_3d; } stats; + + struct { + QTAILQ_HEAD(, VGPUDMABuf) bufs; + VGPUDMABuf *primary[VIRTIO_GPU_MAX_SCANOUTS]; + } dmabuf; +}; + +struct VirtIOGPUClass { + VirtIOGPUBaseClass parent; + + void (*handle_ctrl)(VirtIODevice *vdev, VirtQueue *vq); + void (*process_cmd)(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); + void (*update_cursor_data)(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id); +}; + +struct VirtIOGPUGL { + struct VirtIOGPU parent_obj; + + bool renderer_inited; + bool renderer_reset; }; struct VhostUserGPU { @@ -207,17 +252,35 @@ void virtio_gpu_get_display_info(VirtIOGPU *g, void virtio_gpu_get_edid(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - struct virtio_gpu_resource_attach_backing *ab, + uint32_t nr_entries, uint32_t offset, struct virtio_gpu_ctrl_command *cmd, - uint64_t **addr, struct iovec **iov); + uint64_t **addr, struct iovec **iov, + uint32_t *niov); void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g, struct iovec *iov, uint32_t count); void virtio_gpu_process_cmdq(VirtIOGPU *g); +void virtio_gpu_device_realize(DeviceState *qdev, Error **errp); +void virtio_gpu_reset(VirtIODevice *vdev); +void virtio_gpu_simple_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); +void virtio_gpu_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id); + +/* virtio-gpu-udmabuf.c */ +bool virtio_gpu_have_udmabuf(void); +void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res); +void virtio_gpu_fini_udmabuf(struct virtio_gpu_simple_resource *res); +int virtio_gpu_update_dmabuf(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r); /* virtio-gpu-3d.c */ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd); void virtio_gpu_virgl_fence_poll(VirtIOGPU *g); +void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g); void virtio_gpu_virgl_reset(VirtIOGPU *g); int virtio_gpu_virgl_init(VirtIOGPU *g); int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g); diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index 273e35c04bc..e2339e5b72e 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -26,7 +26,7 @@ #include "qom/object.h" #define TYPE_VIRTIO_IOMMU "virtio-iommu-device" -#define TYPE_VIRTIO_IOMMU_PCI 
"virtio-iommu-device-base" +#define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci" OBJECT_DECLARE_SIMPLE_TYPE(VirtIOIOMMU, VIRTIO_IOMMU) #define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region" diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index 4eeb82d5ddb..a5dd6a493b6 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -65,8 +65,8 @@ struct VirtIOMEM { /* notifiers to notify when "size" changes */ NotifierList size_change_notifiers; - /* don't migrate unplugged memory */ - NotifierWithReturn precopy_notifier; + /* listeners to notify on plug/unplug activity. */ + QLIST_HEAD(, RamDiscardListener) rdl_list; }; struct VirtIOMEMClass { diff --git a/include/hw/virtio/virtio-mmio.h b/include/hw/virtio/virtio-mmio.h index d4c4c386ab0..090f7730e77 100644 --- a/include/hw/virtio/virtio-mmio.h +++ b/include/hw/virtio/virtio-mmio.h @@ -49,12 +49,17 @@ typedef struct VirtIOMMIOQueue { uint32_t used[2]; } VirtIOMMIOQueue; +#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT 1 +#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD \ + (1 << VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT) + struct VirtIOMMIOProxy { /* Generic */ SysBusDevice parent_obj; MemoryRegion iomem; qemu_irq irq; bool legacy; + uint32_t flags; /* Guest accessible state needing migration and reset */ uint32_t host_features_sel; uint32_t guest_features_sel; diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 7e96d193aa6..eb87032627d 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -21,6 +21,8 @@ #include "qemu/option_int.h" #include "qom/object.h" +#include "ebpf/ebpf_rss.h" + #define TYPE_VIRTIO_NET "virtio-net-device" OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) @@ -130,6 +132,7 @@ typedef struct VirtioNetRscChain { typedef struct VirtioNetRssData { bool enabled; + bool enabled_software_rss; bool redirect; bool populate_hash; uint32_t hash_types; @@ -191,8 +194,9 @@ struct VirtIONet { NICConf nic_conf; DeviceState *qdev; int multiqueue; - uint16_t max_queues; - uint16_t curr_queues; + uint16_t max_queue_pairs; + uint16_t curr_queue_pairs; + uint16_t max_ncs; size_t config_size; char *netclient_name; char *netclient_type; @@ -206,9 +210,12 @@ struct VirtIONet { bool failover_primary_hidden; bool failover; DeviceListener primary_listener; + QDict *primary_opts; + bool primary_opts_from_json; Notifier migration_state; VirtioNetRssData rss_data; struct NetRxPkt *rx_pkt; + struct EBPFRSSContext ebpf_rss; }; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index b7ece7a6a89..8bab9cfb750 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -43,7 +43,7 @@ typedef struct VirtIOFeature { size_t end; } VirtIOFeature; -size_t virtio_feature_get_config_size(VirtIOFeature *features, +size_t virtio_feature_get_config_size(const VirtIOFeature *features, uint64_t host_features); typedef struct VirtQueue VirtQueue; diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h index 80b03661e30..f945cd6c583 100644 --- a/include/hw/watchdog/wdt_aspeed.h +++ b/include/hw/watchdog/wdt_aspeed.h @@ -44,6 +44,7 @@ struct AspeedWDTClass { uint32_t reset_ctrl_reg; void (*reset_pulse)(AspeedWDTState *s, uint32_t property); void (*wdt_reload)(AspeedWDTState *s); + uint64_t (*sanitize_ctrl)(uint64_t data); }; #endif /* WDT_ASPEED_H */ diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index 
82e56339dd7..a8118b41acf 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -134,6 +134,12 @@ static inline xenforeignmemory_resource_handle *xenforeignmemory_map_resource( return NULL; } +static inline int xenforeignmemory_unmap_resource( + xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres) +{ + return 0; +} + #endif /* CONFIG_XEN_CTRL_INTERFACE_VERSION < 41100 */ #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 41000 diff --git a/include/libdecnumber/decNumber.h b/include/libdecnumber/decNumber.h index aa115fed079..41bc2a0d36f 100644 --- a/include/libdecnumber/decNumber.h +++ b/include/libdecnumber/decNumber.h @@ -116,12 +116,16 @@ decNumber * decNumberFromUInt32(decNumber *, uint32_t); decNumber *decNumberFromInt64(decNumber *, int64_t); decNumber *decNumberFromUInt64(decNumber *, uint64_t); + decNumber *decNumberFromInt128(decNumber *, uint64_t, int64_t); + decNumber *decNumberFromUInt128(decNumber *, uint64_t, uint64_t); decNumber * decNumberFromString(decNumber *, const char *, decContext *); char * decNumberToString(const decNumber *, char *); char * decNumberToEngString(const decNumber *, char *); uint32_t decNumberToUInt32(const decNumber *, decContext *); int32_t decNumberToInt32(const decNumber *, decContext *); int64_t decNumberIntegralToInt64(const decNumber *dn, decContext *set); + void decNumberIntegralToInt128(const decNumber *dn, decContext *set, + uint64_t *plow, uint64_t *phigh); uint8_t * decNumberGetBCD(const decNumber *, uint8_t *); decNumber * decNumberSetBCD(decNumber *, const uint8_t *, uint32_t); diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h index 4d53c077f2a..6198ca85930 100644 --- a/include/libdecnumber/decNumberLocal.h +++ b/include/libdecnumber/decNumberLocal.h @@ -98,7 +98,7 @@ /* Shared lookup tables */ extern const uByte DECSTICKYTAB[10]; /* re-round digits if sticky */ - extern const uLong DECPOWERS[19]; /* powers of ten table */ + extern const uLong DECPOWERS[20]; /* powers of ten table */ /* The following are included from decDPD.h */ extern const uShort DPD2BIN[1024]; /* DPD -> 0-999 */ extern const uShort BIN2DPD[1000]; /* 0-999 -> DPD */ diff --git a/include/migration/blocker.h b/include/migration/blocker.h index acd27018e94..9cebe2ba06a 100644 --- a/include/migration/blocker.h +++ b/include/migration/blocker.h @@ -25,6 +25,22 @@ */ int migrate_add_blocker(Error *reason, Error **errp); +/** + * @migrate_add_blocker_internal - prevent migration from proceeding without + * only-migrate implications + * + * @reason - an error to be returned whenever migration is attempted + * + * @errp - [out] The reason (if any) we cannot block migration right now. + * + * @returns - 0 on success, -EBUSY on failure, with errp set. + * + * Some of the migration blockers can be temporary (e.g., for a few seconds), + * so it shouldn't need to conflict with "-only-migratable". For those cases, + * we can call this function rather than @migrate_add_blocker(). 
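+ *
+ * A hedged usage sketch (the surrounding caller and its @errp are assumed;
+ * the message text is only an example):
+ *
+ *     Error *blocker = NULL;
+ *
+ *     error_setg(&blocker, "feature X is in use, migration is blocked");
+ *     if (migrate_add_blocker_internal(blocker, errp) < 0) {
+ *         error_free(blocker);
+ *         return;
+ *     }
+ *     ...
+ *     migrate_del_blocker(blocker);
+ *     error_free(blocker);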
+ */ +int migrate_add_blocker_internal(Error *reason, Error **errp); + /** * @migrate_del_blocker - remove a blocking error from migration * diff --git a/include/migration/misc.h b/include/migration/misc.h index 738675ef528..465906710de 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -37,7 +37,6 @@ void precopy_infrastructure_init(void); void precopy_add_notifier(NotifierWithReturn *n); void precopy_remove_notifier(NotifierWithReturn *n); int precopy_notify(PrecopyNotifyReason reason, Error **errp); -void precopy_enable_free_page_optimization(void); void ram_mig_init(void); void qemu_guest_free_page_hint(void *addr, size_t len); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index ab64f7c217b..3f0290f21ad 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -153,6 +153,7 @@ typedef enum { MIG_PRI_DEFAULT = 0, MIG_PRI_IOMMU, /* Must happen before PCI devices */ MIG_PRI_PCI_BUS, /* Must happen before IOMMU */ + MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */ MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */ MIG_PRI_GICV3, /* Must happen before the ITS */ MIG_PRI_MAX, @@ -194,8 +195,6 @@ struct VMStateDescription { const VMStateDescription **subsections; }; -extern const VMStateDescription vmstate_dummy; - extern const VMStateInfo vmstate_info_bool; extern const VMStateInfo vmstate_info_int8; diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h index 60fc92722ae..ffdc15a34b6 100644 --- a/include/monitor/hmp-target.h +++ b/include/monitor/hmp-target.h @@ -48,6 +48,7 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict); void hmp_info_tlb(Monitor *mon, const QDict *qdict); void hmp_mce(Monitor *mon, const QDict *qdict); void hmp_info_local_apic(Monitor *mon, const QDict *qdict); -void hmp_info_io_apic(Monitor *mon, const QDict *qdict); +void hmp_info_sev(Monitor *mon, const QDict *qdict); +void hmp_info_sgx(Monitor *mon, const QDict *qdict); #endif /* MONITOR_HMP_TARGET_H */ diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h index 605d57287ae..96d014826ad 100644 --- a/include/monitor/hmp.h +++ b/include/monitor/hmp.h @@ -15,8 +15,9 @@ #define HMP_H #include "qemu/readline.h" +#include "qapi/qapi-types-common.h" -void hmp_handle_error(Monitor *mon, Error *err); +bool hmp_handle_error(Monitor *mon, Error *err); void hmp_info_name(Monitor *mon, const QDict *qdict); void hmp_info_version(Monitor *mon, const QDict *qdict); @@ -124,10 +125,13 @@ void hmp_info_ramblock(Monitor *mon, const QDict *qdict); void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict); void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict); void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict); -void hmp_info_sev(Monitor *mon, const QDict *qdict); void hmp_info_replay(Monitor *mon, const QDict *qdict); void hmp_replay_break(Monitor *mon, const QDict *qdict); void hmp_replay_delete_break(Monitor *mon, const QDict *qdict); void hmp_replay_seek(Monitor *mon, const QDict *qdict); +void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict); +void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict); +void hmp_human_readable_text_helper(Monitor *mon, + HumanReadableText *(*qmp_handler)(Error **)); #endif diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index af3887bb71d..12d395d62d6 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -4,7 +4,7 @@ #include "block/block.h" #include "qapi/qapi-types-misc.h" #include "qemu/readline.h" -#include 
"include/exec/hwaddr.h" +#include "exec/hwaddr.h" typedef struct MonitorHMP MonitorHMP; typedef struct MonitorOptions MonitorOptions; @@ -51,4 +51,9 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags); void monitor_fdset_dup_fd_remove(int dup_fd); int64_t monitor_fdset_dup_fd_find(int dup_fd); +void monitor_register_hmp(const char *name, bool info, + void (*cmd)(Monitor *mon, const QDict *qdict)); +void monitor_register_hmp_info_hrt(const char *name, + HumanReadableText *(*handler)(Error **errp)); + #endif /* MONITOR_H */ diff --git a/include/monitor/qdev.h b/include/monitor/qdev.h index eaa947d73a3..1d57bf65779 100644 --- a/include/monitor/qdev.h +++ b/include/monitor/qdev.h @@ -9,6 +9,31 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp); int qdev_device_help(QemuOpts *opts); DeviceState *qdev_device_add(QemuOpts *opts, Error **errp); -void qdev_set_id(DeviceState *dev, const char *id); +DeviceState *qdev_device_add_from_qdict(const QDict *opts, + bool from_json, Error **errp); + +/** + * qdev_set_id: parent the device and set its id if provided. + * @dev: device to handle + * @id: id to be given to the device, or NULL. + * + * Returns: the id of the device in case of success; otherwise NULL. + * + * @dev must be unrealized, unparented and must not have an id. + * + * If @id is non-NULL, this function tries to setup @dev qom path as + * "/peripheral/id". If @id is already taken, it fails. If it succeeds, + * the id field of @dev is set to @id (@dev now owns the given @id + * parameter). + * + * If @id is NULL, this function generates a unique name and setups @dev + * qom path as "/peripheral-anon/name". This name is not set as the id + * of @dev. + * + * Upon success, it returns the id/name (generated or provided). The + * returned string is owned by the corresponding child property and must + * not be freed by the caller. 
+ */ +const char *qdev_set_id(DeviceState *dev, char *id, Error **errp); #endif diff --git a/include/net/net.h b/include/net/net.h index 1ef536d7712..523136c7acb 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -61,6 +61,8 @@ typedef int (SetVnetBE)(NetClientState *, bool); typedef struct SocketReadState SocketReadState; typedef void (SocketReadStateFinalize)(SocketReadState *rs); typedef void (NetAnnounce)(NetClientState *); +typedef bool (SetSteeringEBPF)(NetClientState *, int); +typedef bool (NetCheckPeerType)(NetClientState *, ObjectClass *, Error **); typedef struct NetClientInfo { NetClientDriver type; @@ -82,6 +84,8 @@ typedef struct NetClientInfo { SetVnetLE *set_vnet_le; SetVnetBE *set_vnet_be; NetAnnounce *announce; + SetSteeringEBPF *set_steering_ebpf; + NetCheckPeerType *check_peer_type; } NetClientInfo; struct NetClientState { @@ -101,6 +105,7 @@ struct NetClientState { int vnet_hdr_len; bool is_netdev; bool do_not_pad; /* do not pad to the minimum ethernet frame length */ + bool is_datapath; QTAILQ_HEAD(, NetFilterState) filters; }; @@ -132,6 +137,10 @@ NetClientState *qemu_new_net_client(NetClientInfo *info, NetClientState *peer, const char *model, const char *name); +NetClientState *qemu_new_net_control_client(NetClientInfo *info, + NetClientState *peer, + const char *model, + const char *name); NICState *qemu_new_nic(NetClientInfo *info, NICConf *conf, const char *model, diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h index 45e34b7cfcf..b81f9a6f2a0 100644 --- a/include/net/vhost-vdpa.h +++ b/include/net/vhost-vdpa.h @@ -15,7 +15,6 @@ #define TYPE_VHOST_VDPA "vhost-vdpa" struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc); -uint64_t vhost_vdpa_get_acked_features(NetClientState *nc); extern const int vdpa_feature_bits[]; diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 172b0051d81..387e913e4e6 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -14,14 +14,17 @@ typedef struct VhostNetOptions { VhostBackendType backend_type; NetClientState *net_backend; uint32_t busyloop_timeout; + unsigned int nvqs; void *opaque; } VhostNetOptions; uint64_t vhost_net_get_max_queues(VHostNetState *net); struct vhost_net *vhost_net_init(VhostNetOptions *options); -int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues); -void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, int total_queues); +int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + int data_queue_pairs, int cvq); +void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, + int data_queue_pairs, int cvq); void vhost_net_cleanup(VHostNetState *net); diff --git a/include/qapi/compat-policy.h b/include/qapi/compat-policy.h index 1083f951228..8b7b25c0b5a 100644 --- a/include/qapi/compat-policy.h +++ b/include/qapi/compat-policy.h @@ -13,10 +13,17 @@ #ifndef QAPI_COMPAT_POLICY_H #define QAPI_COMPAT_POLICY_H +#include "qapi/error.h" #include "qapi/qapi-types-compat.h" extern CompatPolicy compat_policy; +bool compat_policy_input_ok(unsigned special_features, + const CompatPolicy *policy, + ErrorClass error_class, + const char *kind, const char *name, + Error **errp); + /* * Create a QObject input visitor for @obj for use with QMP * diff --git a/include/qapi/forward-visitor.h b/include/qapi/forward-visitor.h new file mode 100644 index 00000000000..50fb3e9d50b --- /dev/null +++ b/include/qapi/forward-visitor.h @@ -0,0 +1,27 @@ +/* + * Forwarding visitor + * + * Copyright Red Hat, Inc. 
2021 + * + * Author: Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef FORWARD_VISITOR_H +#define FORWARD_VISITOR_H + +#include "qapi/visitor.h" + +typedef struct ForwardFieldVisitor ForwardFieldVisitor; + +/* + * The forwarding visitor only expects a single name, @from, to be passed for + * toplevel fields. It is converted to @to and forwarded to the @target visitor. + * Calls within a struct are forwarded without changing the name. + */ +Visitor *visitor_forward_field(Visitor *target, const char *from, const char *to); + +#endif diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h index 075203dc676..1e4240fd0db 100644 --- a/include/qapi/qmp/dispatch.h +++ b/include/qapi/qmp/dispatch.h @@ -21,12 +21,10 @@ typedef void (QmpCommandFunc)(QDict *, QObject **, Error **); typedef enum QmpCommandOptions { - QCO_NO_OPTIONS = 0x0, QCO_NO_SUCCESS_RESP = (1U << 0), QCO_ALLOW_OOB = (1U << 1), QCO_ALLOW_PRECONFIG = (1U << 2), QCO_COROUTINE = (1U << 3), - QCO_DEPRECATED = (1U << 4), } QmpCommandOptions; typedef struct QmpCommand @@ -35,6 +33,7 @@ typedef struct QmpCommand /* Runs in coroutine context if QCO_COROUTINE is set */ QmpCommandFunc *fn; QmpCommandOptions options; + unsigned special_features; QTAILQ_ENTRY(QmpCommand) node; bool enabled; const char *disable_reason; @@ -43,7 +42,8 @@ typedef struct QmpCommand typedef QTAILQ_HEAD(QmpCommandList, QmpCommand) QmpCommandList; void qmp_register_command(QmpCommandList *cmds, const char *name, - QmpCommandFunc *fn, QmpCommandOptions options); + QmpCommandFunc *fn, QmpCommandOptions options, + unsigned special_features); const QmpCommand *qmp_find_command(const QmpCommandList *cmds, const char *name); void qmp_disable_command(QmpCommandList *cmds, const char *name, diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 9934539c1b7..d5b5430e21a 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -64,4 +64,7 @@ const char *qdict_get_try_str(const QDict *qdict, const char *key); QDict *qdict_clone_shallow(const QDict *src); +QObject *qdict_crumple(const QDict *src, Error **errp); +void qdict_flatten(QDict *qdict); + #endif /* QDICT_H */ diff --git a/include/qapi/qobject-input-visitor.h b/include/qapi/qobject-input-visitor.h index 8d693888105..95985e25e52 100644 --- a/include/qapi/qobject-input-visitor.h +++ b/include/qapi/qobject-input-visitor.h @@ -15,7 +15,6 @@ #ifndef QOBJECT_INPUT_VISITOR_H #define QOBJECT_INPUT_VISITOR_H -#include "qapi/qapi-types-compat.h" #include "qapi/visitor.h" typedef struct QObjectInputVisitor QObjectInputVisitor; @@ -59,9 +58,6 @@ typedef struct QObjectInputVisitor QObjectInputVisitor; */ Visitor *qobject_input_visitor_new(QObject *obj); -void qobject_input_visitor_set_policy(Visitor *v, - CompatPolicyInput deprecated); - /* * Create a QObject input visitor for @obj for use with keyval_parse() * diff --git a/include/qapi/qobject-output-visitor.h b/include/qapi/qobject-output-visitor.h index f2a2f92a004..2b1726baf55 100644 --- a/include/qapi/qobject-output-visitor.h +++ b/include/qapi/qobject-output-visitor.h @@ -15,7 +15,6 @@ #define QOBJECT_OUTPUT_VISITOR_H #include "qapi/visitor.h" -#include "qapi/qapi-types-compat.h" typedef struct QObjectOutputVisitor QObjectOutputVisitor; @@ -54,7 +53,4 @@ typedef struct QObjectOutputVisitor QObjectOutputVisitor; */ Visitor *qobject_output_visitor_new(QObject **result); -void 
qobject_output_visitor_set_policy(Visitor *v, - CompatPolicyOutput deprecated); - #endif diff --git a/include/qapi/type-helpers.h b/include/qapi/type-helpers.h new file mode 100644 index 00000000000..be1f1815264 --- /dev/null +++ b/include/qapi/type-helpers.h @@ -0,0 +1,14 @@ +/* + * QAPI common helper functions + * + * This file provides helper functions related to types defined + * in the QAPI schema. + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qapi/qapi-types-common.h" + +HumanReadableText *human_readable_text_from_str(GString *str); diff --git a/include/qapi/util.h b/include/qapi/util.h index d7bfb30e25c..81a2b13a333 100644 --- a/include/qapi/util.h +++ b/include/qapi/util.h @@ -11,9 +11,15 @@ #ifndef QAPI_UTIL_H #define QAPI_UTIL_H +typedef enum { + QAPI_DEPRECATED, + QAPI_UNSTABLE, +} QapiSpecialFeature; + typedef struct QEnumLookup { const char *const *array; - int size; + const unsigned char *const special_features; + const int size; } QEnumLookup; const char *qapi_enum_lookup(const QEnumLookup *lookup, int val); diff --git a/include/qapi/visitor-impl.h b/include/qapi/visitor-impl.h index 3b950f6e3db..2badec5ba46 100644 --- a/include/qapi/visitor-impl.h +++ b/include/qapi/visitor-impl.h @@ -114,14 +114,19 @@ struct Visitor void (*optional)(Visitor *v, const char *name, bool *present); /* Optional */ - bool (*deprecated_accept)(Visitor *v, const char *name, Error **errp); + bool (*policy_reject)(Visitor *v, const char *name, + unsigned special_features, Error **errp); /* Optional */ - bool (*deprecated)(Visitor *v, const char *name); + bool (*policy_skip)(Visitor *v, const char *name, + unsigned special_features); /* Must be set */ VisitorType type; + /* Optional */ + struct CompatPolicy compat_policy; + /* Must be set for output visitors, optional otherwise. */ void (*complete)(Visitor *v, void *opaque); diff --git a/include/qapi/visitor.h b/include/qapi/visitor.h index b3c9ef7a810..d53a84c9ba4 100644 --- a/include/qapi/visitor.h +++ b/include/qapi/visitor.h @@ -16,6 +16,7 @@ #define QAPI_VISITOR_H #include "qapi/qapi-builtin-types.h" +#include "qapi/qapi-types-compat.h" /* * The QAPI schema defines both a set of C data types, and a QMP wire @@ -460,22 +461,39 @@ void visit_end_alternate(Visitor *v, void **obj); bool visit_optional(Visitor *v, const char *name, bool *present); /* - * Should we reject deprecated member @name? + * Should we reject member @name due to policy? + * + * @special_features is the member's special features encoded as a + * bitset of QapiSpecialFeature. * * @name must not be NULL. This function is only useful between * visit_start_struct() and visit_end_struct(), since only objects * have deprecated members. */ -bool visit_deprecated_accept(Visitor *v, const char *name, Error **errp); +bool visit_policy_reject(Visitor *v, const char *name, + unsigned special_features, Error **errp); /* - * Should we visit deprecated member @name? + * + * Should we skip member @name due to policy? + * + * @special_features is the member's special features encoded as a + * bitset of QapiSpecialFeature. * * @name must not be NULL. This function is only useful between * visit_start_struct() and visit_end_struct(), since only objects * have deprecated members. 
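To make the intended call pattern concrete, a hedged sketch of how visitor code might consult these hooks for a deprecated member "foo"; the member name, its string type and the surrounding control flow are assumptions, and the real generated code may differ:

    if (visit_policy_reject(v, "foo", 1u << QAPI_DEPRECATED, errp)) {
        return false;                                /* rejected by input policy */
    }
    if (!visit_policy_skip(v, "foo", 1u << QAPI_DEPRECATED)) {
        if (!visit_type_str(v, "foo", &obj->foo, errp)) {
            return false;
        }
    }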
*/ -bool visit_deprecated(Visitor *v, const char *name); +bool visit_policy_skip(Visitor *v, const char *name, + unsigned special_features); + +/* + * Set policy for handling deprecated management interfaces. + * + * Intended use: call visit_set_policy(v, &compat_policy) when + * visiting management interface input or output. + */ +void visit_set_policy(Visitor *v, CompatPolicy *policy); /* * Visit an enum value. diff --git a/include/qemu/accel.h b/include/qemu/accel.h index b9d6d69eb8d..4f4c283f6fc 100644 --- a/include/qemu/accel.h +++ b/include/qemu/accel.h @@ -78,4 +78,17 @@ int accel_init_machine(AccelState *accel, MachineState *ms); void accel_setup_post(MachineState *ms); #endif /* !CONFIG_USER_ONLY */ +/** + * accel_cpu_instance_init: + * @cpu: The CPU that needs to do accel-specific object initializations. + */ +void accel_cpu_instance_init(CPUState *cpu); + +/** + * accel_cpu_realizefn: + * @cpu: The CPU that needs to call accel-specific cpu realization. + * @errp: currently unused. + */ +bool accel_cpu_realizefn(CPUState *cpu, Error **errp); + #endif /* QEMU_ACCEL_H */ diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 8f4b3a80fbd..112a29910be 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -8,7 +8,7 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. * - * See docs/devel/atomics.txt for discussion about the guarantees each + * See docs/devel/atomics.rst for discussion about the guarantees each * atomic primitive is meant to provide. */ @@ -60,8 +60,9 @@ (unsigned short)1, \ (expr)+0)))))) -#ifdef __ATOMIC_RELAXED -/* For C11 atomic ops */ +#ifndef __ATOMIC_RELAXED +#error "Expecting C11 atomic ops" +#endif /* Manual memory barriers * @@ -239,212 +240,27 @@ #define qatomic_xor(ptr, n) \ ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)) -#else /* __ATOMIC_RELAXED */ - -#ifdef __alpha__ -#define smp_read_barrier_depends() asm volatile("mb":::"memory") -#endif - -#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) - -/* - * Because of the strongly ordered storage model, wmb() and rmb() are nops - * here (a compiler barrier only). QEMU doesn't do accesses to write-combining - * qemu memory or non-temporal load/stores from C code. - */ -#define smp_mb_release() barrier() -#define smp_mb_acquire() barrier() - -/* - * __sync_lock_test_and_set() is documented to be an acquire barrier only, - * but it is a full barrier at the hardware level. Add a compiler barrier - * to make it a full barrier also at the compiler level. - */ -#define qatomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i)) - -#elif defined(_ARCH_PPC) - -/* - * We use an eieio() for wmb() on powerpc. This assumes we don't - * need to order cacheable and non-cacheable stores with respect to - * each other. - * - * smp_mb has the same problem as on x86 for not-very-new GCC - * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011). 
- */ -#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) -#if defined(__powerpc64__) -#define smp_mb_release() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) -#define smp_mb_acquire() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) -#else -#define smp_mb_release() ({ asm volatile("sync" ::: "memory"); (void)0; }) -#define smp_mb_acquire() ({ asm volatile("sync" ::: "memory"); (void)0; }) -#endif -#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) - -#endif /* _ARCH_PPC */ - -/* - * For (host) platforms we don't have explicit barrier definitions - * for, we use the gcc __sync_synchronize() primitive to generate a - * full barrier. This should be safe on all platforms, though it may - * be overkill for smp_mb_acquire() and smp_mb_release(). - */ -#ifndef smp_mb -#define smp_mb() __sync_synchronize() -#endif - -#ifndef smp_mb_acquire -#define smp_mb_acquire() __sync_synchronize() -#endif - -#ifndef smp_mb_release -#define smp_mb_release() __sync_synchronize() -#endif - -#ifndef smp_read_barrier_depends -#define smp_read_barrier_depends() barrier() -#endif - -#ifndef signal_barrier -#define signal_barrier() barrier() -#endif - -/* These will only be atomic if the processor does the fetch or store - * in a single issue memory operation - */ -#define qatomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p)) -#define qatomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i)) - -#define qatomic_read(ptr) qatomic_read__nocheck(ptr) -#define qatomic_set(ptr, i) qatomic_set__nocheck(ptr,i) - -/** - * qatomic_rcu_read - reads a RCU-protected pointer to a local variable - * into a RCU read-side critical section. The pointer can later be safely - * dereferenced within the critical section. - * - * This ensures that the pointer copy is invariant thorough the whole critical - * section. - * - * Inserts memory barriers on architectures that require them (currently only - * Alpha) and documents which pointers are protected by RCU. - * - * qatomic_rcu_read also includes a compiler barrier to ensure that - * value-speculative optimizations (e.g. VSS: Value Speculation - * Scheduling) does not perform the data read before the pointer read - * by speculating the value of the pointer. - * - * Should match qatomic_rcu_set(), qatomic_xchg(), qatomic_cmpxchg(). - */ -#define qatomic_rcu_read(ptr) ({ \ - typeof(*ptr) _val = qatomic_read(ptr); \ - smp_read_barrier_depends(); \ - _val; \ -}) - -/** - * qatomic_rcu_set - assigns (publicizes) a pointer to a new data structure - * meant to be read by RCU read-side critical sections. - * - * Documents which pointers will be dereferenced by RCU read-side critical - * sections and adds the required memory barriers on architectures requiring - * them. It also makes sure the compiler does not reorder code initializing the - * data structure before its publication. - * - * Should match qatomic_rcu_read(). - */ -#define qatomic_rcu_set(ptr, i) do { \ - smp_wmb(); \ - qatomic_set(ptr, i); \ -} while (0) - -#define qatomic_load_acquire(ptr) ({ \ - typeof(*ptr) _val = qatomic_read(ptr); \ - smp_mb_acquire(); \ - _val; \ -}) - -#define qatomic_store_release(ptr, i) do { \ - smp_mb_release(); \ - qatomic_set(ptr, i); \ -} while (0) - -#ifndef qatomic_xchg -#if defined(__clang__) -#define qatomic_xchg(ptr, i) __sync_swap(ptr, i) -#else -/* __sync_lock_test_and_set() is documented to be an acquire barrier only. 
*/ -#define qatomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i)) -#endif -#endif -#define qatomic_xchg__nocheck qatomic_xchg - -/* Provide shorter names for GCC atomic builtins. */ -#define qatomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) -#define qatomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) - -#define qatomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) -#define qatomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) -#define qatomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) -#define qatomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) -#define qatomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) - -#define qatomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) -#define qatomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) -#define qatomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) -#define qatomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) -#define qatomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) -#define qatomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) -#define qatomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) - -#define qatomic_cmpxchg(ptr, old, new) \ - __sync_val_compare_and_swap(ptr, old, new) -#define qatomic_cmpxchg__nocheck(ptr, old, new) qatomic_cmpxchg(ptr, old, new) - -/* And even shorter names that return void. */ -#define qatomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) -#define qatomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1)) -#define qatomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n)) -#define qatomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) -#define qatomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) -#define qatomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) -#define qatomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n)) - -#endif /* __ATOMIC_RELAXED */ - -#ifndef smp_wmb #define smp_wmb() smp_mb_release() -#endif -#ifndef smp_rmb #define smp_rmb() smp_mb_acquire() -#endif - -/* This is more efficient than a store plus a fence. */ -#if !defined(__SANITIZE_THREAD__) -#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) -#define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) -#endif -#endif /* qatomic_mb_read/set semantics map Java volatile variables. They are * less expensive on some platforms (notably POWER) than fully * sequentially consistent operations. * * As long as they are used as paired operations they are safe to - * use. See docs/devel/atomics.txt for more discussion. + * use. See docs/devel/atomics.rst for more discussion. */ -#ifndef qatomic_mb_read #define qatomic_mb_read(ptr) \ qatomic_load_acquire(ptr) -#endif -#ifndef qatomic_mb_set -#define qatomic_mb_set(ptr, i) do { \ - qatomic_store_release(ptr, i); \ - smp_mb(); \ -} while(0) +#if !defined(__SANITIZE_THREAD__) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) +/* This is more efficient than a store plus a fence. */ +# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) +#else +# define qatomic_mb_set(ptr, i) \ + ({ qatomic_store_release(ptr, i); smp_mb(); }) #endif #define qatomic_fetch_inc_nonzero(ptr) ({ \ @@ -455,28 +271,29 @@ _oldn; \ }) -/* Abstractions to access atomically (i.e. 
"once") i64/u64 variables */ -#ifdef CONFIG_ATOMIC64 -static inline int64_t qatomic_read_i64(const int64_t *ptr) -{ - /* use __nocheck because sizeof(void *) might be < sizeof(u64) */ - return qatomic_read__nocheck(ptr); -} - -static inline uint64_t qatomic_read_u64(const uint64_t *ptr) -{ - return qatomic_read__nocheck(ptr); -} - -static inline void qatomic_set_i64(int64_t *ptr, int64_t val) -{ - qatomic_set__nocheck(ptr, val); -} +/* + * Abstractions to access atomically (i.e. "once") i64/u64 variables. + * + * The i386 abi is odd in that by default members are only aligned to + * 4 bytes, which means that 8-byte types can wind up mis-aligned. + * Clang will then warn about this, and emit a call into libatomic. + * + * Use of these types in structures when they will be used with atomic + * operations can avoid this. + */ +typedef int64_t aligned_int64_t __attribute__((aligned(8))); +typedef uint64_t aligned_uint64_t __attribute__((aligned(8))); -static inline void qatomic_set_u64(uint64_t *ptr, uint64_t val) -{ - qatomic_set__nocheck(ptr, val); -} +#ifdef CONFIG_ATOMIC64 +/* Use __nocheck because sizeof(void *) might be < sizeof(u64) */ +#define qatomic_read_i64(P) \ + _Generic(*(P), int64_t: qatomic_read__nocheck(P)) +#define qatomic_read_u64(P) \ + _Generic(*(P), uint64_t: qatomic_read__nocheck(P)) +#define qatomic_set_i64(P, V) \ + _Generic(*(P), int64_t: qatomic_set__nocheck(P, V)) +#define qatomic_set_u64(P, V) \ + _Generic(*(P), uint64_t: qatomic_set__nocheck(P, V)) static inline void qatomic64_init(void) { diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h index ad2bcf45b4f..adb9a1a260b 100644 --- a/include/qemu/atomic128.h +++ b/include/qemu/atomic128.h @@ -6,7 +6,7 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. * - * See docs/devel/atomics.txt for discussion about the guarantees each + * See docs/devel/atomics.rst for discussion about the guarantees each * atomic primitive is meant to provide. */ diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index efaa8f94c43..92ee3788bb5 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -140,7 +140,8 @@ static inline int test_bit(long nr, const unsigned long *addr) * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, + * or @size if there is no set bit in the bitmap. */ unsigned long find_last_bit(const unsigned long *addr, unsigned long size); @@ -150,6 +151,9 @@ unsigned long find_last_bit(const unsigned long *addr, * @addr: The address to base the search on * @offset: The bitnumber to start searching at * @size: The bitmap size in bits + * + * Returns the bit number of the next set bit, + * or @size if there are no further set bits in the bitmap. */ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, @@ -160,6 +164,9 @@ unsigned long find_next_bit(const unsigned long *addr, * @addr: The address to base the search on * @offset: The bitnumber to start searching at * @size: The bitmap size in bits + * + * Returns the bit number of the next cleared bit, + * or @size if there are no further clear bits in the bitmap. 
*/ unsigned long find_next_zero_bit(const unsigned long *addr, @@ -171,7 +178,8 @@ unsigned long find_next_zero_bit(const unsigned long *addr, * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit number of the first set bit. + * Returns the bit number of the first set bit, + * or @size if there is no set bit in the bitmap. */ static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) @@ -194,7 +202,8 @@ static inline unsigned long find_first_bit(const unsigned long *addr, * @addr: The address to start the search at * @size: The maximum size to search * - * Returns the bit number of the first cleared bit. + * Returns the bit number of the first cleared bit, + * or @size if there is no clear bit in the bitmap. */ static inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) @@ -282,6 +291,35 @@ static inline uint64_t ror64(uint64_t word, unsigned int shift) return (word >> shift) | (word << ((64 - shift) & 63)); } +/** + * hswap32 - swap 16-bit halfwords within a 32-bit value + * @h: value to swap + */ +static inline uint32_t hswap32(uint32_t h) +{ + return rol32(h, 16); +} + +/** + * hswap64 - swap 16-bit halfwords within a 64-bit value + * @h: value to swap + */ +static inline uint64_t hswap64(uint64_t h) +{ + uint64_t m = 0x0000ffff0000ffffull; + h = rol64(h, 32); + return ((h & m) << 16) | ((h >> 16) & m); +} + +/** + * wswap64 - swap 32-bit words within a 64-bit value + * @h: value to swap + */ +static inline uint64_t wswap64(uint64_t h) +{ + return rol64(h, 32); +} + /** * extract32: * @value: the value to extract the bit field from diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index 4aaf992b5d7..2d3bb8bbedd 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -1,8 +1,6 @@ #ifndef BSWAP_H #define BSWAP_H -#include "fpu/softfloat-types.h" - #ifdef CONFIG_MACHINE_BSWAP_H # include # include @@ -12,7 +10,18 @@ # include #elif defined(CONFIG_BYTESWAP_H) # include +#define BSWAP_FROM_BYTESWAP +# else +#define BSWAP_FROM_FALLBACKS +#endif /* ! CONFIG_MACHINE_BSWAP_H */ +#ifdef __cplusplus +extern "C" { +#endif + +#include "fpu/softfloat-types.h" + +#ifdef BSWAP_FROM_BYTESWAP static inline uint16_t bswap16(uint16_t x) { return bswap_16(x); @@ -27,7 +36,9 @@ static inline uint64_t bswap64(uint64_t x) { return bswap_64(x); } -# else +#endif + +#ifdef BSWAP_FROM_FALLBACKS static inline uint16_t bswap16(uint16_t x) { return (((x & 0x00ff) << 8) | @@ -53,7 +64,10 @@ static inline uint64_t bswap64(uint64_t x) ((x & 0x00ff000000000000ULL) >> 40) | ((x & 0xff00000000000000ULL) >> 56)); } -#endif /* ! CONFIG_MACHINE_BSWAP_H */ +#endif + +#undef BSWAP_FROM_BYTESWAP +#undef BSWAP_FROM_FALLBACKS static inline void bswap16s(uint16_t *s) { @@ -494,4 +508,8 @@ DO_STN_LDN_P(be) #undef le_bswaps #undef be_bswaps +#ifdef __cplusplus +} +#endif + #endif /* BSWAP_H */ diff --git a/include/qemu/co-shared-resource.h b/include/qemu/co-shared-resource.h index 4e4503004ca..78ca5850f8f 100644 --- a/include/qemu/co-shared-resource.h +++ b/include/qemu/co-shared-resource.h @@ -26,15 +26,13 @@ #ifndef QEMU_CO_SHARED_RESOURCE_H #define QEMU_CO_SHARED_RESOURCE_H - +/* Accesses to co-shared-resource API are thread-safe */ typedef struct SharedResource SharedResource; /* * Create SharedResource structure * * @total: total amount of some resource to be shared between clients - * - * Note: this API is not thread-safe. 
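The @size-on-miss convention spelled out above for the find_*_bit helpers gives the usual iteration idiom; in this sketch, bitmap and nbits are assumed to be defined by the caller:

    unsigned long bit;

    for (bit = find_first_bit(bitmap, nbits);
         bit < nbits;
         bit = find_next_bit(bitmap, nbits, bit + 1)) {
        /* bit is the index of a set bit */
    }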
*/ SharedResource *shres_create(uint64_t total); diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 091c45248b0..3baa5e3790f 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -72,18 +72,7 @@ int:(x) ? -1 : 1; \ } -/* QEMU_BUILD_BUG_MSG() emits the message given if _Static_assert is - * supported; otherwise, it will be omitted from the compiler error - * message (but as it remains present in the source code, it can still - * be useful when debugging). */ -#if defined(CONFIG_STATIC_ASSERT) #define QEMU_BUILD_BUG_MSG(x, msg) _Static_assert(!(x), msg) -#elif defined(__COUNTER__) -#define QEMU_BUILD_BUG_MSG(x, msg) typedef QEMU_BUILD_BUG_ON_STRUCT(x) \ - glue(qemu_build_bug_on__, __COUNTER__) __attribute__((unused)) -#else -#define QEMU_BUILD_BUG_MSG(x, msg) -#endif #define QEMU_BUILD_BUG_ON(x) QEMU_BUILD_BUG_MSG(x, "not expecting: " #x) @@ -173,46 +162,6 @@ #define QEMU_ALWAYS_INLINE #endif -/* Implement C11 _Generic via GCC builtins. Example: - * - * QEMU_GENERIC(x, (float, sinf), (long double, sinl), sin) (x) - * - * The first argument is the discriminator. The last is the default value. - * The middle ones are tuples in "(type, expansion)" format. - */ - -/* First, find out the number of generic cases. */ -#define QEMU_GENERIC(x, ...) \ - QEMU_GENERIC_(typeof(x), __VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -/* There will be extra arguments, but they are not used. */ -#define QEMU_GENERIC_(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, count, ...) \ - QEMU_GENERIC##count(x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) - -/* Two more helper macros, this time to extract items from a parenthesized - * list. - */ -#define QEMU_FIRST_(a, b) a -#define QEMU_SECOND_(a, b) b - -/* ... and a final one for the common part of the "recursion". */ -#define QEMU_GENERIC_IF(x, type_then, else_) \ - __builtin_choose_expr(__builtin_types_compatible_p(x, \ - QEMU_FIRST_ type_then), \ - QEMU_SECOND_ type_then, else_) - -/* CPP poor man's "recursion". */ -#define QEMU_GENERIC1(x, a0, ...) (a0) -#define QEMU_GENERIC2(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC1(x, __VA_ARGS__)) -#define QEMU_GENERIC3(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC2(x, __VA_ARGS__)) -#define QEMU_GENERIC4(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC3(x, __VA_ARGS__)) -#define QEMU_GENERIC5(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC4(x, __VA_ARGS__)) -#define QEMU_GENERIC6(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC5(x, __VA_ARGS__)) -#define QEMU_GENERIC7(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC6(x, __VA_ARGS__)) -#define QEMU_GENERIC8(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC7(x, __VA_ARGS__)) -#define QEMU_GENERIC9(x, a0, ...) QEMU_GENERIC_IF(x, a0, QEMU_GENERIC8(x, __VA_ARGS__)) -#define QEMU_GENERIC10(x, a0, ...) 
QEMU_GENERIC_IF(x, a0, QEMU_GENERIC9(x, __VA_ARGS__)) - /** * qemu_build_not_reached() * diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h index 8d3e53ae4d4..f6054233212 100644 --- a/include/qemu/config-file.h +++ b/include/qemu/config-file.h @@ -1,7 +1,9 @@ #ifndef QEMU_CONFIG_FILE_H #define QEMU_CONFIG_FILE_H +typedef void QEMUConfigCB(const char *group, QDict *qdict, void *opaque, Error **errp); +void qemu_load_module_for_opts(const char *group); QemuOptsList *qemu_find_opts(const char *group); QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); QemuOpts *qemu_find_opts_singleton(const char *group); @@ -14,7 +16,10 @@ void qemu_config_write(FILE *fp); int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname, Error **errp); -int qemu_read_config_file(const char *filename, Error **errp); +/* A default callback for qemu_read_config_file(). */ +void qemu_config_do_parse(const char *group, QDict *qdict, void *opaque, Error **errp); + +int qemu_read_config_file(const char *filename, QEMUConfigCB *f, Error **errp); /* Parse QDict options as a replacement for a config file (allowing multiple enumerated (0..(n-1)) configuration "sections") */ diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h new file mode 100644 index 00000000000..1558a826aa0 --- /dev/null +++ b/include/qemu/coroutine-tls.h @@ -0,0 +1,165 @@ +/* + * QEMU Thread Local Storage for coroutines + * + * Copyright Red Hat + * + * SPDX-License-Identifier: LGPL-2.1-or-later + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. + * + * It is forbidden to access Thread Local Storage in coroutines because + * compiler optimizations may cause values to be cached across coroutine + * re-entry. Coroutines can run in more than one thread through the course of + * their life, leading bugs when stale TLS values from the wrong thread are + * used as a result of compiler optimization. + * + * An example is: + * + * ..code-block:: c + * :caption: A coroutine that may see the wrong TLS value + * + * static __thread AioContext *current_aio_context; + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(current_aio_context); + * qemu_coroutine_yield(); + * aio_notify(current_aio_context); // <-- may be stale after yielding! + * } + * + * This header provides macros for safely defining variables in Thread Local + * Storage: + * + * ..code-block:: c + * :caption: A coroutine that safely uses TLS + * + * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context) + * ... + * static void coroutine_fn foo(void) + * { + * aio_notify(get_current_aio_context()); + * qemu_coroutine_yield(); + * aio_notify(get_current_aio_context()); // <-- safe + * } + */ + +#ifndef QEMU_COROUTINE_TLS_H +#define QEMU_COROUTINE_TLS_H + +/* + * To stop the compiler from caching TLS values we define accessor functions + * with __attribute__((noinline)) plus asm volatile("") to prevent + * optimizations that override noinline. + * + * The compiler can still analyze noinline code and make optimizations based on + * that knowledge, so an inline asm output operand is used to prevent + * optimizations that make assumptions about the address of the TLS variable. + * + * This is fragile and ultimately needs to be solved by a mechanism that is + * guaranteed to work by the compiler (e.g. stackless coroutines), but for now + * we use this approach to prevent issues. 
+ */ + +/** + * QEMU_DECLARE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Declare an extern variable in Thread Local Storage from a header file: + * + * .. code-block:: c + * :caption: Declaring an extern variable in Thread Local Storage + * + * QEMU_DECLARE_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Declaring a TLS variable using __thread + * + * extern __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DECLARE_CO_TLS(type, var) \ + __attribute__((noinline)) type get_##var(void); \ + __attribute__((noinline)) void set_##var(type v); \ + __attribute__((noinline)) type *get_ptr_##var(void); + +/** + * QEMU_DEFINE_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a variable in Thread Local Storage that was previously declared from + * a header file with QEMU_DECLARE_CO_TLS(): + * + * .. code-block:: c + * :caption: Defining a variable in Thread Local Storage + * + * QEMU_DEFINE_CO_TLS(int, my_count) + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a TLS variable using __thread + * + * __thread int my_count; + */ +#define QEMU_DEFINE_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + type get_##var(void) { asm volatile(""); return co_tls_##var; } \ + void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +/** + * QEMU_DEFINE_STATIC_CO_TLS: + * @type: the variable's C type + * @var: the variable name + * + * Define a static variable in Thread Local Storage: + * + * .. code-block:: c + * :caption: Defining a static variable in Thread Local Storage + * + * QEMU_DEFINE_STATIC_CO_TLS(int, my_count) + * ... + * int c = get_my_count(); + * set_my_count(c + 1); + * *get_ptr_my_count() = 0; + * + * This is a coroutine-safe replacement for the __thread keyword and is + * equivalent to the following code: + * + * .. code-block:: c + * :caption: Defining a static TLS variable using __thread + * + * static __thread int my_count; + * ... + * int c = my_count; + * my_count = c + 1; + * *(&my_count) = 0; + */ +#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \ + static __thread type co_tls_##var; \ + static __attribute__((noinline, unused)) \ + type get_##var(void) \ + { asm volatile(""); return co_tls_##var; } \ + static __attribute__((noinline, unused)) \ + void set_##var(type v) \ + { asm volatile(""); co_tls_##var = v; } \ + static __attribute__((noinline, unused)) \ + type *get_ptr_##var(void) \ + { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; } + +#endif /* QEMU_COROUTINE_TLS_H */ diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index ce5b9c6851a..4829ff373d3 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -210,13 +210,15 @@ void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock); /** * Removes the next coroutine from the CoQueue, and wake it up. * Returns true if a coroutine was removed, false if the queue is empty. + * OK to run from coroutine and non-coroutine context. 
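A short sketch of what the relaxed context requirement for CoQueue wakeups enables; the queue, the bottom half and the function names are illustrative only:

    static CoQueue waiters;              /* initialised with qemu_co_queue_init() */

    static void coroutine_fn wait_for_data(void *opaque)
    {
        qemu_co_queue_wait(&waiters, NULL);      /* yield until woken */
    }

    static void data_ready_bh(void *opaque)
    {
        qemu_co_queue_next(&waiters);            /* now fine outside coroutine context */
    }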
*/ -bool coroutine_fn qemu_co_queue_next(CoQueue *queue); +bool qemu_co_queue_next(CoQueue *queue); /** * Empties the CoQueue; all coroutines are woken up. + * OK to run from coroutine and non-coroutine context. */ -void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue); +void qemu_co_queue_restart_all(CoQueue *queue); /** * Removes the next coroutine from the CoQueue, and wake it up. Unlike @@ -291,20 +293,27 @@ void qemu_co_rwlock_wrlock(CoRwlock *lock); */ void qemu_co_rwlock_unlock(CoRwlock *lock); -typedef struct QemuCoSleepState QemuCoSleepState; +typedef struct QemuCoSleep { + Coroutine *to_wake; +} QemuCoSleep; /** - * Yield the coroutine for a given duration. During this yield, @sleep_state - * (if not NULL) is set to an opaque pointer, which may be used for - * qemu_co_sleep_wake(). Be careful, the pointer is set back to zero when the - * timer fires. Don't save the obtained value to other variables and don't call - * qemu_co_sleep_wake from another aio context. + * Yield the coroutine for a given duration. Initializes @w so that, + * during this yield, it can be passed to qemu_co_sleep_wake() to + * terminate the sleep. */ -void coroutine_fn qemu_co_sleep_ns_wakeable(QEMUClockType type, int64_t ns, - QemuCoSleepState **sleep_state); +void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, + QEMUClockType type, int64_t ns); + +/** + * Yield the coroutine until the next call to qemu_co_sleep_wake. + */ +void coroutine_fn qemu_co_sleep(QemuCoSleep *w); + static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) { - qemu_co_sleep_ns_wakeable(type, ns, NULL); + QemuCoSleep w = { 0 }; + qemu_co_sleep_ns_wakeable(&w, type, ns); } /** @@ -313,7 +322,7 @@ static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) * qemu_co_sleep_ns() and should be checked to be non-NULL before calling * qemu_co_sleep_wake(). */ -void qemu_co_sleep_wake(QemuCoSleepState *sleep_state); +void qemu_co_sleep_wake(QemuCoSleep *w); /** * Yield until a file descriptor becomes readable diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index cdca2991d8a..ca979dc6ccd 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -23,9 +23,14 @@ * THE SOFTWARE. */ +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. 
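To illustrate the reworked sleep API above, a hedged sketch in which one code path arms a wakeable sleep and another cuts it short; the file-scope QemuCoSleep and the 100 ms duration are assumptions of the sketch:

    static QemuCoSleep my_sleep;

    static void coroutine_fn my_co_wait(void *opaque)
    {
        /* Sleep for up to 100 ms; my_sleep.to_wake is only valid while sleeping. */
        qemu_co_sleep_ns_wakeable(&my_sleep, QEMU_CLOCK_REALTIME, 100 * SCALE_MS);
    }

    static void my_wake(void)
    {
        /* Wakes the sleeper if it is still sleeping, otherwise does nothing. */
        qemu_co_sleep_wake(&my_sleep);
    }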
+ */ + #ifndef HOST_UTILS_H #define HOST_UTILS_H +#include "qemu/compiler.h" #include "qemu/bswap.h" #ifdef CONFIG_INT128 @@ -51,36 +56,32 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) return (__int128_t)a * b / c; } -static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor) +static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh, + uint64_t divisor) { - if (divisor == 0) { - return 1; - } else { - __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow; - __uint128_t result = dividend / divisor; - *plow = result; - *phigh = dividend % divisor; - return result > UINT64_MAX; - } + __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow; + __uint128_t result = dividend / divisor; + + *plow = result; + *phigh = result >> 64; + return dividend % divisor; } -static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor) +static inline int64_t divs128(uint64_t *plow, int64_t *phigh, + int64_t divisor) { - if (divisor == 0) { - return 1; - } else { - __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; - __int128_t result = dividend / divisor; - *plow = result; - *phigh = dividend % divisor; - return result != *plow; - } + __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; + __int128_t result = dividend / divisor; + + *plow = result; + *phigh = result >> 64; + return dividend % divisor; } #else void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); -int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); -int divs128(int64_t *plow, int64_t *phigh, int64_t divisor); +uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); +int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor); static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) { @@ -272,6 +273,9 @@ static inline int ctpop64(uint64_t val) */ static inline uint8_t revbit8(uint8_t x) { +#if __has_builtin(__builtin_bitreverse8) + return __builtin_bitreverse8(x); +#else /* Assign the correct nibble position. */ x = ((x & 0xf0) >> 4) | ((x & 0x0f) << 4); @@ -281,6 +285,7 @@ static inline uint8_t revbit8(uint8_t x) | ((x & 0x22) << 1) | ((x & 0x11) << 3); return x; +#endif } /** @@ -289,6 +294,9 @@ static inline uint8_t revbit8(uint8_t x) */ static inline uint16_t revbit16(uint16_t x) { +#if __has_builtin(__builtin_bitreverse16) + return __builtin_bitreverse16(x); +#else /* Assign the correct byte position. */ x = bswap16(x); /* Assign the correct nibble position. */ @@ -300,6 +308,7 @@ static inline uint16_t revbit16(uint16_t x) | ((x & 0x2222) << 1) | ((x & 0x1111) << 3); return x; +#endif } /** @@ -308,6 +317,9 @@ static inline uint16_t revbit16(uint16_t x) */ static inline uint32_t revbit32(uint32_t x) { +#if __has_builtin(__builtin_bitreverse32) + return __builtin_bitreverse32(x); +#else /* Assign the correct byte position. */ x = bswap32(x); /* Assign the correct nibble position. */ @@ -319,6 +331,7 @@ static inline uint32_t revbit32(uint32_t x) | ((x & 0x22222222u) << 1) | ((x & 0x11111111u) << 3); return x; +#endif } /** @@ -327,6 +340,9 @@ static inline uint32_t revbit32(uint32_t x) */ static inline uint64_t revbit64(uint64_t x) { +#if __has_builtin(__builtin_bitreverse64) + return __builtin_bitreverse64(x); +#else /* Assign the correct byte position. */ x = bswap64(x); /* Assign the correct nibble position. 
*/ @@ -338,6 +354,325 @@ static inline uint64_t revbit64(uint64_t x) | ((x & 0x2222222222222222ull) << 1) | ((x & 0x1111111111111111ull) << 3); return x; +#endif +} + +/** + * Return the absolute value of a 64-bit integer as an unsigned 64-bit value + */ +static inline uint64_t uabs64(int64_t v) +{ + return v < 0 ? -v : v; +} + +/** + * sadd32_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return ((*ret ^ x) & ~(x ^ y)) < 0; +#endif +} + +/** + * sadd64_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return ((*ret ^ x) & ~(x ^ y)) < 0; +#endif +} + +/** + * uadd32_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return *ret < x; +#endif +} + +/** + * uadd64_overflow - addition with overflow indication + * @x, @y: addends + * @ret: Output for sum + * + * Computes *@ret = @x + @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret) +{ +#if __has_builtin(__builtin_add_overflow) || __GNUC__ >= 5 + return __builtin_add_overflow(x, y, ret); +#else + *ret = x + y; + return *ret < x; +#endif +} + +/** + * ssub32_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for difference + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return ((*ret ^ x) & (x ^ y)) < 0; +#endif +} + +/** + * ssub64_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for sum + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return ((*ret ^ x) & (x ^ y)) < 0; +#endif +} + +/** + * usub32_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for sum + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. 
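A minimal usage sketch for the *_overflow helpers above; the accumulate() wrapper is illustrative only:

    static bool accumulate(uint64_t *total, uint64_t value)
    {
        uint64_t sum;

        if (uadd64_overflow(*total, value, &sum)) {
            return false;                /* the addition would wrap around */
        }
        *total = sum;
        return true;
    }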
+ */ +static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return x < y; +#endif +} + +/** + * usub64_overflow - subtraction with overflow indication + * @x: Minuend + * @y: Subtrahend + * @ret: Output for sum + * + * Computes *@ret = @x - @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret) +{ +#if __has_builtin(__builtin_sub_overflow) || __GNUC__ >= 5 + return __builtin_sub_overflow(x, y, ret); +#else + *ret = x - y; + return x < y; +#endif +} + +/** + * smul32_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + int64_t z = (int64_t)x * y; + *ret = z; + return *ret != z; +#endif +} + +/** + * smul64_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + uint64_t hi, lo; + muls64(&lo, &hi, x, y); + *ret = lo; + return hi != ((int64_t)lo >> 63); +#endif +} + +/** + * umul32_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + uint64_t z = (uint64_t)x * y; + *ret = z; + return z > UINT32_MAX; +#endif +} + +/** + * umul64_overflow - multiplication with overflow indication + * @x, @y: Input multipliers + * @ret: Output for product + * + * Computes *@ret = @x * @y, and returns true if and only if that + * value has been truncated. + */ +static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret) +{ +#if __has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5 + return __builtin_mul_overflow(x, y, ret); +#else + uint64_t hi; + mulu64(ret, &hi, x, y); + return hi != 0; +#endif +} + +/* + * Unsigned 128x64 multiplication. + * Returns true if the result got truncated to 128 bits. + * Otherwise, returns false and the multiplication result via plow and phigh. 
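Ahead of the definition below, a small sketch of how mulu128() is meant to be called on a 128-bit value split across two 64-bit words; value_lo, value_hi and factor are illustrative:

    uint64_t lo = value_lo, hi = value_hi;       /* 128-bit input in {hi, lo} */

    if (mulu128(&lo, &hi, factor)) {
        /* the product does not fit in 128 bits */
    } else {
        /* {hi, lo} now holds the 128-bit product */
    }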
+ */ +static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor) +{ +#if defined(CONFIG_INT128) && \ + (__has_builtin(__builtin_mul_overflow) || __GNUC__ >= 5) + bool res; + __uint128_t r; + __uint128_t f = ((__uint128_t)*phigh << 64) | *plow; + res = __builtin_mul_overflow(f, factor, &r); + + *plow = r; + *phigh = r >> 64; + + return res; +#else + uint64_t dhi = *phigh; + uint64_t dlo = *plow; + uint64_t ahi; + uint64_t blo, bhi; + + if (dhi == 0) { + mulu64(plow, phigh, dlo, factor); + return false; + } + + mulu64(plow, &ahi, dlo, factor); + mulu64(&blo, &bhi, dhi, factor); + + return uadd64_overflow(ahi, blo, phigh) || bhi != 0; +#endif +} + +/** + * uadd64_carry - addition with carry-in and carry-out + * @x, @y: addends + * @pcarry: in-out carry value + * + * Computes @x + @y + *@pcarry, placing the carry-out back + * into *@pcarry and returning the 64-bit sum. + */ +static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry) +{ +#if __has_builtin(__builtin_addcll) + unsigned long long c = *pcarry; + x = __builtin_addcll(x, y, c, &c); + *pcarry = c & 1; + return x; +#else + bool c = *pcarry; + /* This is clang's internal expansion of __builtin_addc. */ + c = uadd64_overflow(x, c, &x); + c |= uadd64_overflow(x, y, &x); + *pcarry = c; + return x; +#endif +} + +/** + * usub64_borrow - subtraction with borrow-in and borrow-out + * @x, @y: addends + * @pborrow: in-out borrow value + * + * Computes @x - @y - *@pborrow, placing the borrow-out back + * into *@pborrow and returning the 64-bit sum. + */ +static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow) +{ +#if __has_builtin(__builtin_subcll) + unsigned long long b = *pborrow; + x = __builtin_subcll(x, y, b, &b); + *pborrow = b & 1; + return x; +#else + bool b = *pborrow; + b = usub64_overflow(x, b, &x); + b |= usub64_overflow(x, y, &x); + *pborrow = b; + return x; +#endif } /* Host type specific sizes of these routines. */ @@ -437,4 +772,81 @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift); */ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow); +/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) + * + * Licensed under the GPLv2/LGPLv3 + */ +static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, + uint64_t n0, uint64_t d) +{ +#if defined(__x86_64__) + uint64_t q; + asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); + return q; +#elif defined(__s390x__) && !defined(__clang__) + /* Need to use a TImode type to get an even register pair for DLGR. */ + unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; + asm("dlgr %0, %1" : "+r"(n) : "r"(d)); + *r = n >> 64; + return n; +#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7) + /* From Power ISA 2.06, programming note for divdeu. 
*/ + uint64_t q1, q2, Q, r1, r2, R; + asm("divdeu %0,%2,%4; divdu %1,%3,%4" + : "=&r"(q1), "=r"(q2) + : "r"(n1), "r"(n0), "r"(d)); + r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ + r2 = n0 - (q2 * d); + Q = q1 + q2; + R = r1 + r2; + if (R >= d || R < r2) { /* overflow implies R > d */ + Q += 1; + R -= d; + } + *r = R; + return Q; +#else + uint64_t d0, d1, q0, q1, r1, r0, m; + + d0 = (uint32_t)d; + d1 = d >> 32; + + r1 = n1 % d1; + q1 = n1 / d1; + m = q1 * d0; + r1 = (r1 << 32) | (n0 >> 32); + if (r1 < m) { + q1 -= 1; + r1 += d; + if (r1 >= d) { + if (r1 < m) { + q1 -= 1; + r1 += d; + } + } + } + r1 -= m; + + r0 = r1 % d1; + q0 = r1 / d1; + m = q0 * d0; + r0 = (r0 << 32) | (uint32_t)n0; + if (r0 < m) { + q0 -= 1; + r0 += d; + if (r0 >= d) { + if (r0 < m) { + q0 -= 1; + r0 += d; + } + } + } + r0 -= m; + + *r = r0; + return (q1 << 32) | q0; +#endif +} + #endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h index 52fc2384211..b6d517aea4e 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -1,9 +1,9 @@ #ifndef INT128_H #define INT128_H -#ifdef CONFIG_INT128 #include "qemu/bswap.h" +#ifdef CONFIG_INT128 typedef __int128_t Int128; static inline Int128 int128_make64(uint64_t a) @@ -11,6 +11,11 @@ static inline Int128 int128_make64(uint64_t a) return a; } +static inline Int128 int128_makes64(int64_t a) +{ + return a; +} + static inline Int128 int128_make128(uint64_t lo, uint64_t hi) { return (__uint128_t)hi << 64 | lo; @@ -53,6 +58,11 @@ static inline Int128 int128_exts64(int64_t a) return a; } +static inline Int128 int128_not(Int128 a) +{ + return ~a; +} + static inline Int128 int128_and(Int128 a, Int128 b) { return a & b; @@ -63,6 +73,11 @@ static inline Int128 int128_or(Int128 a, Int128 b) return a | b; } +static inline Int128 int128_xor(Int128 a, Int128 b) +{ + return a ^ b; +} + static inline Int128 int128_rshift(Int128 a, int n) { return a >> n; @@ -150,26 +165,48 @@ static inline void int128_subfrom(Int128 *a, Int128 b) static inline Int128 bswap128(Int128 a) { +#if __has_builtin(__builtin_bswap128) + return __builtin_bswap128(a); +#else return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +#endif } #else /* !CONFIG_INT128 */ typedef struct Int128 Int128; +/* + * We guarantee that the in-memory byte representation of an + * Int128 is that of a host-endian-order 128-bit integer + * (whether using this struct or the __int128_t version of the type). + * Some code using this type relies on this (eg when copying it into + * guest memory or a gdb protocol buffer, or by using Int128 in + * a union with other integer types). + */ struct Int128 { +#ifdef HOST_WORDS_BIGENDIAN + int64_t hi; + uint64_t lo; +#else uint64_t lo; int64_t hi; +#endif }; static inline Int128 int128_make64(uint64_t a) { - return (Int128) { a, 0 }; + return (Int128) { .lo = a, .hi = 0 }; +} + +static inline Int128 int128_makes64(int64_t a) +{ + return (Int128) { .lo = a, .hi = a >> 63 }; } static inline Int128 int128_make128(uint64_t lo, uint64_t hi) { - return (Int128) { lo, hi }; + return (Int128) { .lo = lo, .hi = hi }; } static inline uint64_t int128_get64(Int128 a) @@ -200,22 +237,32 @@ static inline Int128 int128_one(void) static inline Int128 int128_2_64(void) { - return (Int128) { 0, 1 }; + return int128_make128(0, 1); } static inline Int128 int128_exts64(int64_t a) { - return (Int128) { .lo = a, .hi = (a < 0) ? -1 : 0 }; + return int128_make128(a, (a < 0) ? 
-1 : 0); +} + +static inline Int128 int128_not(Int128 a) +{ + return int128_make128(~a.lo, ~a.hi); } static inline Int128 int128_and(Int128 a, Int128 b) { - return (Int128) { a.lo & b.lo, a.hi & b.hi }; + return int128_make128(a.lo & b.lo, a.hi & b.hi); } static inline Int128 int128_or(Int128 a, Int128 b) { - return (Int128) { a.lo | b.lo, a.hi | b.hi }; + return int128_make128(a.lo | b.lo, a.hi | b.hi); +} + +static inline Int128 int128_xor(Int128 a, Int128 b) +{ + return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi); } static inline Int128 int128_rshift(Int128 a, int n) @@ -327,5 +374,16 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a = int128_sub(*a, b); } +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(a.hi), bswap64(a.lo)); +} + #endif /* CONFIG_INT128 */ + +static inline void bswap128s(Int128 *s) +{ + *s = bswap128(*s); +} + #endif /* INT128_H */ diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h index b66cf93c4bc..8249edd764e 100644 --- a/include/qemu/iova-tree.h +++ b/include/qemu/iova-tree.h @@ -59,7 +59,7 @@ IOVATree *iova_tree_new(void); * * Return: 0 if succeeded, or <0 if error. */ -int iova_tree_insert(IOVATree *tree, DMAMap *map); +int iova_tree_insert(IOVATree *tree, const DMAMap *map); /** * iova_tree_remove: @@ -74,7 +74,7 @@ int iova_tree_insert(IOVATree *tree, DMAMap *map); * * Return: 0 if succeeded, or <0 if error. */ -int iova_tree_remove(IOVATree *tree, DMAMap *map); +int iova_tree_remove(IOVATree *tree, const DMAMap *map); /** * iova_tree_find: @@ -92,7 +92,7 @@ int iova_tree_remove(IOVATree *tree, DMAMap *map); * user is responsible to make sure the pointer is valid (say, no * concurrent deletion in progress). */ -DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map); +const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map); /** * iova_tree_find_address: @@ -105,7 +105,7 @@ DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map); * * Return: same as iova_tree_find(). */ -DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova); +const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova); /** * iova_tree_foreach: diff --git a/include/qemu/job.h b/include/qemu/job.h index efc6fa75449..6e67b6977ff 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -253,8 +253,17 @@ struct JobDriver { /** * If the callback is not NULL, it will be invoked in job_cancel_async + * + * This function must return true if the job will be cancelled + * immediately without any further I/O (mandatory if @force is + * true), and false otherwise. This lets the generic job layer + * know whether a job has been truly (force-)cancelled, or whether + * it is just in a special completion mode (like mirror after + * READY). + * (If the callback is NULL, the job is assumed to terminate + * without I/O.) */ - void (*cancel)(Job *job); + bool (*cancel)(Job *job, bool force); /** Called when the job is freed */ @@ -427,9 +436,15 @@ const char *job_type_str(const Job *job); /** Returns true if the job should not be visible to the management layer. */ bool job_is_internal(Job *job); -/** Returns whether the job is scheduled for cancellation. */ +/** Returns whether the job is being cancelled. */ bool job_is_cancelled(Job *job); +/** + * Returns whether the job is scheduled for cancellation (at an + * indefinite point). + */ +bool job_cancel_requested(Job *job); + /** Returns whether the job is in a completed state. 
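A hedged sketch of a job driver honouring the new cancel() contract described above; MyJob, its embedded Job member and my_job_abort_io() are purely illustrative:

    static bool my_job_cancel(Job *job, bool force)
    {
        MyJob *s = container_of(job, MyJob, common);

        if (force) {
            my_job_abort_io(s);          /* stop immediately, no further I/O */
            return true;                 /* the job is truly cancelled */
        }
        /* Soft cancel: keep running and finish in a special completion mode. */
        return false;
    }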
*/ bool job_is_completed(Job *job); @@ -506,18 +521,18 @@ void job_user_cancel(Job *job, bool force, Error **errp); /** * Synchronously cancel the @job. The completion callback is called - * before the function returns. The job may actually complete - * instead of canceling itself; the circumstances under which this - * happens depend on the kind of job that is active. + * before the function returns. If @force is false, the job may + * actually complete instead of canceling itself; the circumstances + * under which this happens depend on the kind of job that is active. * * Returns the return value from the job if the job actually completed * during the call, or -ECANCELED if it was canceled. * * Callers must hold the AioContext lock of job->aio_context. */ -int job_cancel_sync(Job *job); +int job_cancel_sync(Job *job, bool force); -/** Synchronously cancels all jobs using job_cancel_sync(). */ +/** Synchronously force-cancels all jobs using job_cancel_sync(). */ void job_cancel_sync_all(void); /** diff --git a/include/qemu/lockable.h b/include/qemu/lockable.h index b6200231418..86db7cb04c9 100644 --- a/include/qemu/lockable.h +++ b/include/qemu/lockable.h @@ -24,79 +24,71 @@ struct QemuLockable { QemuLockUnlockFunc *unlock; }; -/* This function gives an error if an invalid, non-NULL pointer type is passed - * to QEMU_MAKE_LOCKABLE. For optimized builds, we can rely on dead-code elimination - * from the compiler, and give the errors already at link time. - */ -#if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__) -void unknown_lock_type(void *); -#else -static inline void unknown_lock_type(void *unused) -{ - abort(); -} -#endif - static inline __attribute__((__always_inline__)) QemuLockable * qemu_make_lockable(void *x, QemuLockable *lockable) { - /* We cannot test this in a macro, otherwise we get compiler + /* + * We cannot test this in a macro, otherwise we get compiler * warnings like "the address of 'm' will always evaluate as 'true'". */ return x ? lockable : NULL; } -/* Auxiliary macros to simplify QEMU_MAKE_LOCABLE. */ -#define QEMU_LOCK_FUNC(x) ((QemuLockUnlockFunc *) \ - QEMU_GENERIC(x, \ - (QemuMutex *, qemu_mutex_lock), \ - (QemuRecMutex *, qemu_rec_mutex_lock), \ - (CoMutex *, qemu_co_mutex_lock), \ - (QemuSpin *, qemu_spin_lock), \ - unknown_lock_type)) - -#define QEMU_UNLOCK_FUNC(x) ((QemuLockUnlockFunc *) \ - QEMU_GENERIC(x, \ - (QemuMutex *, qemu_mutex_unlock), \ - (QemuRecMutex *, qemu_rec_mutex_unlock), \ - (CoMutex *, qemu_co_mutex_unlock), \ - (QemuSpin *, qemu_spin_unlock), \ - unknown_lock_type)) - -/* In C, compound literals have the lifetime of an automatic variable. +static inline __attribute__((__always_inline__)) QemuLockable * +qemu_null_lockable(void *x) +{ + if (x != NULL) { + qemu_build_not_reached(); + } + return NULL; +} + +/* + * In C, compound literals have the lifetime of an automatic variable. * In C++ it would be different, but then C++ wouldn't need QemuLockable * either... */ -#define QEMU_MAKE_LOCKABLE_(x) (&(QemuLockable) { \ - .object = (x), \ - .lock = QEMU_LOCK_FUNC(x), \ - .unlock = QEMU_UNLOCK_FUNC(x), \ +#define QML_OBJ_(x, name) (&(QemuLockable) { \ + .object = (x), \ + .lock = (QemuLockUnlockFunc *) qemu_ ## name ## _lock, \ + .unlock = (QemuLockUnlockFunc *) qemu_ ## name ## _unlock \ }) -/* QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable +/** + * QEMU_MAKE_LOCKABLE - Make a polymorphic QemuLockable * - * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin). 
+ * @x: a lock object (currently one of QemuMutex, QemuRecMutex, + * CoMutex, QemuSpin). * * Returns a QemuLockable object that can be passed around * to a function that can operate with locks of any kind, or * NULL if @x is %NULL. + * + * Note the special case for void *, so that we may pass "NULL". */ -#define QEMU_MAKE_LOCKABLE(x) \ - QEMU_GENERIC(x, \ - (QemuLockable *, (x)), \ - qemu_make_lockable((x), QEMU_MAKE_LOCKABLE_(x))) +#define QEMU_MAKE_LOCKABLE(x) \ + _Generic((x), QemuLockable *: (x), \ + void *: qemu_null_lockable(x), \ + QemuMutex *: qemu_make_lockable(x, QML_OBJ_(x, mutex)), \ + QemuRecMutex *: qemu_make_lockable(x, QML_OBJ_(x, rec_mutex)), \ + CoMutex *: qemu_make_lockable(x, QML_OBJ_(x, co_mutex)), \ + QemuSpin *: qemu_make_lockable(x, QML_OBJ_(x, spin))) -/* QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable +/** + * QEMU_MAKE_LOCKABLE_NONNULL - Make a polymorphic QemuLockable * - * @x: a lock object (currently one of QemuMutex, QemuRecMutex, CoMutex, QemuSpin). + * @x: a lock object (currently one of QemuMutex, QemuRecMutex, + * CoMutex, QemuSpin). * * Returns a QemuLockable object that can be passed around * to a function that can operate with locks of any kind. */ -#define QEMU_MAKE_LOCKABLE_NONNULL(x) \ - QEMU_GENERIC(x, \ - (QemuLockable *, (x)), \ - QEMU_MAKE_LOCKABLE_(x)) +#define QEMU_MAKE_LOCKABLE_NONNULL(x) \ + _Generic((x), QemuLockable *: (x), \ + QemuMutex *: QML_OBJ_(x, mutex), \ + QemuRecMutex *: QML_OBJ_(x, rec_mutex), \ + CoMutex *: QML_OBJ_(x, co_mutex), \ + QemuSpin *: QML_OBJ_(x, spin)) static inline void qemu_lockable_lock(QemuLockable *x) { diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index bbe067db974..8dbc6fcb894 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -234,25 +234,6 @@ void event_notifier_set_handler(EventNotifier *e, GSource *iohandler_get_g_source(void); AioContext *iohandler_get_aio_context(void); -#ifdef CONFIG_POSIX -/** - * qemu_add_child_watch: Register a child process for reaping. - * - * Under POSIX systems, a parent process must read the exit status of - * its child processes using waitpid, or the operating system will not - * free some of the resources attached to that process. - * - * This function directs the QEMU main loop to observe a child process - * and call waitpid as soon as it exits; the watch is then removed - * automatically. It is useful whenever QEMU forks a child process - * but will find out about its termination by other means such as a - * "broken pipe". - * - * @pid: The pid that QEMU should observe. - */ -typedef void ChildTerminationHandler(int status, void *opaque); -int qemu_add_child_watch(pid_t pid, ChildTerminationHandler *callback, void* opaque); -#endif /** * qemu_mutex_iothread_locked: Return lock status of the main loop mutex. @@ -313,7 +294,9 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); void qemu_fd_register(int fd); -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); +#define qemu_bh_new(cb, opaque) \ + qemu_bh_new_full((cb), (opaque), (stringify(cb))) +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); void qemu_bh_schedule_idle(QEMUBH *bh); enum { diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index 456ff87df16..90d0eee7053 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -7,18 +7,22 @@ size_t qemu_fd_getpagesize(int fd); size_t qemu_mempath_getpagesize(const char *mem_path); /** - * qemu_ram_mmap: mmap the specified file or device. 
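/*
 * Illustrative sketch (the sketch_* names are hypothetical, not part of the
 * patch): with the _Generic-based QEMU_MAKE_LOCKABLE above, one helper can
 * accept any of the supported lock types, and the matching lock/unlock pair
 * is selected at compile time.
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/lockable.h"

static void sketch_with_lock(QemuLockable *l)
{
    qemu_lockable_lock(l);
    /* ... critical section ... */
    qemu_lockable_unlock(l);
}

static void sketch_callers(QemuMutex *m, QemuSpin *s)
{
    sketch_with_lock(QEMU_MAKE_LOCKABLE(m));  /* resolves to qemu_mutex_lock/unlock */
    sketch_with_lock(QEMU_MAKE_LOCKABLE(s));  /* resolves to qemu_spin_lock/unlock */
}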
+ * qemu_ram_mmap: mmap anonymous memory, the specified file or device. + * + * mmap() abstraction to map guest RAM, simplifying flag handling, taking + * care of alignment requirements and installing guard pages. * * Parameters: * @fd: the file or the device to mmap * @size: the number of bytes to be mmaped * @align: if not zero, specify the alignment of the starting mapping address; * otherwise, the alignment in use will be determined by QEMU. - * @readonly: true for a read-only mapping, false for read/write. - * @shared: map has RAM_SHARED flag. - * @is_pmem: map has RAM_PMEM flag. + * @qemu_map_flags: QEMU_MAP_* flags * @map_offset: map starts at offset of map_offset from the start of fd * + * Internally, MAP_PRIVATE, MAP_ANONYMOUS and MAP_SHARED_VALIDATE are set + * implicitly based on other parameters. + * * Return: * On success, return a pointer to the mapped area. * On failure, return MAP_FAILED. @@ -26,9 +30,7 @@ size_t qemu_mempath_getpagesize(const char *mem_path); void *qemu_ram_mmap(int fd, size_t size, size_t align, - bool readonly, - bool shared, - bool is_pmem, + uint32_t qemu_map_flags, off_t map_offset); void qemu_ram_munmap(int fd, void *ptr, size_t size); diff --git a/include/qemu/module.h b/include/qemu/module.h index 944d403cbd1..5fcc323b2a7 100644 --- a/include/qemu/module.h +++ b/include/qemu/module.h @@ -72,5 +72,84 @@ void module_call_init(module_init_type type); bool module_load_one(const char *prefix, const char *lib_name, bool mayfail); void module_load_qom_one(const char *type); void module_load_qom_all(void); +void module_allow_arch(const char *arch); + +/** + * DOC: module info annotation macros + * + * ``scripts/modinfo-collect.py`` will collect module info, + * using the preprocessor and -DQEMU_MODINFO. + * + * ``scripts/modinfo-generate.py`` will create a module meta-data database + * from the collected information so qemu knows about module + * dependencies and QOM objects implemented by modules. + * + * See ``*.modinfo`` and ``modinfo.c`` in the build directory to check the + * script results. + */ +#ifdef QEMU_MODINFO +# define modinfo(kind, value) \ + MODINFO_START kind value MODINFO_END +#else +# define modinfo(kind, value) +#endif + +/** + * module_obj + * + * @name: QOM type. + * + * This module implements QOM type @name. + */ +#define module_obj(name) modinfo(obj, name) + +/** + * module_dep + * + * @name: module name + * + * This module depends on module @name. + */ +#define module_dep(name) modinfo(dep, name) + +/** + * module_arch + * + * @name: target architecture + * + * This module is for target architecture @arch. + * + * Note that target-dependent modules are tagged automatically, so + * this is only needed in case target-independent modules should be + * restricted. Use case example: the ccw bus is implemented by s390x + * only. + */ +#define module_arch(name) modinfo(arch, name) + +/** + * module_opts + * + * @name: QemuOpts name + * + * This module registers QemuOpts @name. 
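/*
 * Illustrative sketch of the annotation macros above in a hypothetical
 * module source file ("example-device" and "hw-display-example" are made-up
 * names).  The macros expand to nothing in normal builds and are only picked
 * up by scripts/modinfo-collect.py when preprocessed with -DQEMU_MODINFO.
 */
#include "qemu/osdep.h"
#include "qemu/module.h"

#define TYPE_EXAMPLE_DEVICE "example-device"

module_obj(TYPE_EXAMPLE_DEVICE);    /* QOM type implemented by this module */
module_dep("hw-display-example");   /* must be loaded before this module */
module_arch("x86_64");              /* only if the module is arch-restricted */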
+ */ +#define module_opts(name) modinfo(opts, name) + +/* + * module info database + * + * scripts/modinfo-generate.c will build this using the data collected + * by scripts/modinfo-collect.py + */ +typedef struct QemuModinfo QemuModinfo; +struct QemuModinfo { + const char *name; + const char *arch; + const char **objs; + const char **deps; + const char **opts; +}; +extern const QemuModinfo qemu_modinfo[]; +void module_init_info(const QemuModinfo *info); #endif diff --git a/include/qemu/option.h b/include/qemu/option.h index f73e0dc7d95..306bf075750 100644 --- a/include/qemu/option.h +++ b/include/qemu/option.h @@ -119,7 +119,6 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, int fail_if_exists, Error **errp); void qemu_opts_reset(QemuOptsList *list); void qemu_opts_loc_restore(QemuOpts *opts); -bool qemu_opts_set(QemuOptsList *list, const char *name, const char *value, Error **errp); const char *qemu_opts_id(QemuOpts *opts); void qemu_opts_set_id(QemuOpts *opts, char *id); void qemu_opts_del(QemuOpts *opts); @@ -130,8 +129,6 @@ QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params, bool permit_abbrev); QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, bool permit_abbrev, Error **errp); -void qemu_opts_set_defaults(QemuOptsList *list, const char *params, - int permit_abbrev); QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict, Error **errp); QDict *qemu_opts_to_qdict_filtered(QemuOpts *opts, QDict *qdict, @@ -147,7 +144,10 @@ void qemu_opts_print_help(QemuOptsList *list, bool print_caption); void qemu_opts_free(QemuOptsList *list); QemuOptsList *qemu_opts_append(QemuOptsList *dst, QemuOptsList *list); +QDict *keyval_parse_into(QDict *qdict, const char *params, const char *implied_key, + bool *p_help, Error **errp); QDict *keyval_parse(const char *params, const char *implied_key, bool *help, Error **errp); +void keyval_merge(QDict *old, const QDict *new, Error **errp); #endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 62f747f8e7b..1d2dfe97974 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -131,10 +131,6 @@ QEMU_EXTERN_C int daemon(int, int); */ #include "glib-compat.h" -#ifdef __cplusplus -extern "C" { -#endif - #ifdef _WIN32 #include "sysemu/os-win32.h" #endif @@ -143,6 +139,10 @@ extern "C" { #include "sysemu/os-posix.h" #endif +#ifdef __cplusplus +extern "C" { +#endif + #include "qemu/typedefs.h" /* @@ -195,6 +195,9 @@ extern "C" { #ifndef MAP_FIXED_NOREPLACE #define MAP_FIXED_NOREPLACE 0 #endif +#ifndef MAP_NORESERVE +#define MAP_NORESERVE 0 +#endif #ifndef ENOMEDIUM #define ENOMEDIUM ENODEV #endif @@ -253,7 +256,7 @@ extern "C" { /* Mac OSX has a bug that incorrectly defines SIZE_MAX with * the wrong type. Our replacement isn't usable in preprocessor * expressions, but it is sufficient for our needs. */ -#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX +#ifdef HAVE_BROKEN_SIZE_MAX #undef SIZE_MAX #define SIZE_MAX ((size_t)-1) #endif @@ -316,11 +319,16 @@ extern "C" { }) #endif -/* Round number down to multiple */ +/* + * Round number down to multiple. Safe when m is not a power of 2 (see + * ROUND_DOWN for a faster version when a power of 2 is guaranteed). + */ #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) -/* Round number up to multiple. Safe when m is not a power of 2 (see - * ROUND_UP for a faster version when a power of 2 is guaranteed) */ +/* + * Round number up to multiple. 
Safe when m is not a power of 2 (see + * ROUND_UP for a faster version when a power of 2 is guaranteed). + */ #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) /* Check if n is a multiple of m */ @@ -345,11 +353,22 @@ extern "C" { /* Check if pointer p is n-bytes aligned */ #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) -/* Round number up to multiple. Requires that d be a power of 2 (see +/* + * Round number down to multiple. Requires that d be a power of 2 (see + * QEMU_ALIGN_UP for a safer but slower version on arbitrary + * numbers); works even if d is a smaller type than n. + */ +#ifndef ROUND_DOWN +#define ROUND_DOWN(n, d) ((n) & -(0 ? (n) : (d))) +#endif + +/* + * Round number up to multiple. Requires that d be a power of 2 (see * QEMU_ALIGN_UP for a safer but slower version on arbitrary - * numbers); works even if d is a smaller type than n. */ + * numbers); works even if d is a smaller type than n. + */ #ifndef ROUND_UP -#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d))) +#define ROUND_UP(n, d) ROUND_DOWN((n) + (d) - 1, (d)) #endif #ifndef DIV_ROUND_UP @@ -370,10 +389,50 @@ extern "C" { int qemu_daemon(int nochdir, int noclose); void *qemu_try_memalign(size_t alignment, size_t size); void *qemu_memalign(size_t alignment, size_t size); -void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared); +void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared, + bool noreserve); void qemu_vfree(void *ptr); void qemu_anon_ram_free(void *ptr, size_t size); +/* + * It's an analog of GLIB's g_autoptr_cleanup_generic_gfree(), used to define + * g_autofree macro. + */ +static inline void qemu_cleanup_generic_vfree(void *p) +{ + void **pp = (void **)p; + qemu_vfree(*pp); +} + +/* + * Analog of g_autofree, but qemu_vfree is called on cleanup instead of g_free. + */ +#define QEMU_AUTO_VFREE __attribute__((cleanup(qemu_cleanup_generic_vfree))) + +/* + * Abstraction of PROT_ and MAP_ flags as passed to mmap(), for example, + * consumed by qemu_ram_mmap(). + */ + +/* Map PROT_READ instead of PROT_READ | PROT_WRITE. */ +#define QEMU_MAP_READONLY (1 << 0) + +/* Use MAP_SHARED instead of MAP_PRIVATE. */ +#define QEMU_MAP_SHARED (1 << 1) + +/* + * Use MAP_SYNC | MAP_SHARED_VALIDATE if supported. Ignored without + * QEMU_MAP_SHARED. If mapping fails, warn and fallback to !QEMU_MAP_SYNC. + */ +#define QEMU_MAP_SYNC (1 << 2) + +/* + * Use MAP_NORESERVE to skip reservation of swap space (or huge pages if + * applicable). Bail out if not supported/effective. 
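/*
 * Illustrative sketch (hypothetical helper, POSIX-only): mapping a
 * file-backed RAM region shared, using the consolidated qemu_map_flags
 * argument instead of the old readonly/shared/is_pmem booleans.  Flags such
 * as QEMU_MAP_NORESERVE (defined just below) can be OR-ed in as needed.
 */
#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"

static void *sketch_map_backend(int fd, size_t size)
{
    void *ptr = qemu_ram_mmap(fd, size, qemu_fd_getpagesize(fd),
                              QEMU_MAP_SHARED, 0);

    return ptr == MAP_FAILED ? NULL : ptr;
}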
+ */ +#define QEMU_MAP_NORESERVE (1 << 3) + + #define QEMU_MADV_INVALID -1 #if defined(CONFIG_MADVISE) @@ -418,7 +477,7 @@ void qemu_anon_ram_free(void *ptr, size_t size); #ifdef MADV_REMOVE #define QEMU_MADV_REMOVE MADV_REMOVE #else -#define QEMU_MADV_REMOVE QEMU_MADV_INVALID +#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED #endif #elif defined(CONFIG_POSIX_MADVISE) @@ -432,7 +491,7 @@ void qemu_anon_ram_free(void *ptr, size_t size); #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID #define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID #define QEMU_MADV_NOHUGEPAGE QEMU_MADV_INVALID -#define QEMU_MADV_REMOVE QEMU_MADV_INVALID +#define QEMU_MADV_REMOVE QEMU_MADV_DONTNEED #else /* no-op */ @@ -520,6 +579,7 @@ void sigaction_invoke(struct sigaction *action, #endif int qemu_madvise(void *addr, size_t len, int advice); +int qemu_mprotect_rw(void *addr, size_t size); int qemu_mprotect_rwx(void *addr, size_t size); int qemu_mprotect_none(void *addr, size_t size); diff --git a/include/qemu/plugin-memory.h b/include/qemu/plugin-memory.h index fbbe99474bd..0f592267279 100644 --- a/include/qemu/plugin-memory.h +++ b/include/qemu/plugin-memory.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PLUGIN_MEMORY_H_ -#define _PLUGIN_MEMORY_H_ +#ifndef PLUGIN_MEMORY_H +#define PLUGIN_MEMORY_H struct qemu_plugin_hwaddr { bool is_io; @@ -18,7 +18,7 @@ struct qemu_plugin_hwaddr { hwaddr offset; } io; struct { - uint64_t hostaddr; + void *hostaddr; } ram; } v; }; diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h index c5a79a89f0d..145f8a221ac 100644 --- a/include/qemu/plugin.h +++ b/include/qemu/plugin.h @@ -12,6 +12,7 @@ #include "qemu/error-report.h" #include "qemu/queue.h" #include "qemu/option.h" +#include "exec/memopidx.h" /* * Events that plugins can subscribe to. @@ -36,6 +37,25 @@ enum qemu_plugin_event { struct qemu_plugin_desc; typedef QTAILQ_HEAD(, qemu_plugin_desc) QemuPluginList; +/* + * Construct a qemu_plugin_meminfo_t. + */ +static inline qemu_plugin_meminfo_t +make_plugin_meminfo(MemOpIdx oi, enum qemu_plugin_mem_rw rw) +{ + return oi | (rw << 16); +} + +/* + * Extract the memory operation direction from a qemu_plugin_meminfo_t. + * Other portions may be extracted via get_memop and get_mmuidx. + */ +static inline enum qemu_plugin_mem_rw +get_plugin_meminfo_rw(qemu_plugin_meminfo_t i) +{ + return i >> 16; +} + #ifdef CONFIG_PLUGIN extern QemuOptsList qemu_plugin_opts; @@ -79,7 +99,6 @@ enum plugin_dyn_cb_subtype { struct qemu_plugin_dyn_cb { union qemu_plugin_cb_sig f; void *userp; - unsigned tcg_flags; enum plugin_dyn_cb_subtype type; /* @rw applies to mem callbacks only (both regular and inline) */ enum qemu_plugin_mem_rw rw; @@ -144,10 +163,12 @@ struct qemu_plugin_tb { /** * qemu_plugin_tb_insn_get(): get next plugin record for translation. 
- * + * @tb: the internal tb context + * @pc: address of instruction */ static inline -struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb) +struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb, + uint64_t pc) { struct qemu_plugin_insn *insn; int i, j; @@ -160,6 +181,7 @@ struct qemu_plugin_insn *qemu_plugin_tb_insn_get(struct qemu_plugin_tb *tb) g_byte_array_set_size(insn->data, 0); insn->calls_helpers = false; insn->mem_helper = false; + insn->vaddr = pc; for (i = 0; i < PLUGIN_N_CB_TYPES; i++) { for (j = 0; j < PLUGIN_N_CB_SUBTYPES; j++) { @@ -181,7 +203,8 @@ qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1, uint64_t a6, uint64_t a7, uint64_t a8); void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret); -void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, uint32_t meminfo); +void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, + MemOpIdx oi, enum qemu_plugin_mem_rw rw); void qemu_plugin_flush_cb(void); @@ -191,6 +214,16 @@ void qemu_plugin_add_dyn_cb_arr(GArray *arr); void qemu_plugin_disable_mem_helpers(CPUState *cpu); +/** + * qemu_plugin_user_exit(): clean-up callbacks before calling exit callbacks + * + * This is a user-mode only helper that ensure we have fully cleared + * callbacks from all threads before calling the exit callbacks. This + * is so the plugins themselves don't have to jump through hoops to + * guard against race conditions. + */ +void qemu_plugin_user_exit(void); + #else /* !CONFIG_PLUGIN */ static inline void qemu_plugin_add_opts(void) @@ -235,7 +268,8 @@ void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret) { } static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr, - uint32_t meminfo) + MemOpIdx oi, + enum qemu_plugin_mem_rw rw) { } static inline void qemu_plugin_flush_cb(void) @@ -251,6 +285,8 @@ void qemu_plugin_add_dyn_cb_arr(GArray *arr) static inline void qemu_plugin_disable_mem_helpers(CPUState *cpu) { } +static inline void qemu_plugin_user_exit(void) +{ } #endif /* !CONFIG_PLUGIN */ #endif /* QEMU_PLUGIN_H */ diff --git a/include/qemu/progress_meter.h b/include/qemu/progress_meter.h index 9a23ff071c4..dadf822bbf8 100644 --- a/include/qemu/progress_meter.h +++ b/include/qemu/progress_meter.h @@ -27,6 +27,8 @@ #ifndef QEMU_PROGRESS_METER_H #define QEMU_PROGRESS_METER_H +#include "qemu/lockable.h" + typedef struct ProgressMeter { /** * Current progress. 
The unit is arbitrary as long as the ratio between @@ -37,22 +39,24 @@ typedef struct ProgressMeter { /** Estimated current value at the completion of the process */ uint64_t total; + + QemuMutex lock; /* protects concurrent access to above fields */ } ProgressMeter; -static inline void progress_work_done(ProgressMeter *pm, uint64_t done) -{ - pm->current += done; -} - -static inline void progress_set_remaining(ProgressMeter *pm, uint64_t remaining) -{ - pm->total = pm->current + remaining; -} - -static inline void progress_increase_remaining(ProgressMeter *pm, - uint64_t delta) -{ - pm->total += delta; -} +void progress_init(ProgressMeter *pm); +void progress_destroy(ProgressMeter *pm); + +/* Get a snapshot of internal current and total values */ +void progress_get_snapshot(ProgressMeter *pm, uint64_t *current, + uint64_t *total); + +/* Increases the amount of work done so far by @done */ +void progress_work_done(ProgressMeter *pm, uint64_t done); + +/* Sets how much work has to be done to complete to @remaining */ +void progress_set_remaining(ProgressMeter *pm, uint64_t remaining); + +/* Increases the total work to do by @delta */ +void progress_increase_remaining(ProgressMeter *pm, uint64_t delta); #endif /* QEMU_PROGRESS_METER_H */ diff --git a/qemu-options.h b/include/qemu/qemu-options.h similarity index 88% rename from qemu-options.h rename to include/qemu/qemu-options.h index b4ee63cd601..4a62c83c453 100644 --- a/qemu-options.h +++ b/include/qemu/qemu-options.h @@ -29,8 +29,13 @@ #define QEMU_OPTIONS_H enum { -#define QEMU_OPTIONS_GENERATE_ENUM -#include "qemu-options-wrapper.h" + +#define DEF(option, opt_arg, opt_enum, opt_help, arch_mask) \ + opt_enum, +#define DEFHEADING(text) +#define ARCHHEADING(text, arch_mask) + +#include "qemu-options.def" }; #endif diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h index 97cdfd77618..5f1017201f3 100644 --- a/include/qemu/qemu-plugin.h +++ b/include/qemu/qemu-plugin.h @@ -525,6 +525,15 @@ qemu_plugin_register_vcpu_syscall_ret_cb(qemu_plugin_id_t id, char *qemu_plugin_insn_disas(const struct qemu_plugin_insn *insn); +/** + * qemu_plugin_insn_symbol() - best effort symbol lookup + * @insn: instruction reference + * + * Return a static string referring to the symbol. This is dependent + * on the binary QEMU is running having provided a symbol table. + */ +const char *qemu_plugin_insn_symbol(const struct qemu_plugin_insn *insn); + /** * qemu_plugin_vcpu_for_each() - iterate over the existing vCPU * @id: plugin ID @@ -540,6 +549,19 @@ void qemu_plugin_vcpu_for_each(qemu_plugin_id_t id, void qemu_plugin_register_flush_cb(qemu_plugin_id_t id, qemu_plugin_simple_cb_t cb); +/** + * qemu_plugin_register_atexit_cb() - register exit callback + * @id: plugin ID + * @cb: callback + * @userdata: user data for callback + * + * The @cb function is called once execution has finished. Plugins + * should be able to free all their resources at this point much like + * after a reset/uninstall callback is called. + * + * In user-mode it is possible a few un-instrumented instructions from + * child threads may run before the host kernel reaps the threads. 
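/*
 * Illustrative sketch of a minimal TCG plugin using the atexit hook
 * documented above (the sketch_* names and the message are hypothetical):
 */
#include <qemu-plugin.h>

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

static void sketch_exit(qemu_plugin_id_t id, void *userdata)
{
    /* callbacks from all vCPUs/threads have been cleared by this point */
    qemu_plugin_outs("sketch plugin: done\n");
}

QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    qemu_plugin_register_atexit_cb(id, sketch_exit, NULL);
    return 0;
}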
+ */ void qemu_plugin_register_atexit_cb(qemu_plugin_id_t id, qemu_plugin_udata_cb_t cb, void *userdata); @@ -555,4 +577,17 @@ int qemu_plugin_n_max_vcpus(void); */ void qemu_plugin_outs(const char *string); +/** + * qemu_plugin_bool_parse() - parses a boolean argument in the form of + * "=[on|yes|true|off|no|false]" + * + * @name: argument name, the part before the equals sign + * @val: argument value, what's after the equals sign + * @ret: output return value + * + * returns true if the combination @name=@val parses correctly to a boolean + * argument, and false otherwise + */ +bool qemu_plugin_bool_parse(const char *name, const char *val, bool *ret); + #endif /* QEMU_PLUGIN_API_H */ diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h index 01da8d63f14..48bf59e8572 100644 --- a/include/qemu/ratelimit.h +++ b/include/qemu/ratelimit.h @@ -14,9 +14,11 @@ #ifndef QEMU_RATELIMIT_H #define QEMU_RATELIMIT_H +#include "qemu/lockable.h" #include "qemu/timer.h" typedef struct { + QemuMutex lock; int64_t slice_start_time; int64_t slice_end_time; uint64_t slice_quota; @@ -40,7 +42,12 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); double delay_slices; - assert(limit->slice_quota && limit->slice_ns); + QEMU_LOCK_GUARD(&limit->lock); + if (!limit->slice_quota) { + /* Throttling disabled. */ + return 0; + } + assert(limit->slice_ns); if (limit->slice_end_time < now) { /* Previous, possibly extended, time slice finished; reset the @@ -65,11 +72,26 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) return limit->slice_end_time - now; } +static inline void ratelimit_init(RateLimit *limit) +{ + qemu_mutex_init(&limit->lock); +} + +static inline void ratelimit_destroy(RateLimit *limit) +{ + qemu_mutex_destroy(&limit->lock); +} + static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed, uint64_t slice_ns) { + QEMU_LOCK_GUARD(&limit->lock); limit->slice_ns = slice_ns; - limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); + if (speed == 0) { + limit->slice_quota = 0; + } else { + limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); + } } #endif diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 515d327cf11..b063c6fde81 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -27,7 +27,9 @@ #include "qemu/thread.h" #include "qemu/queue.h" #include "qemu/atomic.h" +#include "qemu/notify.h" #include "qemu/sys_membarrier.h" +#include "qemu/coroutine-tls.h" #ifdef __cplusplus extern "C" { @@ -66,13 +68,20 @@ struct rcu_reader_data { /* Data used for registry, protected by rcu_registry_lock */ QLIST_ENTRY(rcu_reader_data) node; + + /* + * NotifierList used to force an RCU grace period. Accessed under + * rcu_registry_lock. Note that the notifier is called _outside_ + * the thread! 
+ */ + NotifierList force_rcu; }; -extern __thread struct rcu_reader_data rcu_reader; +QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader) static inline void rcu_read_lock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); unsigned ctr; if (p_rcu_reader->depth++ > 0) { @@ -88,7 +97,7 @@ static inline void rcu_read_lock(void) static inline void rcu_read_unlock(void) { - struct rcu_reader_data *p_rcu_reader = &rcu_reader; + struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader(); assert(p_rcu_reader->depth != 0); if (--p_rcu_reader->depth > 0) { @@ -180,6 +189,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock) #define RCU_READ_LOCK_GUARD() \ g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock() +/* + * Force-RCU notifiers tell readers that they should exit their + * read-side critical section. + */ +void rcu_add_force_rcu_notifier(Notifier *n); +void rcu_remove_force_rcu_notifier(Notifier *n); + #ifdef __cplusplus } #endif diff --git a/include/qemu/selfmap.h b/include/qemu/selfmap.h index 8382c4c779d..80cf920fbad 100644 --- a/include/qemu/selfmap.h +++ b/include/qemu/selfmap.h @@ -6,8 +6,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _SELFMAP_H_ -#define _SELFMAP_H_ +#ifndef SELFMAP_H +#define SELFMAP_H typedef struct { unsigned long start; diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 7d1f8135767..0c34bf23987 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -111,4 +111,15 @@ SocketAddress *socket_remote_address(int fd, Error **errp); */ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr); +/** + * socket_address_parse_named_fd: + * + * Modify @addr, replacing a named fd by its corresponding number. + * Needed for callers that plan to pass @addr to a context where the + * current monitor is not available. + * + * Return 0 on success. + */ +int socket_address_parse_named_fd(SocketAddress *addr, Error **errp); + #endif /* QEMU_SOCKETS_H */ diff --git a/include/qemu/stats64.h b/include/qemu/stats64.h index fdd3d1b8f98..802402254b6 100644 --- a/include/qemu/stats64.h +++ b/include/qemu/stats64.h @@ -21,7 +21,7 @@ typedef struct Stat64 { #ifdef CONFIG_ATOMIC64 - uint64_t value; + aligned_uint64_t value; #else uint32_t low, high; uint32_t lock; diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index c903525062e..bda54e599c8 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -4,11 +4,9 @@ #include #include -typedef QemuMutex QemuRecMutex; -#define qemu_rec_mutex_destroy qemu_mutex_destroy -#define qemu_rec_mutex_lock_impl qemu_mutex_lock_impl -#define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl -#define qemu_rec_mutex_unlock qemu_mutex_unlock +#ifdef __FreeBSD__ +#include +#endif struct QemuMutex { pthread_mutex_t lock; @@ -19,6 +17,14 @@ struct QemuMutex { bool initialized; }; +/* + * QemuRecMutex cannot be a typedef of QemuMutex lest we have two + * compatible cases in _Generic. See qemu/lockable.h. 
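/*
 * Illustrative sketch of the force-RCU notifier from the rcu.h hunk above
 * (the sketch_* names are hypothetical): a long-running reader registers a
 * notifier so that, when a writer needs a grace period, the reader can be
 * asked to leave its read-side critical section.
 */
#include "qemu/osdep.h"
#include "qemu/rcu.h"

static bool sketch_exit_requested;

static void sketch_force_rcu(Notifier *n, void *data)
{
    /* runs outside the reader thread; just raise a flag the reader polls */
    qatomic_set(&sketch_exit_requested, true);
}

static void sketch_reader(void)
{
    Notifier n = { .notify = sketch_force_rcu };

    rcu_add_force_rcu_notifier(&n);
    rcu_read_lock();
    while (!qatomic_read(&sketch_exit_requested)) {
        /* ... walk RCU-protected data ... */
    }
    rcu_read_unlock();
    rcu_remove_force_rcu_notifier(&n);
}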
+ */ +typedef struct QemuRecMutex { + QemuMutex m; +} QemuRecMutex; + struct QemuCond { pthread_cond_t cond; bool initialized; diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h index d0a1a9597eb..d95af4498fc 100644 --- a/include/qemu/thread-win32.h +++ b/include/qemu/thread-win32.h @@ -18,12 +18,6 @@ struct QemuRecMutex { bool initialized; }; -void qemu_rec_mutex_destroy(QemuRecMutex *mutex); -void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line); -int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, - int line); -void qemu_rec_mutex_unlock(QemuRecMutex *mutex); - struct QemuCond { CONDITION_VARIABLE var; bool initialized; diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 54357631846..460568d67d5 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -28,6 +28,12 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line); void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line); void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line); +void qemu_rec_mutex_init(QemuRecMutex *mutex); +void qemu_rec_mutex_destroy(QemuRecMutex *mutex); +void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line); +int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line); +void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line); + typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l); typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l); typedef void (*QemuRecMutexLockFunc)(QemuRecMutex *m, const char *f, int l); @@ -104,6 +110,9 @@ extern QemuCondTimedWaitFunc qemu_cond_timedwait_func; #define qemu_mutex_unlock(mutex) \ qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__) +#define qemu_rec_mutex_unlock(mutex) \ + qemu_rec_mutex_unlock_impl(mutex, __FILE__, __LINE__) + static inline void (qemu_mutex_lock)(QemuMutex *mutex) { qemu_mutex_lock(mutex); @@ -129,8 +138,10 @@ static inline int (qemu_rec_mutex_trylock)(QemuRecMutex *mutex) return qemu_rec_mutex_trylock(mutex); } -/* Prototypes for other functions are in thread-posix.h/thread-win32.h. */ -void qemu_rec_mutex_init(QemuRecMutex *mutex); +static inline void (qemu_rec_mutex_unlock)(QemuRecMutex *mutex) +{ + qemu_rec_mutex_unlock(mutex); +} void qemu_cond_init(QemuCond *cond); void qemu_cond_destroy(QemuCond *cond); diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h new file mode 100644 index 00000000000..2f2060acd90 --- /dev/null +++ b/include/qemu/transactions.h @@ -0,0 +1,66 @@ +/* + * Simple transactions API + * + * Copyright (c) 2021 Virtuozzo International GmbH. + * + * Author: + * Vladimir Sementsov-Ogievskiy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * + * = Generic transaction API = + * + * The intended usage is the following: you create "prepare" functions, which + * represents the actions. 
They will usually have Transaction* argument, and + * call tran_add() to register finalization callbacks. For finalization + * callbacks, prepare corresponding TransactionActionDrv structures. + * + * Then, when you need to make a transaction, create an empty Transaction by + * tran_create(), call your "prepare" functions on it, and finally call + * tran_abort() or tran_commit() to finalize the transaction by corresponding + * finalization actions in reverse order. + * + * The clean() functions registered by the drivers in a transaction are called + * last, after all abort() or commit() functions have been called. + */ + +#ifndef QEMU_TRANSACTIONS_H +#define QEMU_TRANSACTIONS_H + +#include + +typedef struct TransactionActionDrv { + void (*abort)(void *opaque); + void (*commit)(void *opaque); + void (*clean)(void *opaque); +} TransactionActionDrv; + +typedef struct Transaction Transaction; + +Transaction *tran_new(void); +void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque); +void tran_abort(Transaction *tran); +void tran_commit(Transaction *tran); + +static inline void tran_finalize(Transaction *tran, int ret) +{ + if (ret < 0) { + tran_abort(tran); + } else { + tran_commit(tran); + } +} + +#endif /* QEMU_TRANSACTIONS_H */ diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h index 4491c8e1a6e..bde9495b254 100644 --- a/include/qemu/vfio-helpers.h +++ b/include/qemu/vfio-helpers.h @@ -18,7 +18,7 @@ typedef struct QEMUVFIOState QEMUVFIOState; QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp); void qemu_vfio_close(QEMUVFIOState *s); int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size, - bool temporary, uint64_t *iova_list); + bool temporary, uint64_t *iova_list, Error **errp); int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s); void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host); void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index, diff --git a/include/qom/object.h b/include/qom/object.h index 6721cd312e6..fae096f51cc 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -861,6 +861,29 @@ static void do_qemu_init_ ## type_array(void) \ } \ type_init(do_qemu_init_ ## type_array) +/** + * type_print_class_properties: + * @type: a QOM class name + * + * Print the object's class properties to stdout or the monitor. + * Return whether an object was found. + */ +bool type_print_class_properties(const char *type); + +/** + * object_set_properties_from_keyval: + * @obj: a QOM object + * @qdict: a dictionary with the properties to be set + * @from_json: true if leaf values of @qdict are typed, false if they + * are strings + * @errp: pointer to error object + * + * For each key in the dictionary, parse the value string if needed, + * then set the corresponding property in @obj. + */ +void object_set_properties_from_keyval(Object *obj, const QDict *qdict, + bool from_json, Error **errp); + /** * object_class_dynamic_cast_assert: * @klass: The #ObjectClass to attempt to cast. @@ -1520,6 +1543,18 @@ Object *object_resolve_path(const char *path, bool *ambiguous); Object *object_resolve_path_type(const char *path, const char *typename, bool *ambiguous); +/** + * object_resolve_path_at: + * @parent: the object in which to resolve the path + * @path: the path to resolve + * + * This is like object_resolve_path(), except paths not starting with + * a slash are relative to @parent. + * + * Returns: The resolved object or NULL on path lookup failure. 
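/*
 * Illustrative sketch of the transaction API declared above in
 * include/qemu/transactions.h (the example_* names are hypothetical): a
 * prepare function performs its action and registers how to undo it; the
 * caller then commits or aborts everything at once.
 */
#include "qemu/osdep.h"
#include "qemu/transactions.h"

static void example_set_abort(void *opaque)
{
    *(int *)opaque = 0;                  /* roll the change back */
}

static TransactionActionDrv example_set_drv = {
    .abort = example_set_abort,
};

static void example_set_prepare(int *value, Transaction *tran)
{
    *value = 42;                         /* do the action up front */
    tran_add(tran, &example_set_drv, value);
}

static int example_caller(int *value)
{
    Transaction *tran = tran_new();
    int ret = 0;

    example_set_prepare(value, tran);
    /* ... further prepare calls may set ret < 0 on failure ... */
    tran_finalize(tran, ret);            /* commit on success, abort otherwise */
    return ret;
}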
+ */ +Object *object_resolve_path_at(Object *parent, const char *path); + /** * object_resolve_path_component: * @parent: the object in which to resolve the path diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index 215d01b4eca..204cfb8640d 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -33,6 +33,8 @@ #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_FEATURE_MSI_EXT_DEST_ID 15 +#define KVM_FEATURE_HC_MAP_GPA_RANGE 16 +#define KVM_FEATURE_MIGRATION_CONTROL 17 #define KVM_HINTS_REALTIME 0 @@ -54,6 +56,7 @@ #define MSR_KVM_POLL_CONTROL 0x4b564d05 #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 +#define MSR_KVM_MIGRATION_CONTROL 0x4b564d08 struct kvm_steal_time { uint64_t steal; @@ -90,6 +93,16 @@ struct kvm_clock_pairing { /* MSR_KVM_ASYNC_PF_INT */ #define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +/* MSR_KVM_MIGRATION_CONTROL */ +#define KVM_MIGRATION_READY (1 << 0) + +/* KVM_HC_MAP_GPA_RANGE */ +#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K 0 +#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M (1 << 0) +#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G (1 << 1) +#define KVM_MAP_GPA_RANGE_ENC_STAT(n) (n << 4) +#define KVM_MAP_GPA_RANGE_ENCRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(1) +#define KVM_MAP_GPA_RANGE_DECRYPTED KVM_MAP_GPA_RANGE_ENC_STAT(0) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 1677208a411..94d41b202c9 100644 --- a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -70,30 +70,6 @@ enum pvrdma_mtu { PVRDMA_MTU_4096 = 5, }; -static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) -{ - switch (mtu) { - case PVRDMA_MTU_256: return 256; - case PVRDMA_MTU_512: return 512; - case PVRDMA_MTU_1024: return 1024; - case PVRDMA_MTU_2048: return 2048; - case PVRDMA_MTU_4096: return 4096; - default: return -1; - } -} - -static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) -{ - switch (mtu) { - case 256: return PVRDMA_MTU_256; - case 512: return PVRDMA_MTU_512; - case 1024: return PVRDMA_MTU_1024; - case 2048: return PVRDMA_MTU_2048; - case 4096: - default: return PVRDMA_MTU_4096; - } -} - enum pvrdma_port_state { PVRDMA_PORT_NOP = 0, PVRDMA_PORT_DOWN = 1, @@ -138,17 +114,6 @@ enum pvrdma_port_width { PVRDMA_WIDTH_12X = 8, }; -static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) -{ - switch (width) { - case PVRDMA_WIDTH_1X: return 1; - case PVRDMA_WIDTH_4X: return 4; - case PVRDMA_WIDTH_8X: return 8; - case PVRDMA_WIDTH_12X: return 12; - default: return -1; - } -} - enum pvrdma_port_speed { PVRDMA_SPEED_SDR = 1, PVRDMA_SPEED_DDR = 2, diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index c47e19810c0..352b51fd0ac 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -167,6 +167,13 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* 64 bpp RGB */ +#define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ 
+#define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616 fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* * Floating point 64bpp RGB * IEEE 754-2008 binary16 half-precision float @@ -526,6 +533,25 @@ extern "C" { */ #define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) +/* + * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render + * compression. + * + * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be ignored. The clear color structure is 256 bits. The first 128 bits + * represents Raw Clear Color Red, Green, Blue and Alpha color each represented + * by 32 bits. The raw clear color is consumed by the 3d engine and generates + * the converted clear color of size 64 bits. The first 32 bits store the Lower + * Converted Clear Color value and the next 32 bits store the Higher Converted + * Clear Color value when applicable. The Converted Clear Color values are + * consumed by the DE. The last 64 bits are used to store Color Discard Enable + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line + * corresponds to an area of 4x1 tiles in the main surface. The main surface + * pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * @@ -1035,9 +1061,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) * Not all combinations are valid, and different SoCs may support different * combinations of layout and options. */ -#define __fourcc_mod_amlogic_layout_mask 0xf +#define __fourcc_mod_amlogic_layout_mask 0xff #define __fourcc_mod_amlogic_options_shift 8 -#define __fourcc_mod_amlogic_options_mask 0xf +#define __fourcc_mod_amlogic_options_mask 0xff #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ fourcc_mod_code(AMLOGIC, \ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 8bfd01d230d..053d3fafdf3 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -26,6 +26,14 @@ * have the same layout for 32-bit and 64-bit userland. */ +/* Note on reserved space. + * Reserved fields must not be accessed directly by user space because + * they may be replaced by a different field in the future. They must + * be initialized to zero before making the request, e.g. via memset + * of the entire structure or implicitly by not being set in a structure + * initializer. + */ + /** * struct ethtool_cmd - DEPRECATED, link control and status * This structure is DEPRECATED, please use struct ethtool_link_settings. @@ -67,6 +75,7 @@ * and other link features that the link partner advertised * through autonegotiation; 0 if unknown or not applicable. * Read-only. + * @reserved: Reserved for future use; see the note on reserved space. * * The link speed in Mbps is split between @speed and @speed_hi. Use * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to @@ -155,6 +164,7 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) * @bus_info: Device bus address. 
This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. + * @reserved2: Reserved for future use; see the note on reserved space. * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and * %ETHTOOL_SPFLAGS commands; also the number of strings in the * %ETH_SS_PRIV_FLAGS set @@ -223,7 +233,7 @@ enum tunable_id { ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ /* * Add your fresh new tunable attribute above and remember to update - * tunable_strings[] in net/core/ethtool.c + * tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_TUNABLE_COUNT, }; @@ -287,7 +297,7 @@ enum phy_tunable_id { ETHTOOL_PHY_EDPD, /* * Add your fresh new phy tunable attribute above and remember to update - * phy_tunable_strings[] in net/core/ethtool.c + * phy_tunable_strings[] in net/ethtool/common.c */ __ETHTOOL_PHY_TUNABLE_COUNT, }; @@ -356,6 +366,7 @@ struct ethtool_eeprom { * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting * its tx lpi (after reaching 'idle' state). Effective only when eee * was negotiated and tx_lpi_enabled was set. + * @reserved: Reserved for future use; see the note on reserved space. */ struct ethtool_eee { uint32_t cmd; @@ -374,6 +385,7 @@ struct ethtool_eee { * @cmd: %ETHTOOL_GMODULEINFO * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx * @eeprom_len: Length of the eeprom + * @reserved: Reserved for future use; see the note on reserved space. * * This structure is used to return the information to * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. @@ -579,9 +591,7 @@ struct ethtool_pauseparam { uint32_t tx_pause; }; -/** - * enum ethtool_link_ext_state - link extended state - */ +/* Link extended state */ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_AUTONEG, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, @@ -595,10 +605,7 @@ enum ethtool_link_ext_state { ETHTOOL_LINK_EXT_STATE_OVERHEAT, }; -/** - * enum ethtool_link_ext_substate_autoneg - more information in addition to - * ETHTOOL_LINK_EXT_STATE_AUTONEG. - */ +/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ enum ethtool_link_ext_substate_autoneg { ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, @@ -608,9 +615,7 @@ enum ethtool_link_ext_substate_autoneg { ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, }; -/** - * enum ethtool_link_ext_substate_link_training - more information in addition to - * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. */ enum ethtool_link_ext_substate_link_training { ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, @@ -619,9 +624,7 @@ enum ethtool_link_ext_substate_link_training { ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, }; -/** - * enum ethtool_link_ext_substate_logical_mismatch - more information in addition - * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. +/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. */ enum ethtool_link_ext_substate_link_logical_mismatch { ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, @@ -631,19 +634,14 @@ enum ethtool_link_ext_substate_link_logical_mismatch { ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, }; -/** - * enum ethtool_link_ext_substate_bad_signal_integrity - more information in - * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. 
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. */ enum ethtool_link_ext_substate_bad_signal_integrity { ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, }; -/** - * enum ethtool_link_ext_substate_cable_issue - more information in - * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. - */ +/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ enum ethtool_link_ext_substate_cable_issue { ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, @@ -661,6 +659,7 @@ enum ethtool_link_ext_substate_cable_issue { * now deprecated * @ETH_SS_FEATURES: Device feature names * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names + * @ETH_SS_TUNABLES: tunable names * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS * @ETH_SS_PHY_TUNABLES: PHY tunable names * @ETH_SS_LINK_MODES: link mode names @@ -670,6 +669,13 @@ enum ethtool_link_ext_substate_cable_issue { * @ETH_SS_TS_TX_TYPES: timestamping Tx types * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types + * @ETH_SS_STATS_STD: standardized stats + * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics + * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics + * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics + * @ETH_SS_STATS_RMON: names of RMON statistics + * + * @ETH_SS_COUNT: number of defined string sets */ enum ethtool_stringset { ETH_SS_TEST = 0, @@ -688,6 +694,11 @@ enum ethtool_stringset { ETH_SS_TS_TX_TYPES, ETH_SS_TS_RX_FILTERS, ETH_SS_UDP_TUNNEL_TYPES, + ETH_SS_STATS_STD, + ETH_SS_STATS_ETH_PHY, + ETH_SS_STATS_ETH_MAC, + ETH_SS_STATS_ETH_CTRL, + ETH_SS_STATS_RMON, /* add new constants above here */ ETH_SS_COUNT @@ -715,6 +726,7 @@ struct ethtool_gstrings { /** * struct ethtool_sset_info - string set information * @cmd: Command number = %ETHTOOL_GSSET_INFO + * @reserved: Reserved for future use; see the note on reserved space. * @sset_mask: On entry, a bitmask of string sets to query, with bits * numbered according to &enum ethtool_stringset. On return, a * bitmask of those string sets queried that are supported. @@ -759,6 +771,7 @@ enum ethtool_test_flags { * @flags: A bitmask of flags from &enum ethtool_test_flags. Some * flags may be set by the user on entry; others may be set by * the driver on return. + * @reserved: Reserved for future use; see the note on reserved space. * @len: On return, the number of test results * @data: Array of test results * @@ -959,6 +972,7 @@ union ethtool_flow_union { * @vlan_etype: VLAN EtherType * @vlan_tci: VLAN tag control information * @data: user defined data + * @padding: Reserved for future use; see the note on reserved space. * * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT * is set in &struct ethtool_rx_flow_spec @flow_type. @@ -1134,7 +1148,8 @@ struct ethtool_rxfh_indir { * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. - * @rsvd: Reserved for future extensions. + * @rsvd8: Reserved for future use; see the note on reserved space. + * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table * of @indir_size uint32_t elements, followed by hash key of @key_size * bytes. 
@@ -1302,7 +1317,9 @@ struct ethtool_sfeatures { * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @tx_reserved: Reserved for future use; see the note on reserved space. * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + * @rx_reserved: Reserved for future use; see the note on reserved space. * * The bits in the 'tx_types' and 'rx_filters' fields correspond to * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, @@ -1376,15 +1393,33 @@ struct ethtool_per_queue_op { }; /** - * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * struct ethtool_fecparam - Ethernet Forward Error Correction parameters * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM - * @active_fec: FEC mode which is active on porte - * @fec: Bitmask of supported/configured FEC modes - * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * @active_fec: FEC mode which is active on the port, single bit set, GET only. + * @fec: Bitmask of configured FEC modes. + * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. * - * Drivers should reject a non-zero setting of @autoneg when - * autoneogotiation is disabled (or not supported) for the link. + * Note that @reserved was never validated on input and ethtool user space + * left it uninitialized when calling SET. Hence going forward it can only be + * used to return a value to userspace with GET. + * + * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. + * FEC settings are configured by link autonegotiation whenever it's enabled. + * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. + * + * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. + * It is recommended that drivers only accept a single bit set in @fec. + * When multiple bits are set in @fec drivers may pick mode in an implementation + * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other + * FEC modes, because it's unclear whether in this case other modes constrain + * AUTO or are independent choices. + * Drivers must reject SET requests if they support none of the requested modes. + * + * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead + * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. * + * See enum ethtool_fec_config_bits for definition of valid bits for both + * @fec and @active_fec. */ struct ethtool_fecparam { uint32_t cmd; @@ -1396,11 +1431,16 @@ struct ethtool_fecparam { /** * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration - * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported - * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver - * @ETHTOOL_FEC_OFF: No FEC Mode - * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode - * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not + * be used together with other bits. GET only. + * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually + * based link mode and SFP parameters read from module's + * EEPROM. This bit does _not_ mean autonegotiation. 
+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode + * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode + * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode + * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet + * Consortium) */ enum ethtool_fec_config_bits { ETHTOOL_FEC_NONE_BIT, @@ -1958,6 +1998,11 @@ enum ethtool_reset_flags { * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. + * @reserved: Reserved for future use; see the note on reserved space. + * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. * * If autonegotiation is disabled, the speed and @duplex represent the * fixed link mode and are writable if the driver supports multiple diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index 950d7edb7ef..cce105bfbab 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -179,6 +179,8 @@ * 7.33 * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID * - add FUSE_OPEN_KILL_SUIDGID + * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT + * - add FUSE_SETXATTR_ACL_KILL_SGID */ #ifndef _LINUX_FUSE_H @@ -326,6 +328,7 @@ struct fuse_file_lock { * does not have CAP_FSETID. Additionally upon * write/truncate sgid is killed only if file has group * execute permission. (Same as Linux VFS behavior). + * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -356,6 +359,7 @@ struct fuse_file_lock { #define FUSE_MAP_ALIGNMENT (1 << 26) #define FUSE_SUBMOUNTS (1 << 27) #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) +#define FUSE_SETXATTR_EXT (1 << 29) /** * CUSE INIT request/reply flags @@ -447,6 +451,12 @@ struct fuse_file_lock { */ #define FUSE_OPEN_KILL_SUIDGID (1 << 0) +/** + * setxattr flags + * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set + */ +#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -677,9 +687,13 @@ struct fuse_fsync_in { uint32_t padding; }; +#define FUSE_COMPAT_SETXATTR_IN_SIZE 8 + struct fuse_setxattr_in { uint32_t size; uint32_t flags; + uint32_t setxattr_flags; + uint32_t padding; }; struct fuse_getxattr_in { @@ -899,7 +913,8 @@ struct fuse_notify_retrieve_in { }; /* Device ioctls: */ -#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) +#define FUSE_DEV_IOC_MAGIC 229 +#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index c403b9cb0d4..b5e86b40abd 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -611,6 +611,7 @@ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ +#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ diff --git a/include/standard-headers/linux/input.h 
b/include/standard-headers/linux/input.h index f89c986190d..7822c241784 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -81,7 +81,7 @@ struct input_id { * in units per radian. * When INPUT_PROP_ACCELEROMETER is set the resolution changes. * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in - * in units per g (units/g) and in units per degree per second + * units per g (units/g) and in units per degree per second * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). */ struct input_absinfo { diff --git a/include/standard-headers/linux/udmabuf.h b/include/standard-headers/linux/udmabuf.h new file mode 100644 index 00000000000..e19eb5b5ce7 --- /dev/null +++ b/include/standard-headers/linux/udmabuf.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_UDMABUF_H +#define _LINUX_UDMABUF_H + +#include "standard-headers/linux/types.h" + +#define UDMABUF_FLAGS_CLOEXEC 0x01 + +struct udmabuf_create { + uint32_t memfd; + uint32_t flags; + uint64_t offset; + uint64_t size; +}; + +struct udmabuf_create_item { + uint32_t memfd; + uint32_t __pad; + uint64_t offset; + uint64_t size; +}; + +struct udmabuf_create_list { + uint32_t flags; + uint32_t count; + struct udmabuf_create_item list[]; +}; + +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create) +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct udmabuf_create_list) + +#endif /* _LINUX_UDMABUF_H */ diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h new file mode 100644 index 00000000000..245e1eff4b9 --- /dev/null +++ b/include/standard-headers/linux/virtio_bt.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef _LINUX_VIRTIO_BT_H +#define _LINUX_VIRTIO_BT_H + +#include "standard-headers/linux/virtio_types.h" + +/* Feature bits */ +#define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */ +#define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */ +#define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */ + +enum virtio_bt_config_type { + VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, + VIRTIO_BT_CONFIG_TYPE_AMP = 1, +}; + +enum virtio_bt_config_vendor { + VIRTIO_BT_CONFIG_VENDOR_NONE = 0, + VIRTIO_BT_CONFIG_VENDOR_ZEPHYR = 1, + VIRTIO_BT_CONFIG_VENDOR_INTEL = 2, + VIRTIO_BT_CONFIG_VENDOR_REALTEK = 3, +}; + +struct virtio_bt_config { + uint8_t type; + uint16_t vendor; + uint16_t msft_opcode; +} QEMU_PACKED; + +#endif /* _LINUX_VIRTIO_BT_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index bc1c0621f5e..4fe842c3a3a 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -51,8 +51,10 @@ #define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_BT 40 /* virtio bluetooth */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h new file mode 100644 index 00000000000..1af96b9fc61 --- /dev/null +++ b/include/standard-headers/linux/virtio_snd.h @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (C) 2021 OpenSynergy GmbH + */ 
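For the udmabuf interface imported above, the usual host-side flow is to create a size-sealed memfd and hand it to /dev/udmabuf, which returns a dma-buf file descriptor. The sketch below follows that flow using the regular kernel uapi header rather than the QEMU standard-headers copy; it is illustrative only, with minimal error handling and an arbitrary buffer size.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/udmabuf.h>

int main(void)
{
    size_t size = 4 * getpagesize();
    int devfd = open("/dev/udmabuf", O_RDWR);
    int memfd = memfd_create("udmabuf-backing", MFD_ALLOW_SEALING);
    struct udmabuf_create create = { 0 };
    int buffd;

    /* the backing memfd must be sized and must no longer be able to shrink */
    if (ftruncate(memfd, size) < 0 ||
        fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK) < 0) {
        perror("memfd setup");
        return 1;
    }

    create.memfd  = memfd;
    create.flags  = UDMABUF_FLAGS_CLOEXEC;
    create.offset = 0;
    create.size   = size;

    /* on success the ioctl returns a new dma-buf file descriptor */
    buffd = ioctl(devfd, UDMABUF_CREATE, &create);
    if (buffd < 0) {
        perror("UDMABUF_CREATE");
        return 1;
    }
    printf("dma-buf fd %d for %zu bytes\n", buffd, size);

    close(buffd);
    close(memfd);
    close(devfd);
    return 0;
}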
+#ifndef VIRTIO_SND_IF_H +#define VIRTIO_SND_IF_H + +#include "standard-headers/linux/virtio_types.h" + +/******************************************************************************* + * CONFIGURATION SPACE + */ +struct virtio_snd_config { + /* # of available physical jacks */ + uint32_t jacks; + /* # of available PCM streams */ + uint32_t streams; + /* # of available channel maps */ + uint32_t chmaps; +}; + +enum { + /* device virtqueue indexes */ + VIRTIO_SND_VQ_CONTROL = 0, + VIRTIO_SND_VQ_EVENT, + VIRTIO_SND_VQ_TX, + VIRTIO_SND_VQ_RX, + /* # of device virtqueues */ + VIRTIO_SND_VQ_MAX +}; + +/******************************************************************************* + * COMMON DEFINITIONS + */ + +/* supported dataflow directions */ +enum { + VIRTIO_SND_D_OUTPUT = 0, + VIRTIO_SND_D_INPUT +}; + +enum { + /* jack control request types */ + VIRTIO_SND_R_JACK_INFO = 1, + VIRTIO_SND_R_JACK_REMAP, + + /* PCM control request types */ + VIRTIO_SND_R_PCM_INFO = 0x0100, + VIRTIO_SND_R_PCM_SET_PARAMS, + VIRTIO_SND_R_PCM_PREPARE, + VIRTIO_SND_R_PCM_RELEASE, + VIRTIO_SND_R_PCM_START, + VIRTIO_SND_R_PCM_STOP, + + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, + + /* PCM event types */ + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, + VIRTIO_SND_S_NOT_SUPP, + VIRTIO_SND_S_IO_ERR +}; + +/* common header */ +struct virtio_snd_hdr { + uint32_t code; +}; + +/* event notification */ +struct virtio_snd_event { + /* VIRTIO_SND_EVT_XXX */ + struct virtio_snd_hdr hdr; + /* optional event data */ + uint32_t data; +}; + +/* common control request to query an item information */ +struct virtio_snd_query_info { + /* VIRTIO_SND_R_XXX_INFO */ + struct virtio_snd_hdr hdr; + /* item start identifier */ + uint32_t start_id; + /* item count to query */ + uint32_t count; + /* item information size in bytes */ + uint32_t size; +}; + +/* common item information header */ +struct virtio_snd_info { + /* function group node id (High Definition Audio Specification 7.1.2) */ + uint32_t hda_fn_nid; +}; + +/******************************************************************************* + * JACK CONTROL MESSAGES + */ +struct virtio_snd_jack_hdr { + /* VIRTIO_SND_R_JACK_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::jacks - 1 */ + uint32_t jack_id; +}; + +/* supported jack features */ +enum { + VIRTIO_SND_JACK_F_REMAP = 0 +}; + +struct virtio_snd_jack_info { + /* common header */ + struct virtio_snd_info hdr; + /* supported feature bit map (1 << VIRTIO_SND_JACK_F_XXX) */ + uint32_t features; + /* pin configuration (High Definition Audio Specification 7.3.3.31) */ + uint32_t hda_reg_defconf; + /* pin capabilities (High Definition Audio Specification 7.3.4.9) */ + uint32_t hda_reg_caps; + /* current jack connection status (0: disconnected, 1: connected) */ + uint8_t connected; + + uint8_t padding[7]; +}; + +/* jack remapping control request */ +struct virtio_snd_jack_remap { + /* .code = VIRTIO_SND_R_JACK_REMAP */ + struct virtio_snd_jack_hdr hdr; + /* selected association number */ + uint32_t association; + /* selected sequence number */ + uint32_t sequence; +}; + +/******************************************************************************* + * PCM CONTROL MESSAGES + */ +struct virtio_snd_pcm_hdr { + /* VIRTIO_SND_R_PCM_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... 
virtio_snd_config::streams - 1 */ + uint32_t stream_id; +}; + +/* supported PCM stream features */ +enum { + VIRTIO_SND_PCM_F_SHMEM_HOST = 0, + VIRTIO_SND_PCM_F_SHMEM_GUEST, + VIRTIO_SND_PCM_F_MSG_POLLING, + VIRTIO_SND_PCM_F_EVT_SHMEM_PERIODS, + VIRTIO_SND_PCM_F_EVT_XRUNS +}; + +/* supported PCM sample formats */ +enum { + /* analog formats (width / physical width) */ + VIRTIO_SND_PCM_FMT_IMA_ADPCM = 0, /* 4 / 4 bits */ + VIRTIO_SND_PCM_FMT_MU_LAW, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_A_LAW, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_S8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_U8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_S16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_U16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_S18_3, /* 18 / 24 bits */ + VIRTIO_SND_PCM_FMT_U18_3, /* 18 / 24 bits */ + VIRTIO_SND_PCM_FMT_S20_3, /* 20 / 24 bits */ + VIRTIO_SND_PCM_FMT_U20_3, /* 20 / 24 bits */ + VIRTIO_SND_PCM_FMT_S24_3, /* 24 / 24 bits */ + VIRTIO_SND_PCM_FMT_U24_3, /* 24 / 24 bits */ + VIRTIO_SND_PCM_FMT_S20, /* 20 / 32 bits */ + VIRTIO_SND_PCM_FMT_U20, /* 20 / 32 bits */ + VIRTIO_SND_PCM_FMT_S24, /* 24 / 32 bits */ + VIRTIO_SND_PCM_FMT_U24, /* 24 / 32 bits */ + VIRTIO_SND_PCM_FMT_S32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_U32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_FLOAT, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_FLOAT64, /* 64 / 64 bits */ + /* digital formats (width / physical width) */ + VIRTIO_SND_PCM_FMT_DSD_U8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_DSD_U16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_DSD_U32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME /* 32 / 32 bits */ +}; + +/* supported PCM frame rates */ +enum { + VIRTIO_SND_PCM_RATE_5512 = 0, + VIRTIO_SND_PCM_RATE_8000, + VIRTIO_SND_PCM_RATE_11025, + VIRTIO_SND_PCM_RATE_16000, + VIRTIO_SND_PCM_RATE_22050, + VIRTIO_SND_PCM_RATE_32000, + VIRTIO_SND_PCM_RATE_44100, + VIRTIO_SND_PCM_RATE_48000, + VIRTIO_SND_PCM_RATE_64000, + VIRTIO_SND_PCM_RATE_88200, + VIRTIO_SND_PCM_RATE_96000, + VIRTIO_SND_PCM_RATE_176400, + VIRTIO_SND_PCM_RATE_192000, + VIRTIO_SND_PCM_RATE_384000 +}; + +struct virtio_snd_pcm_info { + /* common header */ + struct virtio_snd_info hdr; + /* supported feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ + uint32_t features; + /* supported sample format bit map (1 << VIRTIO_SND_PCM_FMT_XXX) */ + uint64_t formats; + /* supported frame rate bit map (1 << VIRTIO_SND_PCM_RATE_XXX) */ + uint64_t rates; + /* dataflow direction (VIRTIO_SND_D_XXX) */ + uint8_t direction; + /* minimum # of supported channels */ + uint8_t channels_min; + /* maximum # of supported channels */ + uint8_t channels_max; + + uint8_t padding[5]; +}; + +/* set PCM stream format */ +struct virtio_snd_pcm_set_params { + /* .code = VIRTIO_SND_R_PCM_SET_PARAMS */ + struct virtio_snd_pcm_hdr hdr; + /* size of the hardware buffer */ + uint32_t buffer_bytes; + /* size of the hardware period */ + uint32_t period_bytes; + /* selected feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ + uint32_t features; + /* selected # of channels */ + uint8_t channels; + /* selected sample format (VIRTIO_SND_PCM_FMT_XXX) */ + uint8_t format; + /* selected frame rate (VIRTIO_SND_PCM_RATE_XXX) */ + uint8_t rate; + + uint8_t padding; +}; + +/******************************************************************************* + * PCM I/O MESSAGES + */ + +/* I/O request header */ +struct virtio_snd_pcm_xfer { + /* 0 ... 
virtio_snd_config::streams - 1 */ + uint32_t stream_id; +}; + +/* I/O request status */ +struct virtio_snd_pcm_status { + /* VIRTIO_SND_S_XXX */ + uint32_t status; + /* current device latency */ + uint32_t latency_bytes; +}; + +/******************************************************************************* + * CHANNEL MAP CONTROL MESSAGES + */ +struct virtio_snd_chmap_hdr { + /* VIRTIO_SND_R_CHMAP_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::chmaps - 1 */ + uint32_t chmap_id; +}; + +/* standard channel position definition */ +enum { + VIRTIO_SND_CHMAP_NONE = 0, /* undefined */ + VIRTIO_SND_CHMAP_NA, /* silent */ + VIRTIO_SND_CHMAP_MONO, /* mono stream */ + VIRTIO_SND_CHMAP_FL, /* front left */ + VIRTIO_SND_CHMAP_FR, /* front right */ + VIRTIO_SND_CHMAP_RL, /* rear left */ + VIRTIO_SND_CHMAP_RR, /* rear right */ + VIRTIO_SND_CHMAP_FC, /* front center */ + VIRTIO_SND_CHMAP_LFE, /* low frequency (LFE) */ + VIRTIO_SND_CHMAP_SL, /* side left */ + VIRTIO_SND_CHMAP_SR, /* side right */ + VIRTIO_SND_CHMAP_RC, /* rear center */ + VIRTIO_SND_CHMAP_FLC, /* front left center */ + VIRTIO_SND_CHMAP_FRC, /* front right center */ + VIRTIO_SND_CHMAP_RLC, /* rear left center */ + VIRTIO_SND_CHMAP_RRC, /* rear right center */ + VIRTIO_SND_CHMAP_FLW, /* front left wide */ + VIRTIO_SND_CHMAP_FRW, /* front right wide */ + VIRTIO_SND_CHMAP_FLH, /* front left high */ + VIRTIO_SND_CHMAP_FCH, /* front center high */ + VIRTIO_SND_CHMAP_FRH, /* front right high */ + VIRTIO_SND_CHMAP_TC, /* top center */ + VIRTIO_SND_CHMAP_TFL, /* top front left */ + VIRTIO_SND_CHMAP_TFR, /* top front right */ + VIRTIO_SND_CHMAP_TFC, /* top front center */ + VIRTIO_SND_CHMAP_TRL, /* top rear left */ + VIRTIO_SND_CHMAP_TRR, /* top rear right */ + VIRTIO_SND_CHMAP_TRC, /* top rear center */ + VIRTIO_SND_CHMAP_TFLC, /* top front left center */ + VIRTIO_SND_CHMAP_TFRC, /* top front right center */ + VIRTIO_SND_CHMAP_TSL, /* top side left */ + VIRTIO_SND_CHMAP_TSR, /* top side right */ + VIRTIO_SND_CHMAP_LLFE, /* left LFE */ + VIRTIO_SND_CHMAP_RLFE, /* right LFE */ + VIRTIO_SND_CHMAP_BC, /* bottom center */ + VIRTIO_SND_CHMAP_BLC, /* bottom left center */ + VIRTIO_SND_CHMAP_BRC /* bottom right center */ +}; + +/* maximum possible number of channels */ +#define VIRTIO_SND_CHMAP_MAX_SIZE 18 + +struct virtio_snd_chmap_info { + /* common header */ + struct virtio_snd_info hdr; + /* dataflow direction (VIRTIO_SND_D_XXX) */ + uint8_t direction; + /* # of valid channel position values */ + uint8_t channels; + /* channel position values (VIRTIO_SND_CHMAP_XXX) */ + uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; +}; + +#endif /* VIRTIO_SND_IF_H */ diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h index be443211ce9..3a23488e421 100644 --- a/include/standard-headers/linux/virtio_vsock.h +++ b/include/standard-headers/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_config.h" +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 1 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { uint64_t guest_cid; } QEMU_PACKED; @@ -65,6 +68,7 @@ struct virtio_vsock_hdr { enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -91,4 +95,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_SEQ_EOR = 1, +}; + #endif /* 
_LINUX_VIRTIO_VSOCK_H */ diff --git a/include/standard-headers/rdma/vmw_pvrdma-abi.h b/include/standard-headers/rdma/vmw_pvrdma-abi.h index 0989426a3f5..c30182a7ae7 100644 --- a/include/standard-headers/rdma/vmw_pvrdma-abi.h +++ b/include/standard-headers/rdma/vmw_pvrdma-abi.h @@ -133,6 +133,13 @@ enum pvrdma_wc_flags { PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; +enum pvrdma_network_type { + PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_IPV4, + PVRDMA_NETWORK_IPV6 +}; + struct pvrdma_alloc_ucontext_resp { uint32_t qp_tab_size; uint32_t reserved; diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index 16da2796962..70c579560ad 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -9,7 +9,6 @@ enum { QEMU_ARCH_CRIS = (1 << 2), QEMU_ARCH_I386 = (1 << 3), QEMU_ARCH_M68K = (1 << 4), - QEMU_ARCH_LM32 = (1 << 5), QEMU_ARCH_MICROBLAZE = (1 << 6), QEMU_ARCH_MIPS = (1 << 7), QEMU_ARCH_PPC = (1 << 8), @@ -18,30 +17,15 @@ enum { QEMU_ARCH_SPARC = (1 << 11), QEMU_ARCH_XTENSA = (1 << 12), QEMU_ARCH_OPENRISC = (1 << 13), - QEMU_ARCH_UNICORE32 = (1 << 14), - QEMU_ARCH_MOXIE = (1 << 15), QEMU_ARCH_TRICORE = (1 << 16), QEMU_ARCH_NIOS2 = (1 << 17), QEMU_ARCH_HPPA = (1 << 18), QEMU_ARCH_RISCV = (1 << 19), QEMU_ARCH_RX = (1 << 20), QEMU_ARCH_AVR = (1 << 21), - - QEMU_ARCH_NONE = (1 << 31), + QEMU_ARCH_HEXAGON = (1 << 22), }; extern const uint32_t arch_type; -int kvm_available(void); -int xen_available(void); - -/* default virtio transport per architecture */ -#define QEMU_ARCH_VIRTIO_PCI (QEMU_ARCH_ALPHA | QEMU_ARCH_ARM | \ - QEMU_ARCH_HPPA | QEMU_ARCH_I386 | \ - QEMU_ARCH_MIPS | QEMU_ARCH_PPC | \ - QEMU_ARCH_RISCV | QEMU_ARCH_SH4 | \ - QEMU_ARCH_SPARC | QEMU_ARCH_XTENSA) -#define QEMU_ARCH_VIRTIO_CCW (QEMU_ARCH_S390X) -#define QEMU_ARCH_VIRTIO_MMIO (QEMU_ARCH_M68K) - #endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 880e9032930..e5e1524f065 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -66,6 +66,10 @@ typedef struct BlockDevOps { * Runs when the backend's last drain request ends. */ void (*drained_end)(void *opaque); + /* + * Is the device still busy? 
+ */ + bool (*drained_poll)(void *opaque); } BlockDevOps; /* This struct is embedded in (the private) BlockBackend struct and contains @@ -98,6 +102,7 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); +int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, @@ -121,38 +126,42 @@ BlockBackend *blk_by_dev(void *dev); BlockBackend *blk_by_qdev_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - unsigned int bytes, + int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, + int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, - unsigned int bytes, void *buf, + int64_t bytes, void *buf, BdrvRequestFlags flags) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + assert(bytes <= SIZE_MAX); + return blk_co_preadv(blk, offset, bytes, &qiov, flags); } static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, - unsigned int bytes, void *buf, + int64_t bytes, void *buf, BdrvRequestFlags flags) { QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); + assert(bytes <= SIZE_MAX); + return blk_co_pwritev(blk, offset, bytes, &qiov, flags); } int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags); + int64_t bytes, BdrvRequestFlags flags); BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags, + int64_t bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); @@ -169,15 +178,16 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, +BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes, BlockCompletionFunc *cb, void *opaque); void blk_aio_cancel(BlockAIOCB *acb); void blk_aio_cancel_async(BlockAIOCB *acb); int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); -int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes); -int blk_co_flush(BlockBackend *blk); +int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); +int coroutine_fn blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); int blk_commit_all(void); void blk_inc_in_flight(BlockBackend *blk); @@ -204,7 +214,9 @@ void blk_eject(BlockBackend *blk, bool eject_flag); int blk_get_flags(BlockBackend *blk); uint32_t 
blk_get_request_alignment(BlockBackend *blk); uint32_t blk_get_max_transfer(BlockBackend *blk); +uint64_t blk_get_max_hw_transfer(BlockBackend *blk); int blk_get_max_iov(BlockBackend *blk); +int blk_get_max_hw_iov(BlockBackend *blk); void blk_set_guest_block_size(BlockBackend *blk, int align); void *blk_try_blockalign(BlockBackend *blk, size_t size); void *blk_blockalign(BlockBackend *blk, size_t size); @@ -236,12 +248,12 @@ int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int bytes, BdrvRequestFlags flags); + int64_t bytes, BdrvRequestFlags flags); int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - int bytes); + int64_t bytes); int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size); int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); @@ -262,7 +274,7 @@ void blk_unregister_buf(BlockBackend *blk, void *host); int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, BlockBackend *blk_out, int64_t off_out, - int bytes, BdrvRequestFlags read_flags, + int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); const BdrvChild *blk_root(BlockBackend *blk); diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h index 8a2fe556225..ef060a97590 100644 --- a/include/sysemu/device_tree.h +++ b/include/sysemu/device_tree.h @@ -121,6 +121,7 @@ uint32_t qemu_fdt_get_phandle(void *fdt, const char *path); uint32_t qemu_fdt_alloc_phandle(void *fdt); int qemu_fdt_nop_node(void *fdt, const char *node_path); int qemu_fdt_add_subnode(void *fdt, const char *name); +int qemu_fdt_add_path(void *fdt, const char *path); #define qemu_fdt_setprop_cells(fdt, node_path, property, ...) 
\ do { \ diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h index 12fb54f9902..247f0661d12 100644 --- a/include/sysemu/hax.h +++ b/include/sysemu/hax.h @@ -24,6 +24,8 @@ int hax_sync_vcpus(void); +#ifdef NEED_CPU_H + #ifdef CONFIG_HAX int hax_enabled(void); @@ -34,4 +36,6 @@ int hax_enabled(void); #endif /* CONFIG_HAX */ +#endif /* NEED_CPU_H */ + #endif /* QEMU_HAX_H */ diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 31d7343cb21..3f1af140bd3 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -65,7 +65,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool cheri_tags; - bool prealloc, is_mapped, share; + bool prealloc, is_mapped, share, reserve; uint32_t prealloc_threads; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); HostMemPolicy policy; diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index c98636bc812..bb70082e458 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -16,6 +16,8 @@ #include "qemu/accel.h" #include "qom/object.h" +#ifdef NEED_CPU_H + #ifdef CONFIG_HVF uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); @@ -26,6 +28,8 @@ extern bool hvf_allowed; #define hvf_get_supported_cpuid(func, idx, reg) 0 #endif /* !CONFIG_HVF */ +#endif /* NEED_CPU_H */ + #define TYPE_HVF_ACCEL ACCEL_CLASS_NAME("hvf") typedef struct HVFState HVFState; diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h new file mode 100644 index 00000000000..6545f7cd613 --- /dev/null +++ b/include/sysemu/hvf_int.h @@ -0,0 +1,68 @@ +/* + * QEMU Hypervisor.framework (HVF) support + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +/* header to be included in HVF-specific code */ + +#ifndef HVF_INT_H +#define HVF_INT_H + +#ifdef __aarch64__ +#include +#else +#include +#endif + +/* hvf_slot flags */ +#define HVF_SLOT_LOG (1 << 0) + +typedef struct hvf_slot { + uint64_t start; + uint64_t size; + uint8_t *mem; + int slot_id; + uint32_t flags; + MemoryRegion *region; +} hvf_slot; + +typedef struct hvf_vcpu_caps { + uint64_t vmx_cap_pinbased; + uint64_t vmx_cap_procbased; + uint64_t vmx_cap_procbased2; + uint64_t vmx_cap_entry; + uint64_t vmx_cap_exit; + uint64_t vmx_cap_preemption_timer; +} hvf_vcpu_caps; + +struct HVFState { + AccelState parent; + hvf_slot slots[32]; + int num_slots; + + hvf_vcpu_caps *hvf_caps; + uint64_t vtimer_offset; +}; +extern HVFState *hvf_state; + +struct hvf_vcpu_state { + uint64_t fd; + void *exit; + bool vtimer_masked; + sigset_t unblock_ipi_mask; +}; + +void assert_hvf_ok(hv_return_t ret); +int hvf_arch_init(void); +int hvf_arch_init_vcpu(CPUState *cpu); +void hvf_arch_vcpu_destroy(CPUState *cpu); +int hvf_vcpu_exec(CPUState *); +hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t); +int hvf_put_registers(CPUState *); +int hvf_get_registers(CPUState *); +void hvf_kick_vcpu_thread(CPUState *cpu); + +#endif diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h index 61672f9b322..01b5ebf442a 100644 --- a/include/sysemu/hw_accel.h +++ b/include/sysemu/hw_accel.h @@ -16,6 +16,7 @@ #include "sysemu/kvm.h" #include "sysemu/hvf.h" #include "sysemu/whpx.h" +#include "sysemu/nvmm.h" void cpu_synchronize_state(CPUState *cpu); void cpu_synchronize_post_reset(CPUState *cpu); diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h index f177142f16c..7f714bd1368 100644 --- a/include/sysemu/iothread.h +++ b/include/sysemu/iothread.h @@ -37,6 +37,9 @@ struct 
IOThread { int64_t poll_max_ns; int64_t poll_grow; int64_t poll_shrink; + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; }; typedef struct IOThread IOThread; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a1ab1ee12d3..7b22aeb6ae1 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -547,4 +547,5 @@ bool kvm_cpu_check_are_resettable(void); bool kvm_arch_cpu_check_are_resettable(void); +bool kvm_dirty_ring_enabled(void); #endif diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index ccb8869f01b..1f5487d9b74 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -23,18 +23,21 @@ typedef struct KVMSlot int old_flags; /* Dirty bitmap cache for the slot */ unsigned long *dirty_bmap; + unsigned long dirty_bmap_size; + /* Cache of the address space ID */ + int as_id; + /* Cache of the offset in ram address space */ + ram_addr_t ram_start_offset; } KVMSlot; typedef struct KVMMemoryListener { MemoryListener listener; - /* Protects the slots and all inside them */ - QemuMutex slots_lock; KVMSlot *slots; int as_id; } KVMMemoryListener; void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - AddressSpace *as, int as_id); + AddressSpace *as, int as_id, const char *name); void kvm_set_max_memslot_size(hwaddr max_slot_size); diff --git a/include/sysemu/nvmm.h b/include/sysemu/nvmm.h new file mode 100644 index 00000000000..833670fccbe --- /dev/null +++ b/include/sysemu/nvmm.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator support. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_NVMM_H +#define QEMU_NVMM_H + +#ifdef NEED_CPU_H + +#ifdef CONFIG_NVMM + +int nvmm_enabled(void); + +#else /* CONFIG_NVMM */ + +#define nvmm_enabled() (0) + +#endif /* CONFIG_NVMM */ + +#endif /* NEED_CPU_H */ + +#endif /* QEMU_NVMM_H */ diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 629c8c648b7..2edf33658a4 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -38,6 +38,10 @@ #include #endif +#ifdef __cplusplus +extern "C" { +#endif + void os_set_line_buffering(void); void os_set_proc_name(const char *s); void os_setup_signal_handling(void); @@ -92,4 +96,8 @@ static inline void qemu_funlockfile(FILE *f) funlockfile(f); } +#ifdef __cplusplus +} +#endif + #endif diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 5346d51e890..43f569b5c21 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -30,6 +30,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + #if defined(_WIN64) /* On w64, setjmp is implemented by _setjmp which needs a second parameter. * If this parameter is NULL, longjump does no stack unwinding. 
@@ -194,4 +198,8 @@ ssize_t qemu_recv_wrap(int sockfd, void *buf, size_t len, int flags); ssize_t qemu_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, struct sockaddr *addr, socklen_t *addrlen); +#ifdef __cplusplus +} +#endif + #endif diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h deleted file mode 100644 index 94d821d737c..00000000000 --- a/include/sysemu/sev.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * QEMU Secure Encrypted Virutualization (SEV) support - * - * Copyright: Advanced Micro Devices, 2016-2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef QEMU_SEV_H -#define QEMU_SEV_H - -#include "sysemu/kvm.h" - -bool sev_enabled(void); -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); -int sev_inject_launch_secret(const char *hdr, const char *secret, - uint64_t gpa, Error **errp); - -int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); -void sev_es_set_reset_vector(CPUState *cpu); - -#endif diff --git a/include/sysemu/tcg.h b/include/sysemu/tcg.h index 00349fb18a7..53352450ff6 100644 --- a/include/sysemu/tcg.h +++ b/include/sysemu/tcg.h @@ -8,8 +8,6 @@ #ifndef SYSEMU_TCG_H #define SYSEMU_TCG_H -void tcg_exec_init(unsigned long tb_size, int splitwx); - #ifdef CONFIG_TCG extern bool tcg_allowed; #define tcg_enabled() (tcg_allowed) diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h index 1a85564e479..68b2206463c 100644 --- a/include/sysemu/tpm.h +++ b/include/sysemu/tpm.h @@ -15,6 +15,8 @@ #include "qapi/qapi-types-tpm.h" #include "qom/object.h" +#ifdef CONFIG_TPM + int tpm_config_parse(QemuOptsList *opts_list, const char *optarg); int tpm_init(void); void tpm_cleanup(void); @@ -73,4 +75,11 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) return TPM_IF_GET_CLASS(ti)->get_version(ti); } +#else /* CONFIG_TPM */ + +#define tpm_init() (0) +#define tpm_cleanup() + +#endif /* CONFIG_TPM */ + #endif /* QEMU_TPM_H */ diff --git a/include/sysemu/tpm_backend.h b/include/sysemu/tpm_backend.h index 6f078f5f482..8fd3269c117 100644 --- a/include/sysemu/tpm_backend.h +++ b/include/sysemu/tpm_backend.h @@ -18,6 +18,8 @@ #include "sysemu/tpm.h" #include "qapi/error.h" +#ifdef CONFIG_TPM + #define TYPE_TPM_BACKEND "tpm-backend" OBJECT_DECLARE_TYPE(TPMBackend, TPMBackendClass, TPM_BACKEND) @@ -209,4 +211,6 @@ TPMInfo *tpm_backend_query_tpm(TPMBackend *s); TPMBackend *qemu_find_tpm_be(const char *id); -#endif +#endif /* CONFIG_TPM */ + +#endif /* TPM_BACKEND_H */ diff --git a/include/sysemu/watchdog.h b/include/sysemu/watchdog.h index a08d16380d7..d2d4901dbbe 100644 --- a/include/sysemu/watchdog.h +++ b/include/sysemu/watchdog.h @@ -37,7 +37,6 @@ typedef struct WatchdogTimerModel WatchdogTimerModel; /* in hw/watchdog.c */ int select_watchdog(const char *p); -int select_watchdog_action(const char *action); WatchdogAction get_watchdog_action(void); void watchdog_add_model(WatchdogTimerModel *model); void watchdog_perform_action(void); diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h index 8ca1c1c4ac7..2889fa2278b 100644 --- a/include/sysemu/whpx.h +++ b/include/sysemu/whpx.h @@ -13,6 +13,8 @@ #ifndef QEMU_WHPX_H #define QEMU_WHPX_H +#ifdef NEED_CPU_H + #ifdef CONFIG_WHPX int whpx_enabled(void); @@ -25,4 +27,6 @@ bool whpx_apic_in_platform(void); #endif /* CONFIG_WHPX */ +#endif /* NEED_CPU_H */ + #endif /* QEMU_WHPX_H */ diff --git 
a/include/tcg/tcg-cond.h b/include/tcg/tcg-cond.h new file mode 100644 index 00000000000..3819aa2d0ed --- /dev/null +++ b/include/tcg/tcg-cond.h @@ -0,0 +1,133 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_COND_H +#define TCG_COND_H + +/* + * Conditions. Note that these are laid out for easy manipulation by + * the functions below: + * bit 0 is used for inverting; + * bit 1 is signed, + * bit 2 is unsigned, + * bit 3 is used with bit 0 for swapping signed/unsigned. + */ +typedef enum { + /* non-signed */ + TCG_COND_NEVER = 0 | 0 | 0 | 0, + TCG_COND_ALWAYS = 0 | 0 | 0 | 1, + TCG_COND_EQ = 8 | 0 | 0 | 0, + TCG_COND_NE = 8 | 0 | 0 | 1, + /* signed */ + TCG_COND_LT = 0 | 0 | 2 | 0, + TCG_COND_GE = 0 | 0 | 2 | 1, + TCG_COND_LE = 8 | 0 | 2 | 0, + TCG_COND_GT = 8 | 0 | 2 | 1, + /* unsigned */ + TCG_COND_LTU = 0 | 4 | 0 | 0, + TCG_COND_GEU = 0 | 4 | 0 | 1, + TCG_COND_LEU = 8 | 4 | 0 | 0, + TCG_COND_GTU = 8 | 4 | 0 | 1, +} TCGCond; + +/* Invert the sense of the comparison. */ +static inline TCGCond tcg_invert_cond(TCGCond c) +{ + return (TCGCond)(c ^ 1); +} + +/* Swap the operands in a comparison. */ +static inline TCGCond tcg_swap_cond(TCGCond c) +{ + return c & 6 ? (TCGCond)(c ^ 9) : c; +} + +/* Create an "unsigned" version of a "signed" comparison. */ +static inline TCGCond tcg_unsigned_cond(TCGCond c) +{ + return c & 2 ? (TCGCond)(c ^ 6) : c; +} + +/* Create a "signed" version of an "unsigned" comparison. */ +static inline TCGCond tcg_signed_cond(TCGCond c) +{ + return c & 4 ? (TCGCond)(c ^ 6) : c; +} + +/* Must a comparison be considered unsigned? */ +static inline bool is_unsigned_cond(TCGCond c) +{ + return (c & 4) != 0; +} + +/* + * Create a "high" version of a double-word comparison. + * This removes equality from a LTE or GTE comparison. 
+ */ +static inline TCGCond tcg_high_cond(TCGCond c) +{ + switch (c) { + case TCG_COND_GE: + case TCG_COND_LE: + case TCG_COND_GEU: + case TCG_COND_LEU: + return (TCGCond)(c ^ 8); + default: + return c; + } +} + +static inline const char *tcg_cond_string(TCGCond c) +{ + switch (c) { + case TCG_COND_NEVER: + return "NEVER"; + case TCG_COND_ALWAYS: + return "ALWAYS"; + case TCG_COND_EQ: + return "=="; + case TCG_COND_NE: + return "!="; + case TCG_COND_LT: + return "<_s"; + case TCG_COND_LTU: + return "<_u"; + case TCG_COND_GE: + return ">=_s"; + case TCG_COND_GEU: + return ">=_u"; + case TCG_COND_LE: + return "<=_s"; + case TCG_COND_LEU: + return "<=_u"; + case TCG_COND_GT: + return ">_s"; + case TCG_COND_GTU: + return ">_u"; + default: + return "?"; + } +} + +#endif /* TCG_COND_H */ diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h new file mode 100644 index 00000000000..bf40942de4a --- /dev/null +++ b/include/tcg/tcg-ldst.h @@ -0,0 +1,79 @@ +/* + * Memory helpers that will be used by TCG generated code. + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_LDST_H +#define TCG_LDST_H 1 + +#ifdef CONFIG_SOFTMMU + +/* Value zero-extended to tcg register size. */ +tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); + +/* Value sign-extended to tcg register size. 
*/ +tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); +tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, + MemOpIdx oi, uintptr_t retaddr); + +void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, + MemOpIdx oi, uintptr_t retaddr); +void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, + MemOpIdx oi, uintptr_t retaddr); + +#else + +void QEMU_NORETURN helper_unaligned_ld(CPUArchState *env, target_ulong addr); +void QEMU_NORETURN helper_unaligned_st(CPUArchState *env, target_ulong addr); + +#endif /* CONFIG_SOFTMMU */ +#endif /* TCG_LDST_H */ diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index c69a7de984b..da55fed8704 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -401,4 +401,47 @@ void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t); void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c); +/* 32-bit vector operations. 
*/ +void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); + +void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); + +void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); +void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t); + +#if TARGET_LONG_BITS == 64 +#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i64 +#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i64 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i64 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i64 +#define tcg_gen_vec_add32_tl tcg_gen_vec_add32_i64 +#define tcg_gen_vec_sub32_tl tcg_gen_vec_sub32_i64 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i64 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i64 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i64 +#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i64 +#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i64 +#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i64 + +#else +#define tcg_gen_vec_add8_tl tcg_gen_vec_add8_i32 +#define tcg_gen_vec_sub8_tl tcg_gen_vec_sub8_i32 +#define tcg_gen_vec_add16_tl tcg_gen_vec_add16_i32 +#define tcg_gen_vec_sub16_tl tcg_gen_vec_sub16_i32 +#define tcg_gen_vec_add32_tl tcg_gen_add_i32 +#define tcg_gen_vec_sub32_tl tcg_gen_sub_i32 +#define tcg_gen_vec_shl8i_tl tcg_gen_vec_shl8i_i32 +#define tcg_gen_vec_shr8i_tl tcg_gen_vec_shr8i_i32 +#define tcg_gen_vec_sar8i_tl tcg_gen_vec_sar8i_i32 +#define tcg_gen_vec_shl16i_tl tcg_gen_vec_shl16i_i32 +#define tcg_gen_vec_shr16i_tl tcg_gen_vec_shr16i_i32 +#define tcg_gen_vec_sar16i_tl tcg_gen_vec_sar16i_i32 +#endif + #endif diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h index e3a2a45ec58..3c790c96659 100644 --- a/include/tcg/tcg-op.h +++ b/include/tcg/tcg-op.h @@ -330,7 +330,7 @@ void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); -void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); @@ -338,6 +338,9 @@ void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); +/* Replicate a value of size @vece from @in to all the lanes in @out */ +void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in); + static inline void tcg_gen_discard_i32(TCGv_i32 arg) { tcg_gen_op1_i32(INDEX_op_discard, arg); @@ -530,8 +533,8 @@ void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); -void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg); -void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); +void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags); void tcg_gen_bswap64_i64(TCGv_i64 ret, 
TCGv_i64 arg); void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); @@ -539,6 +542,9 @@ void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); +/* Replicate a value of size @vece from @in to all the lanes in @out */ +void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in); + #if TCG_TARGET_REG_BITS == 64 static inline void tcg_gen_discard_i64(TCGv_i64 arg) { @@ -848,7 +854,6 @@ static inline void tcg_gen_plugin_cb_end(void) #if TARGET_LONG_BITS == 32 #define tcg_temp_new() tcg_temp_new_i32() -#define tcg_global_reg_new tcg_global_reg_new_i32 #define tcg_global_mem_new tcg_global_mem_new_i32 #define tcg_temp_local_new() tcg_temp_local_new_i32() #define tcg_temp_free tcg_temp_free_i32 @@ -860,7 +865,6 @@ static inline void tcg_gen_plugin_cb_end(void) #define tcg_gen_qemu_st_tl_with_checked_addr tcg_gen_qemu_st_i32_with_checked_addr #else #define tcg_temp_new() tcg_temp_new_i64() -#define tcg_global_reg_new tcg_global_reg_new_i64 #define tcg_global_mem_new tcg_global_mem_new_i64 #define tcg_temp_local_new() tcg_temp_local_new_i64() #define tcg_temp_free tcg_temp_free_i64 @@ -1135,6 +1139,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_sextract_tl tcg_gen_sextract_i64 #define tcg_gen_extract2_tl tcg_gen_extract2_i64 #define tcg_const_tl tcg_const_i64 +#define tcg_constant_tl tcg_constant_i64 #define tcg_const_local_tl tcg_const_local_i64 #define tcg_gen_movcond_tl tcg_gen_movcond_i64 #define tcg_gen_add2_tl tcg_gen_add2_i64 @@ -1168,6 +1173,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64 #define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64 #define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec +#define tcg_gen_dup_tl tcg_gen_dup_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -1227,7 +1233,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_ext32u_tl tcg_gen_mov_i32 #define tcg_gen_ext32s_tl tcg_gen_mov_i32 #define tcg_gen_bswap16_tl tcg_gen_bswap16_i32 -#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32 +#define tcg_gen_bswap32_tl(D, S, F) tcg_gen_bswap32_i32(D, S) #define tcg_gen_bswap_tl tcg_gen_bswap32_i32 #define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64 #define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32 @@ -1252,6 +1258,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_sextract_tl tcg_gen_sextract_i32 #define tcg_gen_extract2_tl tcg_gen_extract2_i32 #define tcg_const_tl tcg_const_i32 +#define tcg_constant_tl tcg_constant_i32 #define tcg_const_local_tl tcg_const_local_i32 #define tcg_gen_movcond_tl tcg_gen_movcond_i32 #define tcg_gen_add2_tl tcg_gen_add2_i32 @@ -1285,6 +1292,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32 #define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32 #define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec +#define tcg_gen_dup_tl tcg_gen_dup_i32 #endif static inline void tcg_gen_mov_cap_checked(TCGv_cap_checked_ptr ret, diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h index 14ece2c1993..71d3703867a 100644 --- a/include/tcg/tcg-opc.h +++ b/include/tcg/tcg-opc.h @@ -97,8 +97,8 @@ 
DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) -DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32)) -DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32)) +DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32)) +DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32)) DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) @@ -166,9 +166,9 @@ DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) -DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) -DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) -DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) +DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) +DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) +DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) @@ -195,8 +195,7 @@ DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS, TCG_OPF_NOT_PRESENT) DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) -DEF(goto_ptr, 0, 1, 0, - TCG_OPF_BB_EXIT | TCG_OPF_BB_END | IMPL(TCG_TARGET_HAS_goto_ptr)) +DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END) DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT) DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT) @@ -278,8 +277,8 @@ DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT) #ifdef TCG_TARGET_INTERPRETER /* These opcodes are only for use between the tci generator and interpreter. */ -DEF(tci_movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) -DEF(tci_movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT) +DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) #endif #undef TLADDR_ARGS diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 685e71cd371..4c16a3331a4 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -27,13 +27,13 @@ #include "cpu.h" #include "exec/memop.h" -#include "exec/tb-context.h" +#include "exec/memopidx.h" #include "qemu/bitops.h" #include "qemu/plugin.h" #include "qemu/queue.h" #include "tcg/tcg-mo.h" #include "tcg-target.h" -#include "qemu/int128.h" +#include "tcg/tcg-cond.h" /* XXX: make safe guess about sizes */ #define MAX_OP_PER_INSTR 266 @@ -53,6 +53,7 @@ #define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) #define CPU_TEMP_BUF_NLONGS 128 +#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long)) /* Default target word size to pointer size. */ #ifndef TCG_TARGET_REG_BITS @@ -414,105 +415,17 @@ typedef struct TCGv_cap_checked_ptr_tl_d *TCGv_cap_checked_ptr; /* Used to align parameters. See the comment before tcgv_i32_temp. */ #define TCG_CALL_DUMMY_ARG ((TCGArg)0) -/* Conditions. 
Note that these are laid out for easy manipulation by - the functions below: - bit 0 is used for inverting; - bit 1 is signed, - bit 2 is unsigned, - bit 3 is used with bit 0 for swapping signed/unsigned. */ -typedef enum { - /* non-signed */ - TCG_COND_NEVER = 0 | 0 | 0 | 0, - TCG_COND_ALWAYS = 0 | 0 | 0 | 1, - TCG_COND_EQ = 8 | 0 | 0 | 0, - TCG_COND_NE = 8 | 0 | 0 | 1, - /* signed */ - TCG_COND_LT = 0 | 0 | 2 | 0, - TCG_COND_GE = 0 | 0 | 2 | 1, - TCG_COND_LE = 8 | 0 | 2 | 0, - TCG_COND_GT = 8 | 0 | 2 | 1, - /* unsigned */ - TCG_COND_LTU = 0 | 4 | 0 | 0, - TCG_COND_GEU = 0 | 4 | 0 | 1, - TCG_COND_LEU = 8 | 4 | 0 | 0, - TCG_COND_GTU = 8 | 4 | 0 | 1, -} TCGCond; - -static inline const char *tcg_cond_string(TCGCond c) -{ - switch (c) { - case TCG_COND_NEVER: - return "NEVER"; - case TCG_COND_ALWAYS: - return "ALWAYS"; - case TCG_COND_EQ: - return "=="; - case TCG_COND_NE: - return "!="; - case TCG_COND_LT: - return "<_s"; - case TCG_COND_LTU: - return "<_u"; - case TCG_COND_GE: - return ">=_s"; - case TCG_COND_GEU: - return ">=_u"; - case TCG_COND_LE: - return "<=_s"; - case TCG_COND_LEU: - return "<=_u"; - case TCG_COND_GT: - return ">_s"; - case TCG_COND_GTU: - return ">_u"; - default: - return "?"; - } -} -/* Invert the sense of the comparison. */ -static inline TCGCond tcg_invert_cond(TCGCond c) -{ - return (TCGCond)(c ^ 1); -} - -/* Swap the operands in a comparison. */ -static inline TCGCond tcg_swap_cond(TCGCond c) -{ - return c & 6 ? (TCGCond)(c ^ 9) : c; -} - -/* Create an "unsigned" version of a "signed" comparison. */ -static inline TCGCond tcg_unsigned_cond(TCGCond c) -{ - return c & 2 ? (TCGCond)(c ^ 6) : c; -} - -/* Create a "signed" version of an "unsigned" comparison. */ -static inline TCGCond tcg_signed_cond(TCGCond c) -{ - return c & 4 ? (TCGCond)(c ^ 6) : c; -} - -/* Must a comparison be considered unsigned? */ -static inline bool is_unsigned_cond(TCGCond c) -{ - return (c & 4) != 0; -} - -/* Create a "high" version of a double-word comparison. - This removes equality from a LTE or GTE comparison. */ -static inline TCGCond tcg_high_cond(TCGCond c) -{ - switch (c) { - case TCG_COND_GE: - case TCG_COND_LE: - case TCG_COND_GEU: - case TCG_COND_LEU: - return (TCGCond)(c ^ 8); - default: - return c; - } -} +/* + * Flags for the bswap opcodes. + * If IZ, the input is zero-extended, otherwise unknown. + * If OZ or OS, the output is zero- or sign-extended respectively, + * otherwise the high bits are undefined. + */ +enum { + TCG_BSWAP_IZ = 1, + TCG_BSWAP_OZ = 2, + TCG_BSWAP_OS = 4, +}; typedef enum TCGTempVal { TEMP_VAL_DEAD, @@ -586,9 +499,6 @@ typedef struct TCGOp { /* Next and previous opcodes. */ QTAILQ_ENTRY(TCGOp) link; -#ifdef CONFIG_PLUGIN - QSIMPLEQ_ENTRY(TCGOp) plugin_link; -#endif /* Arguments for the opcode. */ TCGArg args[MAX_OPC_PARAM]; @@ -673,8 +583,6 @@ struct TCGContext { /* Threshold to flush the translated code buffer. 
*/ void *code_gen_highwater; - size_t tb_phys_invalidate_count; - /* Track which vCPU triggers events */ CPUState *cpu; /* *_trans */ @@ -700,9 +608,6 @@ struct TCGContext { /* descriptor of the instruction being translated */ struct qemu_plugin_insn *plugin_insn; - - /* list to quickly access the injected ops */ - QSIMPLEQ_HEAD(, TCGOp) plugin_ops; #endif GHashTable *const_table[TCG_TYPE_COUNT]; @@ -728,7 +633,6 @@ static inline bool temp_readonly(TCGTemp *ts) return ts->kind >= TEMP_FIXED; } -extern TCGContext tcg_init_ctx; extern __thread TCGContext *tcg_ctx; extern const void *tcg_code_gen_epilogue; extern uintptr_t tcg_splitwx_diff; @@ -743,16 +647,7 @@ extern TCGv _pc_is_current; extern TCGv_i32 cpu_rvfi_available_fields; #endif -static inline bool in_code_gen_buffer(const void *p) -{ - const TCGContext *s = &tcg_init_ctx; - /* - * Much like it is valid to have a pointer to the byte past the - * end of an array (so long as you don't dereference it), allow - * a pointer to the byte past the end of the code gen buffer. - */ - return (size_t)(p - s->code_gen_buffer) <= s->code_gen_buffer_size; -} +bool in_code_gen_buffer(const void *p); #ifdef CONFIG_DEBUG_TCG const void *tcg_splitwx_to_rx(void *rw); @@ -926,8 +821,6 @@ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); TranslationBlock *tcg_tb_alloc(TCGContext *s); -void tcg_region_init(void); -void tb_destroy(TranslationBlock *tb); void tcg_region_reset_all(void); size_t tcg_code_size(void); @@ -935,7 +828,6 @@ size_t tcg_code_capacity(void); void tcg_tb_insert(TranslationBlock *tb); void tcg_tb_remove(TranslationBlock *tb); -size_t tcg_tb_phys_invalidate_count(void); TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr); void tcg_tb_foreach(GTraverseFunc func, gpointer user_data); size_t tcg_nb_tbs(void); @@ -959,7 +851,7 @@ static inline void *tcg_malloc(int size) } } -void tcg_context_init(TCGContext *s); +void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); void tcg_register_thread(void); void tcg_prologue_init(TCGContext *s); void tcg_func_start(TCGContext *s); @@ -1066,8 +958,8 @@ int tcg_check_temp_count(void); #endif int64_t tcg_cpu_exec_time(void); -void tcg_dump_info(void); -void tcg_dump_op_count(void); +void tcg_dump_info(GString *buf); +void tcg_dump_op_count(GString *buf); #define TCG_CT_CONST 1 /* any constant of register size */ @@ -1135,6 +1027,16 @@ void tcg_op_remove(TCGContext *s, TCGOp *op); TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc); TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc); +/** + * tcg_remove_ops_after: + * @op: target operation + * + * Discard any opcodes emitted since @op. Expected usage is to save + * a starting point with tcg_last_op(), speculatively emit opcodes, + * then decide whether or not to keep those opcodes after the fact. + */ +void tcg_remove_ops_after(TCGOp *op); + void tcg_optimize(TCGContext *s); /* Allocate a new temporary and initialize it with a constant. */ @@ -1149,7 +1051,8 @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec); /* * Locate or create a read-only temporary that is a constant. - * This kind of temporary need not and should not be freed. + * This kind of temporary need not be freed, but for convenience + * will be silently ignored by tcg_temp_free_*. 
*/ TCGTemp *tcg_constant_internal(TCGType type, int64_t val); @@ -1259,44 +1162,6 @@ static inline size_t tcg_current_code_size(TCGContext *s) return tcg_ptr_byte_diff(s->code_ptr, s->code_buf); } -/* Combine the MemOp and mmu_idx parameters into a single value. */ -typedef uint32_t TCGMemOpIdx; - -/** - * make_memop_idx - * @op: memory operation - * @idx: mmu index - * - * Encode these values into a single parameter. - */ -static inline TCGMemOpIdx make_memop_idx(MemOp op, unsigned idx) -{ - tcg_debug_assert(idx <= 15); - return (op << 4) | idx; -} - -/** - * get_memop - * @oi: combined op/idx parameter - * - * Extract the memory operation from the combined value. - */ -static inline MemOp get_memop(TCGMemOpIdx oi) -{ - return oi >> 4; -} - -/** - * get_mmuidx - * @oi: combined op/idx parameter - * - * Extract the mmu index from the combined value. - */ -static inline unsigned get_mmuidx(TCGMemOpIdx oi) -{ - return oi & 15; -} - /** * tcg_qemu_tb_exec: * @env: pointer to CPUArchState for the CPU @@ -1384,172 +1249,18 @@ uint64_t dup_const(unsigned vece, uint64_t c); : (qemu_build_not_reached_always(), 0)) \ : dup_const(VECE, C)) - -/* - * Memory helpers that will be used by TCG generated code. - */ -#ifdef CONFIG_SOFTMMU -/* Value zero-extended to tcg register size. */ -tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); - -/* Value sign-extended to tcg register size. */ -tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); - -void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, - TCGMemOpIdx oi, uintptr_t retaddr); - -/* Temporary aliases until backends are converted. 
*/ -#ifdef TARGET_WORDS_BIGENDIAN -# define helper_ret_ldsw_mmu helper_be_ldsw_mmu -# define helper_ret_lduw_mmu helper_be_lduw_mmu -# define helper_ret_ldsl_mmu helper_be_ldsl_mmu -# define helper_ret_ldul_mmu helper_be_ldul_mmu -# define helper_ret_ldl_mmu helper_be_ldul_mmu -# define helper_ret_ldq_mmu helper_be_ldq_mmu -# define helper_ret_stw_mmu helper_be_stw_mmu -# define helper_ret_stl_mmu helper_be_stl_mmu -# define helper_ret_stq_mmu helper_be_stq_mmu -#else -# define helper_ret_ldsw_mmu helper_le_ldsw_mmu -# define helper_ret_lduw_mmu helper_le_lduw_mmu -# define helper_ret_ldsl_mmu helper_le_ldsl_mmu -# define helper_ret_ldul_mmu helper_le_ldul_mmu -# define helper_ret_ldl_mmu helper_le_ldul_mmu -# define helper_ret_ldq_mmu helper_le_ldq_mmu -# define helper_ret_stw_mmu helper_le_stw_mmu -# define helper_ret_stl_mmu helper_le_stl_mmu -# define helper_ret_stq_mmu helper_le_stq_mmu -#endif - -uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, - uint64_t cmpv, uint64_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, - uint32_t cmpv, uint32_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); -uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, - uint64_t cmpv, uint64_t newv, - TCGMemOpIdx oi, uintptr_t retaddr); - -#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ -TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \ - (CPUArchState *env, target_ulong addr, TYPE val, \ - TCGMemOpIdx oi, uintptr_t retaddr); - -#ifdef CONFIG_ATOMIC64 -#define GEN_ATOMIC_HELPER_ALL(NAME) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ - GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ - GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#if TARGET_LONG_BITS == 64 +# define dup_const_tl dup_const #else -#define GEN_ATOMIC_HELPER_ALL(NAME) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ - GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +# define dup_const_tl(VECE, C) \ + (__builtin_constant_p(VECE) \ + ? ( (VECE) == MO_8 ? 0x01010101ul * (uint8_t)(C) \ + : (VECE) == MO_16 ? 0x00010001ul * (uint16_t)(C) \ + : (VECE) == MO_32 ? 
0x00000001ul * (uint32_t)(C) \ + : (qemu_build_not_reached_always(), 0)) \ + : (target_long)dup_const(VECE, C)) #endif -GEN_ATOMIC_HELPER_ALL(fetch_add) -GEN_ATOMIC_HELPER_ALL(fetch_sub) -GEN_ATOMIC_HELPER_ALL(fetch_and) -GEN_ATOMIC_HELPER_ALL(fetch_or) -GEN_ATOMIC_HELPER_ALL(fetch_xor) -GEN_ATOMIC_HELPER_ALL(fetch_smin) -GEN_ATOMIC_HELPER_ALL(fetch_umin) -GEN_ATOMIC_HELPER_ALL(fetch_smax) -GEN_ATOMIC_HELPER_ALL(fetch_umax) - -GEN_ATOMIC_HELPER_ALL(add_fetch) -GEN_ATOMIC_HELPER_ALL(sub_fetch) -GEN_ATOMIC_HELPER_ALL(and_fetch) -GEN_ATOMIC_HELPER_ALL(or_fetch) -GEN_ATOMIC_HELPER_ALL(xor_fetch) -GEN_ATOMIC_HELPER_ALL(smin_fetch) -GEN_ATOMIC_HELPER_ALL(umin_fetch) -GEN_ATOMIC_HELPER_ALL(smax_fetch) -GEN_ATOMIC_HELPER_ALL(umax_fetch) - -GEN_ATOMIC_HELPER_ALL(xchg) - -#undef GEN_ATOMIC_HELPER_ALL -#undef GEN_ATOMIC_HELPER -#endif /* CONFIG_SOFTMMU */ - -/* - * These aren't really a "proper" helpers because TCG cannot manage Int128. - * However, use the same format as the others, for use by the backends. - * - * The cmpxchg functions are only defined if HAVE_CMPXCHG128; - * the ld/st functions are only defined if HAVE_ATOMIC128, - * as defined by . - */ -Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, - Int128 cmpv, Int128 newv, - TCGMemOpIdx oi, uintptr_t retaddr); -Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, - Int128 cmpv, Int128 newv, - TCGMemOpIdx oi, uintptr_t retaddr); - -Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, - TCGMemOpIdx oi, uintptr_t retaddr); -void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, - TCGMemOpIdx oi, uintptr_t retaddr); - #ifdef CONFIG_DEBUG_TCG void tcg_assert_listed_vecop(TCGOpcode); #else diff --git a/include/ui/clipboard.h b/include/ui/clipboard.h new file mode 100644 index 00000000000..6298986b15c --- /dev/null +++ b/include/ui/clipboard.h @@ -0,0 +1,226 @@ +#ifndef QEMU_CLIPBOARD_H +#define QEMU_CLIPBOARD_H + +#include "qemu/notify.h" + +/** + * DOC: Introduction + * + * The header ``ui/clipboard.h`` declares the qemu clipboard interface. + * + * All qemu elements which want use the clipboard can register as + * clipboard peer. Subsequently they can set the clipboard content + * and get notifications for clipboard updates. + * + * Typical users are user interfaces (gtk), remote access protocols + * (vnc) and devices talking to the guest (vdagent). + * + * Even though the design allows different data types only plain text + * is supported for now. + */ + +typedef enum QemuClipboardType QemuClipboardType; +typedef enum QemuClipboardSelection QemuClipboardSelection; +typedef struct QemuClipboardPeer QemuClipboardPeer; +typedef struct QemuClipboardInfo QemuClipboardInfo; + +/** + * enum QemuClipboardType + * + * @QEMU_CLIPBOARD_TYPE_TEXT: text/plain; charset=utf-8 + * @QEMU_CLIPBOARD_TYPE__COUNT: type count. + */ +enum QemuClipboardType { + QEMU_CLIPBOARD_TYPE_TEXT, + QEMU_CLIPBOARD_TYPE__COUNT, +}; + +/* same as VD_AGENT_CLIPBOARD_SELECTION_* */ +/** + * enum QemuClipboardSelection + * + * @QEMU_CLIPBOARD_SELECTION_CLIPBOARD: clipboard (explitcit cut+paste). + * @QEMU_CLIPBOARD_SELECTION_PRIMARY: primary selection (select + middle mouse button). + * @QEMU_CLIPBOARD_SELECTION_SECONDARY: secondary selection (dunno). 
+ * @QEMU_CLIPBOARD_SELECTION__COUNT: selection count. + */ +enum QemuClipboardSelection { + QEMU_CLIPBOARD_SELECTION_CLIPBOARD, + QEMU_CLIPBOARD_SELECTION_PRIMARY, + QEMU_CLIPBOARD_SELECTION_SECONDARY, + QEMU_CLIPBOARD_SELECTION__COUNT, +}; + +/** + * struct QemuClipboardPeer + * + * @name: peer name. + * @update: notifier for clipboard updates. + * @request: callback for clipboard data requests. + * + * Clipboard peer description. + */ +struct QemuClipboardPeer { + const char *name; + Notifier update; + void (*request)(QemuClipboardInfo *info, + QemuClipboardType type); +}; + +/** + * struct QemuClipboardInfo + * + * @refcount: reference counter. + * @owner: clipboard owner. + * @selection: clipboard selection. + * @types: clipboard data array (one entry per type). + * + * Clipboard content data and metadata. + */ +struct QemuClipboardInfo { + uint32_t refcount; + QemuClipboardPeer *owner; + QemuClipboardSelection selection; + struct { + bool available; + bool requested; + size_t size; + void *data; + } types[QEMU_CLIPBOARD_TYPE__COUNT]; +}; + +/** + * qemu_clipboard_peer_register + * + * @peer: peer information. + * + * Register clipboard peer. Registering is needed for both active + * (set+grab clipboard) and passive (watch clipboard for updates) + * interaction with the qemu clipboard. + */ +void qemu_clipboard_peer_register(QemuClipboardPeer *peer); + +/** + * qemu_clipboard_peer_unregister + * + * @peer: peer information. + * + * Unregister clipboard peer. + */ +void qemu_clipboard_peer_unregister(QemuClipboardPeer *peer); + +/** + * qemu_clipboard_peer_owns + * + * @peer: peer information. + * @selection: clipboard selection. + * + * Return TRUE if the peer owns the clipboard. + */ +bool qemu_clipboard_peer_owns(QemuClipboardPeer *peer, + QemuClipboardSelection selection); + +/** + * qemu_clipboard_peer_release + * + * @peer: peer information. + * @selection: clipboard selection. + * + * If the peer owns the clipboard, release it. + */ +void qemu_clipboard_peer_release(QemuClipboardPeer *peer, + QemuClipboardSelection selection); + +/** + * qemu_clipboard_info + * + * @selection: clipboard selection. + * + * Return the current clipboard data & owner informations. + */ +QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection); + +/** + * qemu_clipboard_info_new + * + * @owner: clipboard owner. + * @selection: clipboard selection. + * + * Allocate a new QemuClipboardInfo and initialize it with the given + * @owner and @selection. + * + * QemuClipboardInfo is a reference-counted struct. The new struct is + * returned with a reference already taken (i.e. reference count is + * one). + */ +QemuClipboardInfo *qemu_clipboard_info_new(QemuClipboardPeer *owner, + QemuClipboardSelection selection); +/** + * qemu_clipboard_info_ref + * + * @info: clipboard info. + * + * Increase @info reference count. + */ +QemuClipboardInfo *qemu_clipboard_info_ref(QemuClipboardInfo *info); + +/** + * qemu_clipboard_info_unref + * + * @info: clipboard info. + * + * Decrease @info reference count. When the count goes down to zero + * free the @info struct itself and all clipboard data. + */ +void qemu_clipboard_info_unref(QemuClipboardInfo *info); + +/** + * qemu_clipboard_update + * + * @info: clipboard info. + * + * Update the qemu clipboard. Notify all registered peers (including + * the clipboard owner) that the qemu clipboard has been updated. 
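Taken together, the declarations above are all a peer needs: register once, watch the update notifier, answer request callbacks, and publish data with qemu_clipboard_set_data(). A minimal sketch of such a peer, assuming the update notifier is invoked with the updated QemuClipboardInfo as its data argument; the example_* names are placeholders rather than part of the API.

#include "qemu/osdep.h"
#include "ui/clipboard.h"

static QemuClipboardPeer example_peer;

/* Another peer updated the clipboard; ask for the text when available. */
static void example_update(Notifier *notifier, void *data)
{
    QemuClipboardInfo *info = data;

    if (info->owner != &example_peer &&
        info->types[QEMU_CLIPBOARD_TYPE_TEXT].available) {
        qemu_clipboard_request(info, QEMU_CLIPBOARD_TYPE_TEXT);
    }
}

/* Another peer asked for data we advertised; hand it over. */
static void example_request(QemuClipboardInfo *info, QemuClipboardType type)
{
    if (type == QEMU_CLIPBOARD_TYPE_TEXT) {
        qemu_clipboard_set_data(&example_peer, info, type,
                                strlen("hello"), "hello", true);
    }
}

static void example_clipboard_init(void)
{
    example_peer.name = "example";
    example_peer.update.notify = example_update;
    example_peer.request = example_request;
    qemu_clipboard_peer_register(&example_peer);
}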
+ * + * This is used for both new completely clipboard content and for + * clipboard data updates in response to qemu_clipboard_request() + * calls. + */ +void qemu_clipboard_update(QemuClipboardInfo *info); + +/** + * qemu_clipboard_request + * + * @info: clipboard info. + * @type: clipboard data type. + * + * Request clipboard content. Typically the clipboard owner only + * advertises the available data types and provides the actual data + * only on request. + */ +void qemu_clipboard_request(QemuClipboardInfo *info, + QemuClipboardType type); + +/** + * qemu_clipboard_set_data + * + * @peer: clipboard peer. + * @info: clipboard info. + * @type: clipboard data type. + * @size: data size. + * @data: data blob. + * @update: notify peers about the update. + * + * Set clipboard content for the given @type. This function will make + * a copy of the content data and store that. + */ +void qemu_clipboard_set_data(QemuClipboardPeer *peer, + QemuClipboardInfo *info, + QemuClipboardType type, + uint32_t size, + const void *data, + bool update); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(QemuClipboardInfo, qemu_clipboard_info_unref) + +#endif /* QEMU_CLIPBOARD_H */ diff --git a/include/ui/console.h b/include/ui/console.h index ca3c7af6a6c..6d678924f6f 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -167,7 +167,15 @@ typedef struct QemuDmaBuf { uint32_t fourcc; uint64_t modifier; uint32_t texture; + uint32_t x; + uint32_t y; + uint32_t scanout_width; + uint32_t scanout_height; bool y0_top; + void *sync; + int fence_fd; + bool allow_fences; + bool draw_submitted; } QemuDmaBuf; typedef struct DisplayState DisplayState; @@ -471,4 +479,9 @@ bool vnc_display_reload_certs(const char *id, Error **errp); /* input.c */ int index_from_key(const char *key, size_t key_length); +#ifdef CONFIG_LINUX +/* udmabuf.c */ +int udmabuf_fd(void); +#endif + #endif diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h index f1bf8f97fc3..2fb6e0dd6b8 100644 --- a/include/ui/egl-helpers.h +++ b/include/ui/egl-helpers.h @@ -19,6 +19,7 @@ typedef struct egl_fb { GLuint texture; GLuint framebuffer; bool delete_texture; + QemuDmaBuf *dmabuf; } egl_fb; void egl_fb_destroy(egl_fb *fb); @@ -45,6 +46,8 @@ int egl_get_fd_for_texture(uint32_t tex_id, EGLint *stride, EGLint *fourcc, void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf); void egl_dmabuf_release_texture(QemuDmaBuf *dmabuf); +void egl_dmabuf_create_sync(QemuDmaBuf *dmabuf); +void egl_dmabuf_create_fence(QemuDmaBuf *dmabuf); #endif diff --git a/include/ui/gtk.h b/include/ui/gtk.h index 5ae0ad60a60..7d22affd381 100644 --- a/include/ui/gtk.h +++ b/include/ui/gtk.h @@ -18,11 +18,18 @@ #include #endif +#include "ui/clipboard.h" +#include "ui/console.h" #include "ui/kbd-state.h" #if defined(CONFIG_OPENGL) #include "ui/egl-helpers.h" #include "ui/egl-context.h" #endif +#ifdef CONFIG_VTE +#include "qemu/fifo8.h" +#endif + +#define MAX_VCS 10 typedef struct GtkDisplayState GtkDisplayState; @@ -58,6 +65,7 @@ typedef struct VirtualVteConsole { GtkWidget *scrollbar; GtkWidget *terminal; Chardev *chr; + Fifo8 out_fifo; bool echo; } VirtualVteConsole; #endif @@ -83,11 +91,71 @@ typedef struct VirtualConsole { }; } VirtualConsole; +struct GtkDisplayState { + GtkWidget *window; + + GtkWidget *menu_bar; + + GtkAccelGroup *accel_group; + + GtkWidget *machine_menu_item; + GtkWidget *machine_menu; + GtkWidget *pause_item; + GtkWidget *reset_item; + GtkWidget *powerdown_item; + GtkWidget *quit_item; + + GtkWidget *view_menu_item; + GtkWidget *view_menu; + GtkWidget 
*full_screen_item; + GtkWidget *copy_item; + GtkWidget *zoom_in_item; + GtkWidget *zoom_out_item; + GtkWidget *zoom_fixed_item; + GtkWidget *zoom_fit_item; + GtkWidget *grab_item; + GtkWidget *grab_on_hover_item; + + int nb_vcs; + VirtualConsole vc[MAX_VCS]; + + GtkWidget *show_tabs_item; + GtkWidget *untabify_item; + GtkWidget *show_menubar_item; + + GtkWidget *vbox; + GtkWidget *notebook; + int button_mask; + gboolean last_set; + int last_x; + int last_y; + int grab_x_root; + int grab_y_root; + VirtualConsole *kbd_owner; + VirtualConsole *ptr_owner; + + gboolean full_screen; + + GdkCursor *null_cursor; + Notifier mouse_mode_notifier; + gboolean free_scale; + + bool external_pause_update; + + QemuClipboardPeer cbpeer; + uint32_t cbpending[QEMU_CLIPBOARD_SELECTION__COUNT]; + GtkClipboard *gtkcb[QEMU_CLIPBOARD_SELECTION__COUNT]; + bool cbowner[QEMU_CLIPBOARD_SELECTION__COUNT]; + + DisplayOptions *opts; +}; + extern bool gtk_use_gl_area; /* ui/gtk.c */ void gd_update_windowsize(VirtualConsole *vc); int gd_monitor_update_interval(GtkWidget *widget); +void gd_hw_gl_flushed(void *vc); /* ui/gtk-egl.c */ void gd_egl_init(VirtualConsole *vc); @@ -114,8 +182,8 @@ void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl, uint32_t hot_x, uint32_t hot_y); void gd_egl_cursor_position(DisplayChangeListener *dcl, uint32_t pos_x, uint32_t pos_y); -void gd_egl_release_dmabuf(DisplayChangeListener *dcl, - QemuDmaBuf *dmabuf); +void gd_egl_flush(DisplayChangeListener *dcl, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); void gd_egl_scanout_flush(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h); void gtk_egl_init(DisplayGLMode mode); @@ -150,4 +218,7 @@ void gtk_gl_area_init(void); int gd_gl_area_make_current(DisplayChangeListener *dcl, QEMUGLContext ctx); +/* gtk-clipboard.c */ +void gd_clipboard_init(GtkDisplayState *gd); + #endif /* UI_GTK_H */ diff --git a/include/ui/qemu-pixman.h b/include/ui/qemu-pixman.h index 87737a6f162..806ddcd7cda 100644 --- a/include/ui/qemu-pixman.h +++ b/include/ui/qemu-pixman.h @@ -62,6 +62,7 @@ typedef struct PixelFormat { PixelFormat qemu_pixelformat_from_pixman(pixman_format_code_t format); pixman_format_code_t qemu_default_pixman_format(int bpp, bool native_endian); pixman_format_code_t qemu_drm_format_to_pixman(uint32_t drm_format); +uint32_t qemu_pixman_to_drm_format(pixman_format_code_t pixman); int qemu_pixman_get_type(int rshift, int gshift, int bshift); pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf); bool qemu_pixman_check_format(DisplayChangeListener *dcl, diff --git a/include/user/syscall-trace.h b/include/user/syscall-trace.h index 42e3b48b032..614cfacfa58 100644 --- a/include/user/syscall-trace.h +++ b/include/user/syscall-trace.h @@ -7,8 +7,8 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _SYSCALL_TRACE_H_ -#define _SYSCALL_TRACE_H_ +#ifndef SYSCALL_TRACE_H +#define SYSCALL_TRACE_H #include "trace/trace-root.h" diff --git a/io/channel-socket.c b/io/channel-socket.c index de259f7eed2..606ec97cf7c 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -487,15 +487,15 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); -#ifdef MSG_CMSG_CLOEXEC - sflags |= MSG_CMSG_CLOEXEC; -#endif - msg.msg_iov = (struct iovec *)iov; msg.msg_iovlen = niov; if (fds && nfds) { msg.msg_control = control; msg.msg_controllen = sizeof(control); +#ifdef MSG_CMSG_CLOEXEC + sflags |= MSG_CMSG_CLOEXEC; +#endif + } retry: diff --git a/io/channel-websock.c 
b/io/channel-websock.c index 03c1f7cb62f..70889bb54da 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -177,15 +177,9 @@ qio_channel_websock_handshake_send_res(QIOChannelWebsock *ioc, static gchar *qio_channel_websock_date_str(void) { - struct tm tm; - time_t now = time(NULL); - char datebuf[128]; + g_autoptr(GDateTime) now = g_date_time_new_now_utc(); - gmtime_r(&now, &tm); - - strftime(datebuf, sizeof(datebuf), "%a, %d %b %Y %H:%M:%S GMT", &tm); - - return g_strdup(datebuf); + return g_date_time_format(now, "%a, %d %b %Y %H:%M:%S GMT"); } static void qio_channel_websock_handshake_send_res_err(QIOChannelWebsock *ioc, diff --git a/io/dns-resolver.c b/io/dns-resolver.c index 743a0efc876..53b0e8407a9 100644 --- a/io/dns-resolver.c +++ b/io/dns-resolver.c @@ -122,6 +122,10 @@ static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver, .ipv4 = iaddr->ipv4, .has_ipv6 = iaddr->has_ipv6, .ipv6 = iaddr->ipv6, +#ifdef HAVE_IPPROTO_MPTCP + .has_mptcp = iaddr->has_mptcp, + .mptcp = iaddr->mptcp, +#endif }; (*addrs)[i] = newaddr; diff --git a/io/net-listener.c b/io/net-listener.c index 46c2643d005..1c984d69c69 100644 --- a/io/net-listener.c +++ b/io/net-listener.c @@ -292,6 +292,9 @@ static void qio_net_listener_finalize(Object *obj) QIONetListener *listener = QIO_NET_LISTENER(obj); size_t i; + if (listener->io_notify) { + listener->io_notify(listener->io_data); + } qio_net_listener_disconnect(listener); for (i = 0; i < listener->nsioc; i++) { diff --git a/io/trace-events b/io/trace-events index d7bc70b9666..c5e814eb446 100644 --- a/io/trace-events +++ b/io/trace-events @@ -1,4 +1,4 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. # task.c qio_task_new(void *task, void *source, void *func, void *opaque) "Task new task=%p source=%p func=%p opaque=%p" diff --git a/iothread.c b/iothread.c index 7f086387be9..0f98af0f2aa 100644 --- a/iothread.c +++ b/iothread.c @@ -39,13 +39,6 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD, #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL #endif -static __thread IOThread *my_iothread; - -AioContext *qemu_get_current_aio_context(void) -{ - return my_iothread ? my_iothread->ctx : qemu_get_aio_context(); -} - static void *iothread_run(void *opaque) { IOThread *iothread = opaque; @@ -56,7 +49,7 @@ static void *iothread_run(void *opaque) * in this new thread uses glib. 
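The qio_channel_websock_date_str() rewrite above swaps the manual gmtime_r()/strftime() sequence for GLib's GDateTime, dropping the fixed-size stack buffer along the way. A standalone sketch of the same pattern, using only public GLib calls; http_date_now() is just an illustrative name.

#include <glib.h>

/* Format the current UTC time the way HTTP response headers expect it. */
static gchar *http_date_now(void)
{
    g_autoptr(GDateTime) now = g_date_time_new_now_utc();

    return g_date_time_format(now, "%a, %d %b %Y %H:%M:%S GMT");
}

int main(void)
{
    g_autofree gchar *date = http_date_now();

    g_print("%s\n", date);
    return 0;
}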
*/ g_main_context_push_thread_default(iothread->worker_context); - my_iothread = iothread; + qemu_set_current_aio_context(iothread->ctx); iothread->thread_id = qemu_get_thread_id(); qemu_sem_post(&iothread->init_done_sem); @@ -159,6 +152,24 @@ static void iothread_init_gcontext(IOThread *iothread) iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); } +static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) +{ + ERRP_GUARD(); + + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, + iothread->poll_shrink, + errp); + if (*errp) { + return; + } + + aio_context_set_aio_params(iothread->ctx, + iothread->aio_max_batch, + errp); +} + static void iothread_complete(UserCreatable *obj, Error **errp) { Error *local_error = NULL; @@ -178,11 +189,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) */ iothread_init_gcontext(iothread); - aio_context_set_poll_params(iothread->ctx, - iothread->poll_max_ns, - iothread->poll_grow, - iothread->poll_shrink, - &local_error); + iothread_set_aio_context_params(iothread, &local_error); if (local_error) { error_propagate(errp, local_error); aio_context_unref(iothread->ctx); @@ -208,48 +215,70 @@ static void iothread_complete(UserCreatable *obj, Error **errp) typedef struct { const char *name; ptrdiff_t offset; /* field's byte offset in IOThread struct */ -} PollParamInfo; +} IOThreadParamInfo; -static PollParamInfo poll_max_ns_info = { +static IOThreadParamInfo poll_max_ns_info = { "poll-max-ns", offsetof(IOThread, poll_max_ns), }; -static PollParamInfo poll_grow_info = { +static IOThreadParamInfo poll_grow_info = { "poll-grow", offsetof(IOThread, poll_grow), }; -static PollParamInfo poll_shrink_info = { +static IOThreadParamInfo poll_shrink_info = { "poll-shrink", offsetof(IOThread, poll_shrink), }; +static IOThreadParamInfo aio_max_batch_info = { + "aio-max-batch", offsetof(IOThread, aio_max_batch), +}; -static void iothread_get_poll_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) +static void iothread_get_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) { IOThread *iothread = IOTHREAD(obj); - PollParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; visit_type_int64(v, name, field, errp); } -static void iothread_set_poll_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) +static bool iothread_set_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) { IOThread *iothread = IOTHREAD(obj); - PollParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; int64_t value; if (!visit_type_int64(v, name, &value, errp)) { - return; + return false; } if (value < 0) { error_setg(errp, "%s value must be in range [0, %" PRId64 "]", info->name, INT64_MAX); - return; + return false; } *field = value; + return true; +} + +static void iothread_get_poll_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThreadParamInfo *info = opaque; + + iothread_get_param(obj, v, name, info, errp); +} + +static void iothread_set_poll_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + IOThreadParamInfo *info = opaque; + + if (!iothread_set_param(obj, v, name, info, errp)) { + return; + } + if (iothread->ctx) { aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, @@ -259,6 +288,31 @@ static void 
iothread_set_poll_param(Object *obj, Visitor *v, } } +static void iothread_get_aio_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThreadParamInfo *info = opaque; + + iothread_get_param(obj, v, name, info, errp); +} + +static void iothread_set_aio_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +{ + IOThread *iothread = IOTHREAD(obj); + IOThreadParamInfo *info = opaque; + + if (!iothread_set_param(obj, v, name, info, errp)) { + return; + } + + if (iothread->ctx) { + aio_context_set_aio_params(iothread->ctx, + iothread->aio_max_batch, + errp); + } +} + static void iothread_class_init(ObjectClass *klass, void *class_data) { UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); @@ -276,6 +330,10 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) iothread_get_poll_param, iothread_set_poll_param, NULL, &poll_shrink_info); + object_class_property_add(klass, "aio-max-batch", "int", + iothread_get_aio_param, + iothread_set_aio_param, + NULL, &aio_max_batch_info); } static const TypeInfo iothread_info = { @@ -325,6 +383,7 @@ static int query_one_iothread(Object *object, void *opaque) info->poll_max_ns = iothread->poll_max_ns; info->poll_grow = iothread->poll_grow; info->poll_shrink = iothread->poll_shrink; + info->aio_max_batch = iothread->aio_max_batch; QAPI_LIST_APPEND(*tail, info); return 0; diff --git a/job-qmp.c b/job-qmp.c index 34c4da094f2..829a28aa70e 100644 --- a/job-qmp.c +++ b/job-qmp.c @@ -144,16 +144,20 @@ void qmp_job_dismiss(const char *id, Error **errp) static JobInfo *job_query_single(Job *job, Error **errp) { JobInfo *info; + uint64_t progress_current; + uint64_t progress_total; assert(!job_is_internal(job)); + progress_get_snapshot(&job->progress, &progress_current, + &progress_total); info = g_new(JobInfo, 1); *info = (JobInfo) { .id = g_strdup(job->id), .type = job_type(job), .status = job->status, - .current_progress = job->progress.current, - .total_progress = job->progress.total, + .current_progress = progress_current, + .total_progress = progress_total, .has_error = !!job->err, .error = job->err ? \ g_strdup(error_get_pretty(job->err)) : NULL, diff --git a/job.c b/job.c index 4aff13d95ab..dbfa67bb0a3 100644 --- a/job.c +++ b/job.c @@ -216,6 +216,13 @@ const char *job_type_str(const Job *job) } bool job_is_cancelled(Job *job) +{ + /* force_cancel may be true only if cancelled is true, too */ + assert(job->cancelled || !job->force_cancel); + return job->force_cancel; +} + +bool job_cancel_requested(Job *job) { return job->cancelled; } @@ -339,6 +346,8 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, job->cb = cb; job->opaque = opaque; + progress_init(&job->progress); + notifier_list_init(&job->on_finalize_cancelled); notifier_list_init(&job->on_finalize_completed); notifier_list_init(&job->on_pending); @@ -382,6 +391,7 @@ void job_unref(Job *job) QLIST_REMOVE(job, job_list); + progress_destroy(&job->progress); error_free(job->err); g_free(job->id); g_free(job); @@ -716,8 +726,12 @@ static int job_finalize_single(Job *job) static void job_cancel_async(Job *job, bool force) { if (job->driver->cancel) { - job->driver->cancel(job); + force = job->driver->cancel(job, force); + } else { + /* No .cancel() means the job will behave as if force-cancelled */ + force = true; } + if (job->user_paused) { /* Do not call job_enter here, the caller will handle it. 
*/ if (job->driver->user_resume) { @@ -727,14 +741,23 @@ static void job_cancel_async(Job *job, bool force) assert(job->pause_count > 0); job->pause_count--; } - job->cancelled = true; - /* To prevent 'force == false' overriding a previous 'force == true' */ - job->force_cancel |= force; + + /* + * Ignore soft cancel requests after the job is already done + * (We will still invoke job->driver->cancel() above, but if the + * job driver supports soft cancelling and the job is done, that + * should be a no-op, too. We still call it so it can override + * @force.) + */ + if (force || !job->deferred_to_main_loop) { + job->cancelled = true; + /* To prevent 'force == false' overriding a previous 'force == true' */ + job->force_cancel |= force; + } } static void job_completed_txn_abort(Job *job) { - AioContext *outer_ctx = job->aio_context; AioContext *ctx; JobTxn *txn = job->txn; Job *other_job; @@ -748,10 +771,14 @@ static void job_completed_txn_abort(Job *job) txn->aborting = true; job_txn_ref(txn); - /* We can only hold the single job's AioContext lock while calling + /* + * We can only hold the single job's AioContext lock while calling * job_finalize_single() because the finalization callbacks can involve - * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */ - aio_context_release(outer_ctx); + * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. + * Note that the job's AioContext may change when it is finalized. + */ + job_ref(job); + aio_context_release(job->aio_context); /* Other jobs are effectively cancelled by us, set the status for * them; this job, however, may or may not be cancelled, depending @@ -760,23 +787,37 @@ static void job_completed_txn_abort(Job *job) if (other_job != job) { ctx = other_job->aio_context; aio_context_acquire(ctx); - job_cancel_async(other_job, false); + /* + * This is a transaction: If one job failed, no result will matter. + * Therefore, pass force=true to terminate all other jobs as quickly + * as possible. + */ + job_cancel_async(other_job, true); aio_context_release(ctx); } } while (!QLIST_EMPTY(&txn->jobs)) { other_job = QLIST_FIRST(&txn->jobs); + /* + * The job's AioContext may change, so store it in @ctx so we + * release the same context that we have acquired before. + */ ctx = other_job->aio_context; aio_context_acquire(ctx); if (!job_is_completed(other_job)) { - assert(job_is_cancelled(other_job)); + assert(job_cancel_requested(other_job)); job_finish_sync(other_job, NULL, NULL); } job_finalize_single(other_job); aio_context_release(ctx); } - aio_context_acquire(outer_ctx); + /* + * Use job_ref()/job_unref() so we can read the AioContext here + * even if the job went away during job_finalize_single(). + */ + aio_context_acquire(job->aio_context); + job_unref(job); job_txn_unref(txn); } @@ -939,7 +980,19 @@ void job_cancel(Job *job, bool force) if (!job_started(job)) { job_completed(job); } else if (job->deferred_to_main_loop) { - job_completed_txn_abort(job); + /* + * job_cancel_async() ignores soft-cancel requests for jobs + * that are already done (i.e. deferred to the main loop). We + * have to check again whether the job is really cancelled. + * (job_cancel_requested() and job_is_cancelled() are equivalent + * here, because job_cancel_async() will make soft-cancel + * requests no-ops when deferred_to_main_loop is true. We + * choose to call job_is_cancelled() to show that we invoke + * job_completed_txn_abort() only for force-cancelled jobs.) 
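After this change the two predicates split cleanly: job_cancel_requested() stays true for any cancel request, while job_is_cancelled() reports only force-cancel. A sketch of how a job's main loop might use the pair under that split; more_work_left(), do_one_unit_of_work() and finish_gracefully() are hypothetical placeholders for a driver's real logic.

#include "qemu/osdep.h"
#include "qemu/job.h"

/* Hypothetical helpers standing in for a concrete job driver. */
bool more_work_left(Job *job);
void do_one_unit_of_work(Job *job);
void finish_gracefully(Job *job);

static int coroutine_fn example_job_run(Job *job, Error **errp)
{
    /* Force-cancel aborts the loop; a soft cancel request does not. */
    while (!job_is_cancelled(job) && more_work_left(job)) {
        do_one_unit_of_work(job);
        job_pause_point(job);
    }

    if (job_cancel_requested(job) && !job_is_cancelled(job)) {
        /* Soft cancel: wind the job down gracefully instead of aborting. */
        finish_gracefully(job);
    }
    return 0;
}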
+ */ + if (job_is_cancelled(job)) { + job_completed_txn_abort(job); + } } else { job_enter(job); } @@ -961,9 +1014,21 @@ static void job_cancel_err(Job *job, Error **errp) job_cancel(job, false); } -int job_cancel_sync(Job *job) +/** + * Same as job_cancel_err(), but force-cancel. + */ +static void job_force_cancel_err(Job *job, Error **errp) +{ + job_cancel(job, true); +} + +int job_cancel_sync(Job *job, bool force) { - return job_finish_sync(job, &job_cancel_err, NULL); + if (force) { + return job_finish_sync(job, &job_force_cancel_err, NULL); + } else { + return job_finish_sync(job, &job_cancel_err, NULL); + } } void job_cancel_sync_all(void) @@ -974,7 +1039,7 @@ void job_cancel_sync_all(void) while ((job = job_next(NULL))) { aio_context = job->aio_context; aio_context_acquire(aio_context); - job_cancel_sync(job); + job_cancel_sync(job, true); aio_context_release(aio_context); } } @@ -991,7 +1056,7 @@ void job_complete(Job *job, Error **errp) if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { return; } - if (job_is_cancelled(job) || !job->driver->complete) { + if (job_cancel_requested(job) || !job->driver->complete) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; diff --git a/libdecnumber/decContext.c b/libdecnumber/decContext.c index 7d97a65ac56..1956edf0a7a 100644 --- a/libdecnumber/decContext.c +++ b/libdecnumber/decContext.c @@ -53,12 +53,13 @@ static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */ const uByte DECSTICKYTAB[10]={1,1,2,3,4,6,6,7,8,9}; /* used if sticky */ /* ------------------------------------------------------------------ */ -/* Powers of ten (powers[n]==10**n, 0<=n<=9) */ +/* Powers of ten (powers[n]==10**n, 0<=n<=19) */ /* ------------------------------------------------------------------ */ -const uLong DECPOWERS[19] = {1, 10, 100, 1000, 10000, 100000, 1000000, +const uLong DECPOWERS[20] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000ULL, 100000000000ULL, 1000000000000ULL, 10000000000000ULL, 100000000000000ULL, 1000000000000000ULL, - 10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL, }; + 10000000000000000ULL, 100000000000000000ULL, 1000000000000000000ULL, + 10000000000000000000ULL,}; /* ------------------------------------------------------------------ */ /* decContextClearStatus -- clear bits in current status */ diff --git a/libdecnumber/decNumber.c b/libdecnumber/decNumber.c index 1ffe458ad83..31282adafdc 100644 --- a/libdecnumber/decNumber.c +++ b/libdecnumber/decNumber.c @@ -167,6 +167,7 @@ /* ------------------------------------------------------------------ */ #include "qemu/osdep.h" +#include "qemu/host-utils.h" #include "libdecnumber/dconfig.h" #include "libdecnumber/decNumber.h" #include "libdecnumber/decNumberLocal.h" @@ -263,6 +264,7 @@ static decNumber * decTrim(decNumber *, decContext *, Flag, Int *); static Int decUnitAddSub(const Unit *, Int, const Unit *, Int, Int, Unit *, Int); static Int decUnitCompare(const Unit *, Int, const Unit *, Int, Int); +static bool mulUInt128ByPowOf10(uLong *, uLong *, uInt); #if !DECSUBSET /* decFinish == decFinalize when no subset arithmetic needed */ @@ -462,6 +464,41 @@ decNumber *decNumberFromUInt64(decNumber *dn, uint64_t uin) return dn; } /* decNumberFromUInt64 */ +decNumber *decNumberFromInt128(decNumber *dn, uint64_t lo, int64_t hi) +{ + uint64_t unsig_hi = hi; + if (hi < 0) { + if (lo == 0) { + unsig_hi = -unsig_hi; + } else { + unsig_hi = ~unsig_hi; + lo = -lo; + } + } + + decNumberFromUInt128(dn, lo, 
unsig_hi); + if (hi < 0) { + dn->bits = DECNEG; /* sign needed */ + } + return dn; +} /* decNumberFromInt128 */ + +decNumber *decNumberFromUInt128(decNumber *dn, uint64_t lo, uint64_t hi) +{ + uint64_t rem; + Unit *up; /* work pointer */ + decNumberZero(dn); /* clean */ + if (lo == 0 && hi == 0) { + return dn; /* [or decGetDigits bad call] */ + } + for (up = dn->lsu; hi > 0 || lo > 0; up++) { + rem = divu128(&lo, &hi, DECDPUNMAX + 1); + *up = (Unit)rem; + } + dn->digits = decGetDigits(dn->lsu, up - dn->lsu); + return dn; +} /* decNumberFromUInt128 */ + /* ------------------------------------------------------------------ */ /* to-int64 -- conversion to int64 */ /* */ @@ -506,6 +543,68 @@ int64_t decNumberIntegralToInt64(const decNumber *dn, decContext *set) return 0; } /* decNumberIntegralToInt64 */ +/* ------------------------------------------------------------------ */ +/* decNumberIntegralToInt128 -- conversion to int128 */ +/* */ +/* dn is the decNumber to convert. dn is assumed to have been */ +/* rounded to a floating point integer value. */ +/* set is the context for reporting errors */ +/* returns the converted decNumber via plow and phigh */ +/* */ +/* Invalid is set if the decNumber is a NaN, Infinite or is out of */ +/* range for a signed 128 bit integer. */ +/* ------------------------------------------------------------------ */ + +void decNumberIntegralToInt128(const decNumber *dn, decContext *set, + uint64_t *plow, uint64_t *phigh) +{ + int d; /* work */ + const Unit *up; /* .. */ + uint64_t lo = 0, hi = 0; + + if (decNumberIsSpecial(dn) || (dn->exponent < 0) || + (dn->digits + dn->exponent > 39)) { + goto Invalid; + } + + up = dn->lsu; /* -> lsu */ + + for (d = (dn->digits - 1) / DECDPUN; d >= 0; d--) { + if (mulu128(&lo, &hi, DECDPUNMAX + 1)) { + /* overflow */ + goto Invalid; + } + if (uadd64_overflow(lo, up[d], &lo)) { + if (uadd64_overflow(hi, 1, &hi)) { + /* overflow */ + goto Invalid; + } + } + } + + if (mulUInt128ByPowOf10(&lo, &hi, dn->exponent)) { + /* overflow */ + goto Invalid; + } + + if (decNumberIsNegative(dn)) { + if (lo == 0) { + *phigh = -hi; + *plow = 0; + } else { + *phigh = ~hi; + *plow = -lo; + } + } else { + *plow = lo; + *phigh = hi; + } + + return; + +Invalid: + decContextSetStatus(set, DEC_Invalid_operation); +} /* decNumberIntegralToInt128 */ /* ------------------------------------------------------------------ */ /* to-scientific-string -- conversion to numeric string */ @@ -7849,6 +7948,38 @@ static Int decGetDigits(Unit *uar, Int len) { return digits; } /* decGetDigits */ +/* ------------------------------------------------------------------ */ +/* mulUInt128ByPowOf10 -- multiply a 128-bit unsigned integer by a */ +/* power of 10. */ +/* */ +/* The 128-bit factor composed of plow and phigh is multiplied */ +/* by 10^exp. */ +/* */ +/* plow pointer to the low 64 bits of the first factor */ +/* phigh pointer to the high 64 bits of the first factor */ +/* exp the exponent of the power of 10 of the second factor */ +/* */ +/* If the result fits in 128 bits, returns false and the */ +/* multiplication result through plow and phigh. */ +/* Otherwise, returns true. 
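With decNumberFromInt128() and decNumberIntegralToInt128() in place, a 128-bit integer can be round-tripped through decNumber form. A minimal sketch of that round trip, assuming it is built inside the QEMU tree and that DECNUMDIGITS is defined large enough (39 digits) before the decNumber headers are included.

#include "qemu/osdep.h"

#define DECNUMDIGITS 39                 /* room for any 128-bit integer */
#include "libdecnumber/dconfig.h"
#include "libdecnumber/decContext.h"
#include "libdecnumber/decNumber.h"

/* Convert a signed 128-bit value to decNumber and back again. */
static bool roundtrip_int128(uint64_t lo, int64_t hi,
                             uint64_t *out_lo, uint64_t *out_hi)
{
    decContext set;
    decNumber dn;

    decContextDefault(&set, DEC_INIT_DECIMAL128);
    decNumberFromInt128(&dn, lo, hi);
    decNumberIntegralToInt128(&dn, &set, out_lo, out_hi);

    /* Out-of-range or non-integer input raises DEC_Invalid_operation. */
    return !(set.status & DEC_Invalid_operation);
}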
*/ +/* ------------------------------------------------------------------ */ +static bool mulUInt128ByPowOf10(uLong *plow, uLong *phigh, uInt pow10) +{ + while (pow10 >= ARRAY_SIZE(powers)) { + if (mulu128(plow, phigh, powers[ARRAY_SIZE(powers) - 1])) { + /* Overflow */ + return true; + } + pow10 -= ARRAY_SIZE(powers) - 1; + } + + if (pow10 > 0) { + return mulu128(plow, phigh, powers[pow10]); + } else { + return false; + } +} + #if DECTRACE | DECCHECK /* ------------------------------------------------------------------ */ /* decNumberShow -- display a number [debug aid] */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index b6a0eaa32ae..3d2ce9912dc 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -184,6 +184,17 @@ struct kvm_vcpu_events { __u32 reserved[12]; }; +struct kvm_arm_copy_mte_tags { + __u64 guest_ipa; + __u64 length; + void *addr; + __u64 flags; + __u64 reserved[2]; +}; + +#define KVM_ARM_TAGS_TO_GUEST 0 +#define KVM_ARM_TAGS_FROM_GUEST 1 + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h index f94f65d429b..1567a3294c3 100644 --- a/linux-headers/asm-generic/mman-common.h +++ b/linux-headers/asm-generic/mman-common.h @@ -72,6 +72,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 72875291778..f211961ce1d 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -861,9 +861,20 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) __SYSCALL(__NR_process_madvise, sys_process_madvise) #define __NR_epoll_pwait2 441 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_fd 443 +__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd) + +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #undef __NR_syscalls -#define __NR_syscalls 442 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index 57dc2ac4f8b..40b210c65a5 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -98,6 +98,9 @@ #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ +#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */ +#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 59e53b6e076..09cd297698e 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -1,376 +1,380 @@ -#ifndef _ASM_MIPS_UNISTD_N32_H -#define _ASM_MIPS_UNISTD_N32_H 
+#ifndef _ASM_UNISTD_N32_H +#define _ASM_UNISTD_N32_H -#define __NR_read (__NR_Linux + 0) -#define __NR_write (__NR_Linux + 1) -#define __NR_open (__NR_Linux + 2) -#define __NR_close (__NR_Linux + 3) -#define __NR_stat (__NR_Linux + 4) -#define __NR_fstat (__NR_Linux + 5) -#define __NR_lstat (__NR_Linux + 6) -#define __NR_poll (__NR_Linux + 7) -#define __NR_lseek (__NR_Linux + 8) -#define __NR_mmap (__NR_Linux + 9) -#define __NR_mprotect (__NR_Linux + 10) -#define __NR_munmap (__NR_Linux + 11) -#define __NR_brk (__NR_Linux + 12) -#define __NR_rt_sigaction (__NR_Linux + 13) -#define __NR_rt_sigprocmask (__NR_Linux + 14) -#define __NR_ioctl (__NR_Linux + 15) -#define __NR_pread64 (__NR_Linux + 16) -#define __NR_pwrite64 (__NR_Linux + 17) -#define __NR_readv (__NR_Linux + 18) -#define __NR_writev (__NR_Linux + 19) -#define __NR_access (__NR_Linux + 20) -#define __NR_pipe (__NR_Linux + 21) -#define __NR__newselect (__NR_Linux + 22) -#define __NR_sched_yield (__NR_Linux + 23) -#define __NR_mremap (__NR_Linux + 24) -#define __NR_msync (__NR_Linux + 25) -#define __NR_mincore (__NR_Linux + 26) -#define __NR_madvise (__NR_Linux + 27) -#define __NR_shmget (__NR_Linux + 28) -#define __NR_shmat (__NR_Linux + 29) -#define __NR_shmctl (__NR_Linux + 30) -#define __NR_dup (__NR_Linux + 31) -#define __NR_dup2 (__NR_Linux + 32) -#define __NR_pause (__NR_Linux + 33) -#define __NR_nanosleep (__NR_Linux + 34) -#define __NR_getitimer (__NR_Linux + 35) -#define __NR_setitimer (__NR_Linux + 36) -#define __NR_alarm (__NR_Linux + 37) -#define __NR_getpid (__NR_Linux + 38) -#define __NR_sendfile (__NR_Linux + 39) -#define __NR_socket (__NR_Linux + 40) -#define __NR_connect (__NR_Linux + 41) -#define __NR_accept (__NR_Linux + 42) -#define __NR_sendto (__NR_Linux + 43) -#define __NR_recvfrom (__NR_Linux + 44) -#define __NR_sendmsg (__NR_Linux + 45) -#define __NR_recvmsg (__NR_Linux + 46) -#define __NR_shutdown (__NR_Linux + 47) -#define __NR_bind (__NR_Linux + 48) -#define __NR_listen (__NR_Linux + 49) -#define __NR_getsockname (__NR_Linux + 50) -#define __NR_getpeername (__NR_Linux + 51) -#define __NR_socketpair (__NR_Linux + 52) -#define __NR_setsockopt (__NR_Linux + 53) -#define __NR_getsockopt (__NR_Linux + 54) -#define __NR_clone (__NR_Linux + 55) -#define __NR_fork (__NR_Linux + 56) -#define __NR_execve (__NR_Linux + 57) -#define __NR_exit (__NR_Linux + 58) -#define __NR_wait4 (__NR_Linux + 59) -#define __NR_kill (__NR_Linux + 60) -#define __NR_uname (__NR_Linux + 61) -#define __NR_semget (__NR_Linux + 62) -#define __NR_semop (__NR_Linux + 63) -#define __NR_semctl (__NR_Linux + 64) -#define __NR_shmdt (__NR_Linux + 65) -#define __NR_msgget (__NR_Linux + 66) -#define __NR_msgsnd (__NR_Linux + 67) -#define __NR_msgrcv (__NR_Linux + 68) -#define __NR_msgctl (__NR_Linux + 69) -#define __NR_fcntl (__NR_Linux + 70) -#define __NR_flock (__NR_Linux + 71) -#define __NR_fsync (__NR_Linux + 72) -#define __NR_fdatasync (__NR_Linux + 73) -#define __NR_truncate (__NR_Linux + 74) -#define __NR_ftruncate (__NR_Linux + 75) -#define __NR_getdents (__NR_Linux + 76) -#define __NR_getcwd (__NR_Linux + 77) -#define __NR_chdir (__NR_Linux + 78) -#define __NR_fchdir (__NR_Linux + 79) -#define __NR_rename (__NR_Linux + 80) -#define __NR_mkdir (__NR_Linux + 81) -#define __NR_rmdir (__NR_Linux + 82) -#define __NR_creat (__NR_Linux + 83) -#define __NR_link (__NR_Linux + 84) -#define __NR_unlink (__NR_Linux + 85) -#define __NR_symlink (__NR_Linux + 86) -#define __NR_readlink (__NR_Linux + 87) -#define __NR_chmod (__NR_Linux + 88) -#define 
__NR_fchmod (__NR_Linux + 89) -#define __NR_chown (__NR_Linux + 90) -#define __NR_fchown (__NR_Linux + 91) -#define __NR_lchown (__NR_Linux + 92) -#define __NR_umask (__NR_Linux + 93) -#define __NR_gettimeofday (__NR_Linux + 94) -#define __NR_getrlimit (__NR_Linux + 95) -#define __NR_getrusage (__NR_Linux + 96) -#define __NR_sysinfo (__NR_Linux + 97) -#define __NR_times (__NR_Linux + 98) -#define __NR_ptrace (__NR_Linux + 99) -#define __NR_getuid (__NR_Linux + 100) -#define __NR_syslog (__NR_Linux + 101) -#define __NR_getgid (__NR_Linux + 102) -#define __NR_setuid (__NR_Linux + 103) -#define __NR_setgid (__NR_Linux + 104) -#define __NR_geteuid (__NR_Linux + 105) -#define __NR_getegid (__NR_Linux + 106) -#define __NR_setpgid (__NR_Linux + 107) -#define __NR_getppid (__NR_Linux + 108) -#define __NR_getpgrp (__NR_Linux + 109) -#define __NR_setsid (__NR_Linux + 110) -#define __NR_setreuid (__NR_Linux + 111) -#define __NR_setregid (__NR_Linux + 112) -#define __NR_getgroups (__NR_Linux + 113) -#define __NR_setgroups (__NR_Linux + 114) -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_getresuid (__NR_Linux + 116) -#define __NR_setresgid (__NR_Linux + 117) -#define __NR_getresgid (__NR_Linux + 118) -#define __NR_getpgid (__NR_Linux + 119) -#define __NR_setfsuid (__NR_Linux + 120) -#define __NR_setfsgid (__NR_Linux + 121) -#define __NR_getsid (__NR_Linux + 122) -#define __NR_capget (__NR_Linux + 123) -#define __NR_capset (__NR_Linux + 124) -#define __NR_rt_sigpending (__NR_Linux + 125) -#define __NR_rt_sigtimedwait (__NR_Linux + 126) -#define __NR_rt_sigqueueinfo (__NR_Linux + 127) -#define __NR_rt_sigsuspend (__NR_Linux + 128) -#define __NR_sigaltstack (__NR_Linux + 129) -#define __NR_utime (__NR_Linux + 130) -#define __NR_mknod (__NR_Linux + 131) -#define __NR_personality (__NR_Linux + 132) -#define __NR_ustat (__NR_Linux + 133) -#define __NR_statfs (__NR_Linux + 134) -#define __NR_fstatfs (__NR_Linux + 135) -#define __NR_sysfs (__NR_Linux + 136) -#define __NR_getpriority (__NR_Linux + 137) -#define __NR_setpriority (__NR_Linux + 138) -#define __NR_sched_setparam (__NR_Linux + 139) -#define __NR_sched_getparam (__NR_Linux + 140) -#define __NR_sched_setscheduler (__NR_Linux + 141) -#define __NR_sched_getscheduler (__NR_Linux + 142) -#define __NR_sched_get_priority_max (__NR_Linux + 143) -#define __NR_sched_get_priority_min (__NR_Linux + 144) -#define __NR_sched_rr_get_interval (__NR_Linux + 145) -#define __NR_mlock (__NR_Linux + 146) -#define __NR_munlock (__NR_Linux + 147) -#define __NR_mlockall (__NR_Linux + 148) -#define __NR_munlockall (__NR_Linux + 149) -#define __NR_vhangup (__NR_Linux + 150) -#define __NR_pivot_root (__NR_Linux + 151) -#define __NR__sysctl (__NR_Linux + 152) -#define __NR_prctl (__NR_Linux + 153) -#define __NR_adjtimex (__NR_Linux + 154) -#define __NR_setrlimit (__NR_Linux + 155) -#define __NR_chroot (__NR_Linux + 156) -#define __NR_sync (__NR_Linux + 157) -#define __NR_acct (__NR_Linux + 158) -#define __NR_settimeofday (__NR_Linux + 159) -#define __NR_mount (__NR_Linux + 160) -#define __NR_umount2 (__NR_Linux + 161) -#define __NR_swapon (__NR_Linux + 162) -#define __NR_swapoff (__NR_Linux + 163) -#define __NR_reboot (__NR_Linux + 164) -#define __NR_sethostname (__NR_Linux + 165) -#define __NR_setdomainname (__NR_Linux + 166) -#define __NR_create_module (__NR_Linux + 167) -#define __NR_init_module (__NR_Linux + 168) -#define __NR_delete_module (__NR_Linux + 169) -#define __NR_get_kernel_syms (__NR_Linux + 170) -#define __NR_query_module (__NR_Linux + 171) -#define 
__NR_quotactl (__NR_Linux + 172) -#define __NR_nfsservctl (__NR_Linux + 173) -#define __NR_getpmsg (__NR_Linux + 174) -#define __NR_putpmsg (__NR_Linux + 175) -#define __NR_afs_syscall (__NR_Linux + 176) -#define __NR_reserved177 (__NR_Linux + 177) -#define __NR_gettid (__NR_Linux + 178) -#define __NR_readahead (__NR_Linux + 179) -#define __NR_setxattr (__NR_Linux + 180) -#define __NR_lsetxattr (__NR_Linux + 181) -#define __NR_fsetxattr (__NR_Linux + 182) -#define __NR_getxattr (__NR_Linux + 183) -#define __NR_lgetxattr (__NR_Linux + 184) -#define __NR_fgetxattr (__NR_Linux + 185) -#define __NR_listxattr (__NR_Linux + 186) -#define __NR_llistxattr (__NR_Linux + 187) -#define __NR_flistxattr (__NR_Linux + 188) -#define __NR_removexattr (__NR_Linux + 189) -#define __NR_lremovexattr (__NR_Linux + 190) -#define __NR_fremovexattr (__NR_Linux + 191) -#define __NR_tkill (__NR_Linux + 192) -#define __NR_reserved193 (__NR_Linux + 193) -#define __NR_futex (__NR_Linux + 194) -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#define __NR_cacheflush (__NR_Linux + 197) -#define __NR_cachectl (__NR_Linux + 198) -#define __NR_sysmips (__NR_Linux + 199) -#define __NR_io_setup (__NR_Linux + 200) -#define __NR_io_destroy (__NR_Linux + 201) -#define __NR_io_getevents (__NR_Linux + 202) -#define __NR_io_submit (__NR_Linux + 203) -#define __NR_io_cancel (__NR_Linux + 204) -#define __NR_exit_group (__NR_Linux + 205) -#define __NR_lookup_dcookie (__NR_Linux + 206) -#define __NR_epoll_create (__NR_Linux + 207) -#define __NR_epoll_ctl (__NR_Linux + 208) -#define __NR_epoll_wait (__NR_Linux + 209) -#define __NR_remap_file_pages (__NR_Linux + 210) -#define __NR_rt_sigreturn (__NR_Linux + 211) -#define __NR_fcntl64 (__NR_Linux + 212) -#define __NR_set_tid_address (__NR_Linux + 213) -#define __NR_restart_syscall (__NR_Linux + 214) -#define __NR_semtimedop (__NR_Linux + 215) -#define __NR_fadvise64 (__NR_Linux + 216) -#define __NR_statfs64 (__NR_Linux + 217) -#define __NR_fstatfs64 (__NR_Linux + 218) -#define __NR_sendfile64 (__NR_Linux + 219) -#define __NR_timer_create (__NR_Linux + 220) -#define __NR_timer_settime (__NR_Linux + 221) -#define __NR_timer_gettime (__NR_Linux + 222) -#define __NR_timer_getoverrun (__NR_Linux + 223) -#define __NR_timer_delete (__NR_Linux + 224) -#define __NR_clock_settime (__NR_Linux + 225) -#define __NR_clock_gettime (__NR_Linux + 226) -#define __NR_clock_getres (__NR_Linux + 227) -#define __NR_clock_nanosleep (__NR_Linux + 228) -#define __NR_tgkill (__NR_Linux + 229) -#define __NR_utimes (__NR_Linux + 230) -#define __NR_mbind (__NR_Linux + 231) -#define __NR_get_mempolicy (__NR_Linux + 232) -#define __NR_set_mempolicy (__NR_Linux + 233) -#define __NR_mq_open (__NR_Linux + 234) -#define __NR_mq_unlink (__NR_Linux + 235) -#define __NR_mq_timedsend (__NR_Linux + 236) -#define __NR_mq_timedreceive (__NR_Linux + 237) -#define __NR_mq_notify (__NR_Linux + 238) -#define __NR_mq_getsetattr (__NR_Linux + 239) -#define __NR_vserver (__NR_Linux + 240) -#define __NR_waitid (__NR_Linux + 241) -#define __NR_add_key (__NR_Linux + 243) -#define __NR_request_key (__NR_Linux + 244) -#define __NR_keyctl (__NR_Linux + 245) -#define __NR_set_thread_area (__NR_Linux + 246) -#define __NR_inotify_init (__NR_Linux + 247) -#define __NR_inotify_add_watch (__NR_Linux + 248) -#define __NR_inotify_rm_watch (__NR_Linux + 249) -#define __NR_migrate_pages (__NR_Linux + 250) -#define __NR_openat (__NR_Linux + 251) -#define __NR_mkdirat (__NR_Linux + 252) -#define 
__NR_mknodat (__NR_Linux + 253) -#define __NR_fchownat (__NR_Linux + 254) -#define __NR_futimesat (__NR_Linux + 255) -#define __NR_newfstatat (__NR_Linux + 256) -#define __NR_unlinkat (__NR_Linux + 257) -#define __NR_renameat (__NR_Linux + 258) -#define __NR_linkat (__NR_Linux + 259) -#define __NR_symlinkat (__NR_Linux + 260) -#define __NR_readlinkat (__NR_Linux + 261) -#define __NR_fchmodat (__NR_Linux + 262) -#define __NR_faccessat (__NR_Linux + 263) -#define __NR_pselect6 (__NR_Linux + 264) -#define __NR_ppoll (__NR_Linux + 265) -#define __NR_unshare (__NR_Linux + 266) -#define __NR_splice (__NR_Linux + 267) -#define __NR_sync_file_range (__NR_Linux + 268) -#define __NR_tee (__NR_Linux + 269) -#define __NR_vmsplice (__NR_Linux + 270) -#define __NR_move_pages (__NR_Linux + 271) -#define __NR_set_robust_list (__NR_Linux + 272) -#define __NR_get_robust_list (__NR_Linux + 273) -#define __NR_kexec_load (__NR_Linux + 274) -#define __NR_getcpu (__NR_Linux + 275) -#define __NR_epoll_pwait (__NR_Linux + 276) -#define __NR_ioprio_set (__NR_Linux + 277) -#define __NR_ioprio_get (__NR_Linux + 278) -#define __NR_utimensat (__NR_Linux + 279) -#define __NR_signalfd (__NR_Linux + 280) -#define __NR_timerfd (__NR_Linux + 281) -#define __NR_eventfd (__NR_Linux + 282) -#define __NR_fallocate (__NR_Linux + 283) -#define __NR_timerfd_create (__NR_Linux + 284) -#define __NR_timerfd_gettime (__NR_Linux + 285) -#define __NR_timerfd_settime (__NR_Linux + 286) -#define __NR_signalfd4 (__NR_Linux + 287) -#define __NR_eventfd2 (__NR_Linux + 288) -#define __NR_epoll_create1 (__NR_Linux + 289) -#define __NR_dup3 (__NR_Linux + 290) -#define __NR_pipe2 (__NR_Linux + 291) -#define __NR_inotify_init1 (__NR_Linux + 292) -#define __NR_preadv (__NR_Linux + 293) -#define __NR_pwritev (__NR_Linux + 294) -#define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) -#define __NR_perf_event_open (__NR_Linux + 296) -#define __NR_accept4 (__NR_Linux + 297) -#define __NR_recvmmsg (__NR_Linux + 298) -#define __NR_getdents64 (__NR_Linux + 299) -#define __NR_fanotify_init (__NR_Linux + 300) -#define __NR_fanotify_mark (__NR_Linux + 301) -#define __NR_prlimit64 (__NR_Linux + 302) -#define __NR_name_to_handle_at (__NR_Linux + 303) -#define __NR_open_by_handle_at (__NR_Linux + 304) -#define __NR_clock_adjtime (__NR_Linux + 305) -#define __NR_syncfs (__NR_Linux + 306) -#define __NR_sendmmsg (__NR_Linux + 307) -#define __NR_setns (__NR_Linux + 308) -#define __NR_process_vm_readv (__NR_Linux + 309) -#define __NR_process_vm_writev (__NR_Linux + 310) -#define __NR_kcmp (__NR_Linux + 311) -#define __NR_finit_module (__NR_Linux + 312) -#define __NR_sched_setattr (__NR_Linux + 313) -#define __NR_sched_getattr (__NR_Linux + 314) -#define __NR_renameat2 (__NR_Linux + 315) -#define __NR_seccomp (__NR_Linux + 316) -#define __NR_getrandom (__NR_Linux + 317) -#define __NR_memfd_create (__NR_Linux + 318) -#define __NR_bpf (__NR_Linux + 319) -#define __NR_execveat (__NR_Linux + 320) -#define __NR_userfaultfd (__NR_Linux + 321) -#define __NR_membarrier (__NR_Linux + 322) -#define __NR_mlock2 (__NR_Linux + 323) -#define __NR_copy_file_range (__NR_Linux + 324) -#define __NR_preadv2 (__NR_Linux + 325) -#define __NR_pwritev2 (__NR_Linux + 326) -#define __NR_pkey_mprotect (__NR_Linux + 327) -#define __NR_pkey_alloc (__NR_Linux + 328) -#define __NR_pkey_free (__NR_Linux + 329) -#define __NR_statx (__NR_Linux + 330) -#define __NR_rseq (__NR_Linux + 331) -#define __NR_io_pgetevents (__NR_Linux + 332) -#define __NR_clock_gettime64 (__NR_Linux + 403) -#define 
__NR_clock_settime64 (__NR_Linux + 404) -#define __NR_clock_adjtime64 (__NR_Linux + 405) -#define __NR_clock_getres_time64 (__NR_Linux + 406) -#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) -#define __NR_timer_gettime64 (__NR_Linux + 408) -#define __NR_timer_settime64 (__NR_Linux + 409) -#define __NR_timerfd_gettime64 (__NR_Linux + 410) -#define __NR_timerfd_settime64 (__NR_Linux + 411) -#define __NR_utimensat_time64 (__NR_Linux + 412) -#define __NR_pselect6_time64 (__NR_Linux + 413) -#define __NR_ppoll_time64 (__NR_Linux + 414) -#define __NR_io_pgetevents_time64 (__NR_Linux + 416) -#define __NR_recvmmsg_time64 (__NR_Linux + 417) -#define __NR_mq_timedsend_time64 (__NR_Linux + 418) -#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) -#define __NR_semtimedop_time64 (__NR_Linux + 420) -#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) -#define __NR_futex_time64 (__NR_Linux + 422) -#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) -#define __NR_pidfd_send_signal (__NR_Linux + 424) -#define __NR_io_uring_setup (__NR_Linux + 425) -#define __NR_io_uring_enter (__NR_Linux + 426) -#define __NR_io_uring_register (__NR_Linux + 427) -#define __NR_open_tree (__NR_Linux + 428) -#define __NR_move_mount (__NR_Linux + 429) -#define __NR_fsopen (__NR_Linux + 430) -#define __NR_fsconfig (__NR_Linux + 431) -#define __NR_fsmount (__NR_Linux + 432) -#define __NR_fspick (__NR_Linux + 433) -#define __NR_pidfd_open (__NR_Linux + 434) -#define __NR_clone3 (__NR_Linux + 435) -#define __NR_close_range (__NR_Linux + 436) -#define __NR_openat2 (__NR_Linux + 437) -#define __NR_pidfd_getfd (__NR_Linux + 438) -#define __NR_faccessat2 (__NR_Linux + 439) -#define __NR_process_madvise (__NR_Linux + 440) -#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_read (__NR_Linux + 0) +#define __NR_write (__NR_Linux + 1) +#define __NR_open (__NR_Linux + 2) +#define __NR_close (__NR_Linux + 3) +#define __NR_stat (__NR_Linux + 4) +#define __NR_fstat (__NR_Linux + 5) +#define __NR_lstat (__NR_Linux + 6) +#define __NR_poll (__NR_Linux + 7) +#define __NR_lseek (__NR_Linux + 8) +#define __NR_mmap (__NR_Linux + 9) +#define __NR_mprotect (__NR_Linux + 10) +#define __NR_munmap (__NR_Linux + 11) +#define __NR_brk (__NR_Linux + 12) +#define __NR_rt_sigaction (__NR_Linux + 13) +#define __NR_rt_sigprocmask (__NR_Linux + 14) +#define __NR_ioctl (__NR_Linux + 15) +#define __NR_pread64 (__NR_Linux + 16) +#define __NR_pwrite64 (__NR_Linux + 17) +#define __NR_readv (__NR_Linux + 18) +#define __NR_writev (__NR_Linux + 19) +#define __NR_access (__NR_Linux + 20) +#define __NR_pipe (__NR_Linux + 21) +#define __NR__newselect (__NR_Linux + 22) +#define __NR_sched_yield (__NR_Linux + 23) +#define __NR_mremap (__NR_Linux + 24) +#define __NR_msync (__NR_Linux + 25) +#define __NR_mincore (__NR_Linux + 26) +#define __NR_madvise (__NR_Linux + 27) +#define __NR_shmget (__NR_Linux + 28) +#define __NR_shmat (__NR_Linux + 29) +#define __NR_shmctl (__NR_Linux + 30) +#define __NR_dup (__NR_Linux + 31) +#define __NR_dup2 (__NR_Linux + 32) +#define __NR_pause (__NR_Linux + 33) +#define __NR_nanosleep (__NR_Linux + 34) +#define __NR_getitimer (__NR_Linux + 35) +#define __NR_setitimer (__NR_Linux + 36) +#define __NR_alarm (__NR_Linux + 37) +#define __NR_getpid (__NR_Linux + 38) +#define __NR_sendfile (__NR_Linux + 39) +#define __NR_socket (__NR_Linux + 40) +#define __NR_connect (__NR_Linux + 41) +#define __NR_accept (__NR_Linux + 42) +#define __NR_sendto (__NR_Linux + 43) +#define __NR_recvfrom (__NR_Linux + 44) +#define __NR_sendmsg 
(__NR_Linux + 45) +#define __NR_recvmsg (__NR_Linux + 46) +#define __NR_shutdown (__NR_Linux + 47) +#define __NR_bind (__NR_Linux + 48) +#define __NR_listen (__NR_Linux + 49) +#define __NR_getsockname (__NR_Linux + 50) +#define __NR_getpeername (__NR_Linux + 51) +#define __NR_socketpair (__NR_Linux + 52) +#define __NR_setsockopt (__NR_Linux + 53) +#define __NR_getsockopt (__NR_Linux + 54) +#define __NR_clone (__NR_Linux + 55) +#define __NR_fork (__NR_Linux + 56) +#define __NR_execve (__NR_Linux + 57) +#define __NR_exit (__NR_Linux + 58) +#define __NR_wait4 (__NR_Linux + 59) +#define __NR_kill (__NR_Linux + 60) +#define __NR_uname (__NR_Linux + 61) +#define __NR_semget (__NR_Linux + 62) +#define __NR_semop (__NR_Linux + 63) +#define __NR_semctl (__NR_Linux + 64) +#define __NR_shmdt (__NR_Linux + 65) +#define __NR_msgget (__NR_Linux + 66) +#define __NR_msgsnd (__NR_Linux + 67) +#define __NR_msgrcv (__NR_Linux + 68) +#define __NR_msgctl (__NR_Linux + 69) +#define __NR_fcntl (__NR_Linux + 70) +#define __NR_flock (__NR_Linux + 71) +#define __NR_fsync (__NR_Linux + 72) +#define __NR_fdatasync (__NR_Linux + 73) +#define __NR_truncate (__NR_Linux + 74) +#define __NR_ftruncate (__NR_Linux + 75) +#define __NR_getdents (__NR_Linux + 76) +#define __NR_getcwd (__NR_Linux + 77) +#define __NR_chdir (__NR_Linux + 78) +#define __NR_fchdir (__NR_Linux + 79) +#define __NR_rename (__NR_Linux + 80) +#define __NR_mkdir (__NR_Linux + 81) +#define __NR_rmdir (__NR_Linux + 82) +#define __NR_creat (__NR_Linux + 83) +#define __NR_link (__NR_Linux + 84) +#define __NR_unlink (__NR_Linux + 85) +#define __NR_symlink (__NR_Linux + 86) +#define __NR_readlink (__NR_Linux + 87) +#define __NR_chmod (__NR_Linux + 88) +#define __NR_fchmod (__NR_Linux + 89) +#define __NR_chown (__NR_Linux + 90) +#define __NR_fchown (__NR_Linux + 91) +#define __NR_lchown (__NR_Linux + 92) +#define __NR_umask (__NR_Linux + 93) +#define __NR_gettimeofday (__NR_Linux + 94) +#define __NR_getrlimit (__NR_Linux + 95) +#define __NR_getrusage (__NR_Linux + 96) +#define __NR_sysinfo (__NR_Linux + 97) +#define __NR_times (__NR_Linux + 98) +#define __NR_ptrace (__NR_Linux + 99) +#define __NR_getuid (__NR_Linux + 100) +#define __NR_syslog (__NR_Linux + 101) +#define __NR_getgid (__NR_Linux + 102) +#define __NR_setuid (__NR_Linux + 103) +#define __NR_setgid (__NR_Linux + 104) +#define __NR_geteuid (__NR_Linux + 105) +#define __NR_getegid (__NR_Linux + 106) +#define __NR_setpgid (__NR_Linux + 107) +#define __NR_getppid (__NR_Linux + 108) +#define __NR_getpgrp (__NR_Linux + 109) +#define __NR_setsid (__NR_Linux + 110) +#define __NR_setreuid (__NR_Linux + 111) +#define __NR_setregid (__NR_Linux + 112) +#define __NR_getgroups (__NR_Linux + 113) +#define __NR_setgroups (__NR_Linux + 114) +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_getresuid (__NR_Linux + 116) +#define __NR_setresgid (__NR_Linux + 117) +#define __NR_getresgid (__NR_Linux + 118) +#define __NR_getpgid (__NR_Linux + 119) +#define __NR_setfsuid (__NR_Linux + 120) +#define __NR_setfsgid (__NR_Linux + 121) +#define __NR_getsid (__NR_Linux + 122) +#define __NR_capget (__NR_Linux + 123) +#define __NR_capset (__NR_Linux + 124) +#define __NR_rt_sigpending (__NR_Linux + 125) +#define __NR_rt_sigtimedwait (__NR_Linux + 126) +#define __NR_rt_sigqueueinfo (__NR_Linux + 127) +#define __NR_rt_sigsuspend (__NR_Linux + 128) +#define __NR_sigaltstack (__NR_Linux + 129) +#define __NR_utime (__NR_Linux + 130) +#define __NR_mknod (__NR_Linux + 131) +#define __NR_personality (__NR_Linux + 132) +#define 
__NR_ustat (__NR_Linux + 133) +#define __NR_statfs (__NR_Linux + 134) +#define __NR_fstatfs (__NR_Linux + 135) +#define __NR_sysfs (__NR_Linux + 136) +#define __NR_getpriority (__NR_Linux + 137) +#define __NR_setpriority (__NR_Linux + 138) +#define __NR_sched_setparam (__NR_Linux + 139) +#define __NR_sched_getparam (__NR_Linux + 140) +#define __NR_sched_setscheduler (__NR_Linux + 141) +#define __NR_sched_getscheduler (__NR_Linux + 142) +#define __NR_sched_get_priority_max (__NR_Linux + 143) +#define __NR_sched_get_priority_min (__NR_Linux + 144) +#define __NR_sched_rr_get_interval (__NR_Linux + 145) +#define __NR_mlock (__NR_Linux + 146) +#define __NR_munlock (__NR_Linux + 147) +#define __NR_mlockall (__NR_Linux + 148) +#define __NR_munlockall (__NR_Linux + 149) +#define __NR_vhangup (__NR_Linux + 150) +#define __NR_pivot_root (__NR_Linux + 151) +#define __NR__sysctl (__NR_Linux + 152) +#define __NR_prctl (__NR_Linux + 153) +#define __NR_adjtimex (__NR_Linux + 154) +#define __NR_setrlimit (__NR_Linux + 155) +#define __NR_chroot (__NR_Linux + 156) +#define __NR_sync (__NR_Linux + 157) +#define __NR_acct (__NR_Linux + 158) +#define __NR_settimeofday (__NR_Linux + 159) +#define __NR_mount (__NR_Linux + 160) +#define __NR_umount2 (__NR_Linux + 161) +#define __NR_swapon (__NR_Linux + 162) +#define __NR_swapoff (__NR_Linux + 163) +#define __NR_reboot (__NR_Linux + 164) +#define __NR_sethostname (__NR_Linux + 165) +#define __NR_setdomainname (__NR_Linux + 166) +#define __NR_create_module (__NR_Linux + 167) +#define __NR_init_module (__NR_Linux + 168) +#define __NR_delete_module (__NR_Linux + 169) +#define __NR_get_kernel_syms (__NR_Linux + 170) +#define __NR_query_module (__NR_Linux + 171) +#define __NR_quotactl (__NR_Linux + 172) +#define __NR_nfsservctl (__NR_Linux + 173) +#define __NR_getpmsg (__NR_Linux + 174) +#define __NR_putpmsg (__NR_Linux + 175) +#define __NR_afs_syscall (__NR_Linux + 176) +#define __NR_reserved177 (__NR_Linux + 177) +#define __NR_gettid (__NR_Linux + 178) +#define __NR_readahead (__NR_Linux + 179) +#define __NR_setxattr (__NR_Linux + 180) +#define __NR_lsetxattr (__NR_Linux + 181) +#define __NR_fsetxattr (__NR_Linux + 182) +#define __NR_getxattr (__NR_Linux + 183) +#define __NR_lgetxattr (__NR_Linux + 184) +#define __NR_fgetxattr (__NR_Linux + 185) +#define __NR_listxattr (__NR_Linux + 186) +#define __NR_llistxattr (__NR_Linux + 187) +#define __NR_flistxattr (__NR_Linux + 188) +#define __NR_removexattr (__NR_Linux + 189) +#define __NR_lremovexattr (__NR_Linux + 190) +#define __NR_fremovexattr (__NR_Linux + 191) +#define __NR_tkill (__NR_Linux + 192) +#define __NR_reserved193 (__NR_Linux + 193) +#define __NR_futex (__NR_Linux + 194) +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#define __NR_cacheflush (__NR_Linux + 197) +#define __NR_cachectl (__NR_Linux + 198) +#define __NR_sysmips (__NR_Linux + 199) +#define __NR_io_setup (__NR_Linux + 200) +#define __NR_io_destroy (__NR_Linux + 201) +#define __NR_io_getevents (__NR_Linux + 202) +#define __NR_io_submit (__NR_Linux + 203) +#define __NR_io_cancel (__NR_Linux + 204) +#define __NR_exit_group (__NR_Linux + 205) +#define __NR_lookup_dcookie (__NR_Linux + 206) +#define __NR_epoll_create (__NR_Linux + 207) +#define __NR_epoll_ctl (__NR_Linux + 208) +#define __NR_epoll_wait (__NR_Linux + 209) +#define __NR_remap_file_pages (__NR_Linux + 210) +#define __NR_rt_sigreturn (__NR_Linux + 211) +#define __NR_fcntl64 (__NR_Linux + 212) +#define __NR_set_tid_address (__NR_Linux + 
213) +#define __NR_restart_syscall (__NR_Linux + 214) +#define __NR_semtimedop (__NR_Linux + 215) +#define __NR_fadvise64 (__NR_Linux + 216) +#define __NR_statfs64 (__NR_Linux + 217) +#define __NR_fstatfs64 (__NR_Linux + 218) +#define __NR_sendfile64 (__NR_Linux + 219) +#define __NR_timer_create (__NR_Linux + 220) +#define __NR_timer_settime (__NR_Linux + 221) +#define __NR_timer_gettime (__NR_Linux + 222) +#define __NR_timer_getoverrun (__NR_Linux + 223) +#define __NR_timer_delete (__NR_Linux + 224) +#define __NR_clock_settime (__NR_Linux + 225) +#define __NR_clock_gettime (__NR_Linux + 226) +#define __NR_clock_getres (__NR_Linux + 227) +#define __NR_clock_nanosleep (__NR_Linux + 228) +#define __NR_tgkill (__NR_Linux + 229) +#define __NR_utimes (__NR_Linux + 230) +#define __NR_mbind (__NR_Linux + 231) +#define __NR_get_mempolicy (__NR_Linux + 232) +#define __NR_set_mempolicy (__NR_Linux + 233) +#define __NR_mq_open (__NR_Linux + 234) +#define __NR_mq_unlink (__NR_Linux + 235) +#define __NR_mq_timedsend (__NR_Linux + 236) +#define __NR_mq_timedreceive (__NR_Linux + 237) +#define __NR_mq_notify (__NR_Linux + 238) +#define __NR_mq_getsetattr (__NR_Linux + 239) +#define __NR_vserver (__NR_Linux + 240) +#define __NR_waitid (__NR_Linux + 241) +#define __NR_add_key (__NR_Linux + 243) +#define __NR_request_key (__NR_Linux + 244) +#define __NR_keyctl (__NR_Linux + 245) +#define __NR_set_thread_area (__NR_Linux + 246) +#define __NR_inotify_init (__NR_Linux + 247) +#define __NR_inotify_add_watch (__NR_Linux + 248) +#define __NR_inotify_rm_watch (__NR_Linux + 249) +#define __NR_migrate_pages (__NR_Linux + 250) +#define __NR_openat (__NR_Linux + 251) +#define __NR_mkdirat (__NR_Linux + 252) +#define __NR_mknodat (__NR_Linux + 253) +#define __NR_fchownat (__NR_Linux + 254) +#define __NR_futimesat (__NR_Linux + 255) +#define __NR_newfstatat (__NR_Linux + 256) +#define __NR_unlinkat (__NR_Linux + 257) +#define __NR_renameat (__NR_Linux + 258) +#define __NR_linkat (__NR_Linux + 259) +#define __NR_symlinkat (__NR_Linux + 260) +#define __NR_readlinkat (__NR_Linux + 261) +#define __NR_fchmodat (__NR_Linux + 262) +#define __NR_faccessat (__NR_Linux + 263) +#define __NR_pselect6 (__NR_Linux + 264) +#define __NR_ppoll (__NR_Linux + 265) +#define __NR_unshare (__NR_Linux + 266) +#define __NR_splice (__NR_Linux + 267) +#define __NR_sync_file_range (__NR_Linux + 268) +#define __NR_tee (__NR_Linux + 269) +#define __NR_vmsplice (__NR_Linux + 270) +#define __NR_move_pages (__NR_Linux + 271) +#define __NR_set_robust_list (__NR_Linux + 272) +#define __NR_get_robust_list (__NR_Linux + 273) +#define __NR_kexec_load (__NR_Linux + 274) +#define __NR_getcpu (__NR_Linux + 275) +#define __NR_epoll_pwait (__NR_Linux + 276) +#define __NR_ioprio_set (__NR_Linux + 277) +#define __NR_ioprio_get (__NR_Linux + 278) +#define __NR_utimensat (__NR_Linux + 279) +#define __NR_signalfd (__NR_Linux + 280) +#define __NR_timerfd (__NR_Linux + 281) +#define __NR_eventfd (__NR_Linux + 282) +#define __NR_fallocate (__NR_Linux + 283) +#define __NR_timerfd_create (__NR_Linux + 284) +#define __NR_timerfd_gettime (__NR_Linux + 285) +#define __NR_timerfd_settime (__NR_Linux + 286) +#define __NR_signalfd4 (__NR_Linux + 287) +#define __NR_eventfd2 (__NR_Linux + 288) +#define __NR_epoll_create1 (__NR_Linux + 289) +#define __NR_dup3 (__NR_Linux + 290) +#define __NR_pipe2 (__NR_Linux + 291) +#define __NR_inotify_init1 (__NR_Linux + 292) +#define __NR_preadv (__NR_Linux + 293) +#define __NR_pwritev (__NR_Linux + 294) +#define __NR_rt_tgsigqueueinfo 
(__NR_Linux + 295) +#define __NR_perf_event_open (__NR_Linux + 296) +#define __NR_accept4 (__NR_Linux + 297) +#define __NR_recvmmsg (__NR_Linux + 298) +#define __NR_getdents64 (__NR_Linux + 299) +#define __NR_fanotify_init (__NR_Linux + 300) +#define __NR_fanotify_mark (__NR_Linux + 301) +#define __NR_prlimit64 (__NR_Linux + 302) +#define __NR_name_to_handle_at (__NR_Linux + 303) +#define __NR_open_by_handle_at (__NR_Linux + 304) +#define __NR_clock_adjtime (__NR_Linux + 305) +#define __NR_syncfs (__NR_Linux + 306) +#define __NR_sendmmsg (__NR_Linux + 307) +#define __NR_setns (__NR_Linux + 308) +#define __NR_process_vm_readv (__NR_Linux + 309) +#define __NR_process_vm_writev (__NR_Linux + 310) +#define __NR_kcmp (__NR_Linux + 311) +#define __NR_finit_module (__NR_Linux + 312) +#define __NR_sched_setattr (__NR_Linux + 313) +#define __NR_sched_getattr (__NR_Linux + 314) +#define __NR_renameat2 (__NR_Linux + 315) +#define __NR_seccomp (__NR_Linux + 316) +#define __NR_getrandom (__NR_Linux + 317) +#define __NR_memfd_create (__NR_Linux + 318) +#define __NR_bpf (__NR_Linux + 319) +#define __NR_execveat (__NR_Linux + 320) +#define __NR_userfaultfd (__NR_Linux + 321) +#define __NR_membarrier (__NR_Linux + 322) +#define __NR_mlock2 (__NR_Linux + 323) +#define __NR_copy_file_range (__NR_Linux + 324) +#define __NR_preadv2 (__NR_Linux + 325) +#define __NR_pwritev2 (__NR_Linux + 326) +#define __NR_pkey_mprotect (__NR_Linux + 327) +#define __NR_pkey_alloc (__NR_Linux + 328) +#define __NR_pkey_free (__NR_Linux + 329) +#define __NR_statx (__NR_Linux + 330) +#define __NR_rseq (__NR_Linux + 331) +#define __NR_io_pgetevents (__NR_Linux + 332) +#define __NR_clock_gettime64 (__NR_Linux + 403) +#define __NR_clock_settime64 (__NR_Linux + 404) +#define __NR_clock_adjtime64 (__NR_Linux + 405) +#define __NR_clock_getres_time64 (__NR_Linux + 406) +#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) +#define __NR_timer_gettime64 (__NR_Linux + 408) +#define __NR_timer_settime64 (__NR_Linux + 409) +#define __NR_timerfd_gettime64 (__NR_Linux + 410) +#define __NR_timerfd_settime64 (__NR_Linux + 411) +#define __NR_utimensat_time64 (__NR_Linux + 412) +#define __NR_pselect6_time64 (__NR_Linux + 413) +#define __NR_ppoll_time64 (__NR_Linux + 414) +#define __NR_io_pgetevents_time64 (__NR_Linux + 416) +#define __NR_recvmmsg_time64 (__NR_Linux + 417) +#define __NR_mq_timedsend_time64 (__NR_Linux + 418) +#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) +#define __NR_semtimedop_time64 (__NR_Linux + 420) +#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) +#define __NR_futex_time64 (__NR_Linux + 422) +#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) +#define __NR_pidfd_send_signal (__NR_Linux + 424) +#define __NR_io_uring_setup (__NR_Linux + 425) +#define __NR_io_uring_enter (__NR_Linux + 426) +#define __NR_io_uring_register (__NR_Linux + 427) +#define __NR_open_tree (__NR_Linux + 428) +#define __NR_move_mount (__NR_Linux + 429) +#define __NR_fsopen (__NR_Linux + 430) +#define __NR_fsconfig (__NR_Linux + 431) +#define __NR_fsmount (__NR_Linux + 432) +#define __NR_fspick (__NR_Linux + 433) +#define __NR_pidfd_open (__NR_Linux + 434) +#define __NR_clone3 (__NR_Linux + 435) +#define __NR_close_range (__NR_Linux + 436) +#define __NR_openat2 (__NR_Linux + 437) +#define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) +#define __NR_process_madvise (__NR_Linux + 440) +#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd 
(__NR_Linux + 443) +#define __NR_landlock_create_ruleset (__NR_Linux + 444) +#define __NR_landlock_add_rule (__NR_Linux + 445) +#define __NR_landlock_restrict_self (__NR_Linux + 446) - -#endif /* _ASM_MIPS_UNISTD_N32_H */ +#endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 683558a7f8a..780e0cead66 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -1,352 +1,356 @@ -#ifndef _ASM_MIPS_UNISTD_N64_H -#define _ASM_MIPS_UNISTD_N64_H +#ifndef _ASM_UNISTD_N64_H +#define _ASM_UNISTD_N64_H -#define __NR_read (__NR_Linux + 0) -#define __NR_write (__NR_Linux + 1) -#define __NR_open (__NR_Linux + 2) -#define __NR_close (__NR_Linux + 3) -#define __NR_stat (__NR_Linux + 4) -#define __NR_fstat (__NR_Linux + 5) -#define __NR_lstat (__NR_Linux + 6) -#define __NR_poll (__NR_Linux + 7) -#define __NR_lseek (__NR_Linux + 8) -#define __NR_mmap (__NR_Linux + 9) -#define __NR_mprotect (__NR_Linux + 10) -#define __NR_munmap (__NR_Linux + 11) -#define __NR_brk (__NR_Linux + 12) -#define __NR_rt_sigaction (__NR_Linux + 13) -#define __NR_rt_sigprocmask (__NR_Linux + 14) -#define __NR_ioctl (__NR_Linux + 15) -#define __NR_pread64 (__NR_Linux + 16) -#define __NR_pwrite64 (__NR_Linux + 17) -#define __NR_readv (__NR_Linux + 18) -#define __NR_writev (__NR_Linux + 19) -#define __NR_access (__NR_Linux + 20) -#define __NR_pipe (__NR_Linux + 21) -#define __NR__newselect (__NR_Linux + 22) -#define __NR_sched_yield (__NR_Linux + 23) -#define __NR_mremap (__NR_Linux + 24) -#define __NR_msync (__NR_Linux + 25) -#define __NR_mincore (__NR_Linux + 26) -#define __NR_madvise (__NR_Linux + 27) -#define __NR_shmget (__NR_Linux + 28) -#define __NR_shmat (__NR_Linux + 29) -#define __NR_shmctl (__NR_Linux + 30) -#define __NR_dup (__NR_Linux + 31) -#define __NR_dup2 (__NR_Linux + 32) -#define __NR_pause (__NR_Linux + 33) -#define __NR_nanosleep (__NR_Linux + 34) -#define __NR_getitimer (__NR_Linux + 35) -#define __NR_setitimer (__NR_Linux + 36) -#define __NR_alarm (__NR_Linux + 37) -#define __NR_getpid (__NR_Linux + 38) -#define __NR_sendfile (__NR_Linux + 39) -#define __NR_socket (__NR_Linux + 40) -#define __NR_connect (__NR_Linux + 41) -#define __NR_accept (__NR_Linux + 42) -#define __NR_sendto (__NR_Linux + 43) -#define __NR_recvfrom (__NR_Linux + 44) -#define __NR_sendmsg (__NR_Linux + 45) -#define __NR_recvmsg (__NR_Linux + 46) -#define __NR_shutdown (__NR_Linux + 47) -#define __NR_bind (__NR_Linux + 48) -#define __NR_listen (__NR_Linux + 49) -#define __NR_getsockname (__NR_Linux + 50) -#define __NR_getpeername (__NR_Linux + 51) -#define __NR_socketpair (__NR_Linux + 52) -#define __NR_setsockopt (__NR_Linux + 53) -#define __NR_getsockopt (__NR_Linux + 54) -#define __NR_clone (__NR_Linux + 55) -#define __NR_fork (__NR_Linux + 56) -#define __NR_execve (__NR_Linux + 57) -#define __NR_exit (__NR_Linux + 58) -#define __NR_wait4 (__NR_Linux + 59) -#define __NR_kill (__NR_Linux + 60) -#define __NR_uname (__NR_Linux + 61) -#define __NR_semget (__NR_Linux + 62) -#define __NR_semop (__NR_Linux + 63) -#define __NR_semctl (__NR_Linux + 64) -#define __NR_shmdt (__NR_Linux + 65) -#define __NR_msgget (__NR_Linux + 66) -#define __NR_msgsnd (__NR_Linux + 67) -#define __NR_msgrcv (__NR_Linux + 68) -#define __NR_msgctl (__NR_Linux + 69) -#define __NR_fcntl (__NR_Linux + 70) -#define __NR_flock (__NR_Linux + 71) -#define __NR_fsync (__NR_Linux + 72) -#define __NR_fdatasync (__NR_Linux + 73) -#define __NR_truncate (__NR_Linux + 74) -#define 
__NR_ftruncate (__NR_Linux + 75) -#define __NR_getdents (__NR_Linux + 76) -#define __NR_getcwd (__NR_Linux + 77) -#define __NR_chdir (__NR_Linux + 78) -#define __NR_fchdir (__NR_Linux + 79) -#define __NR_rename (__NR_Linux + 80) -#define __NR_mkdir (__NR_Linux + 81) -#define __NR_rmdir (__NR_Linux + 82) -#define __NR_creat (__NR_Linux + 83) -#define __NR_link (__NR_Linux + 84) -#define __NR_unlink (__NR_Linux + 85) -#define __NR_symlink (__NR_Linux + 86) -#define __NR_readlink (__NR_Linux + 87) -#define __NR_chmod (__NR_Linux + 88) -#define __NR_fchmod (__NR_Linux + 89) -#define __NR_chown (__NR_Linux + 90) -#define __NR_fchown (__NR_Linux + 91) -#define __NR_lchown (__NR_Linux + 92) -#define __NR_umask (__NR_Linux + 93) -#define __NR_gettimeofday (__NR_Linux + 94) -#define __NR_getrlimit (__NR_Linux + 95) -#define __NR_getrusage (__NR_Linux + 96) -#define __NR_sysinfo (__NR_Linux + 97) -#define __NR_times (__NR_Linux + 98) -#define __NR_ptrace (__NR_Linux + 99) -#define __NR_getuid (__NR_Linux + 100) -#define __NR_syslog (__NR_Linux + 101) -#define __NR_getgid (__NR_Linux + 102) -#define __NR_setuid (__NR_Linux + 103) -#define __NR_setgid (__NR_Linux + 104) -#define __NR_geteuid (__NR_Linux + 105) -#define __NR_getegid (__NR_Linux + 106) -#define __NR_setpgid (__NR_Linux + 107) -#define __NR_getppid (__NR_Linux + 108) -#define __NR_getpgrp (__NR_Linux + 109) -#define __NR_setsid (__NR_Linux + 110) -#define __NR_setreuid (__NR_Linux + 111) -#define __NR_setregid (__NR_Linux + 112) -#define __NR_getgroups (__NR_Linux + 113) -#define __NR_setgroups (__NR_Linux + 114) -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_getresuid (__NR_Linux + 116) -#define __NR_setresgid (__NR_Linux + 117) -#define __NR_getresgid (__NR_Linux + 118) -#define __NR_getpgid (__NR_Linux + 119) -#define __NR_setfsuid (__NR_Linux + 120) -#define __NR_setfsgid (__NR_Linux + 121) -#define __NR_getsid (__NR_Linux + 122) -#define __NR_capget (__NR_Linux + 123) -#define __NR_capset (__NR_Linux + 124) -#define __NR_rt_sigpending (__NR_Linux + 125) -#define __NR_rt_sigtimedwait (__NR_Linux + 126) -#define __NR_rt_sigqueueinfo (__NR_Linux + 127) -#define __NR_rt_sigsuspend (__NR_Linux + 128) -#define __NR_sigaltstack (__NR_Linux + 129) -#define __NR_utime (__NR_Linux + 130) -#define __NR_mknod (__NR_Linux + 131) -#define __NR_personality (__NR_Linux + 132) -#define __NR_ustat (__NR_Linux + 133) -#define __NR_statfs (__NR_Linux + 134) -#define __NR_fstatfs (__NR_Linux + 135) -#define __NR_sysfs (__NR_Linux + 136) -#define __NR_getpriority (__NR_Linux + 137) -#define __NR_setpriority (__NR_Linux + 138) -#define __NR_sched_setparam (__NR_Linux + 139) -#define __NR_sched_getparam (__NR_Linux + 140) -#define __NR_sched_setscheduler (__NR_Linux + 141) -#define __NR_sched_getscheduler (__NR_Linux + 142) -#define __NR_sched_get_priority_max (__NR_Linux + 143) -#define __NR_sched_get_priority_min (__NR_Linux + 144) -#define __NR_sched_rr_get_interval (__NR_Linux + 145) -#define __NR_mlock (__NR_Linux + 146) -#define __NR_munlock (__NR_Linux + 147) -#define __NR_mlockall (__NR_Linux + 148) -#define __NR_munlockall (__NR_Linux + 149) -#define __NR_vhangup (__NR_Linux + 150) -#define __NR_pivot_root (__NR_Linux + 151) -#define __NR__sysctl (__NR_Linux + 152) -#define __NR_prctl (__NR_Linux + 153) -#define __NR_adjtimex (__NR_Linux + 154) -#define __NR_setrlimit (__NR_Linux + 155) -#define __NR_chroot (__NR_Linux + 156) -#define __NR_sync (__NR_Linux + 157) -#define __NR_acct (__NR_Linux + 158) -#define __NR_settimeofday 
(__NR_Linux + 159) -#define __NR_mount (__NR_Linux + 160) -#define __NR_umount2 (__NR_Linux + 161) -#define __NR_swapon (__NR_Linux + 162) -#define __NR_swapoff (__NR_Linux + 163) -#define __NR_reboot (__NR_Linux + 164) -#define __NR_sethostname (__NR_Linux + 165) -#define __NR_setdomainname (__NR_Linux + 166) -#define __NR_create_module (__NR_Linux + 167) -#define __NR_init_module (__NR_Linux + 168) -#define __NR_delete_module (__NR_Linux + 169) -#define __NR_get_kernel_syms (__NR_Linux + 170) -#define __NR_query_module (__NR_Linux + 171) -#define __NR_quotactl (__NR_Linux + 172) -#define __NR_nfsservctl (__NR_Linux + 173) -#define __NR_getpmsg (__NR_Linux + 174) -#define __NR_putpmsg (__NR_Linux + 175) -#define __NR_afs_syscall (__NR_Linux + 176) -#define __NR_reserved177 (__NR_Linux + 177) -#define __NR_gettid (__NR_Linux + 178) -#define __NR_readahead (__NR_Linux + 179) -#define __NR_setxattr (__NR_Linux + 180) -#define __NR_lsetxattr (__NR_Linux + 181) -#define __NR_fsetxattr (__NR_Linux + 182) -#define __NR_getxattr (__NR_Linux + 183) -#define __NR_lgetxattr (__NR_Linux + 184) -#define __NR_fgetxattr (__NR_Linux + 185) -#define __NR_listxattr (__NR_Linux + 186) -#define __NR_llistxattr (__NR_Linux + 187) -#define __NR_flistxattr (__NR_Linux + 188) -#define __NR_removexattr (__NR_Linux + 189) -#define __NR_lremovexattr (__NR_Linux + 190) -#define __NR_fremovexattr (__NR_Linux + 191) -#define __NR_tkill (__NR_Linux + 192) -#define __NR_reserved193 (__NR_Linux + 193) -#define __NR_futex (__NR_Linux + 194) -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#define __NR_cacheflush (__NR_Linux + 197) -#define __NR_cachectl (__NR_Linux + 198) -#define __NR_sysmips (__NR_Linux + 199) -#define __NR_io_setup (__NR_Linux + 200) -#define __NR_io_destroy (__NR_Linux + 201) -#define __NR_io_getevents (__NR_Linux + 202) -#define __NR_io_submit (__NR_Linux + 203) -#define __NR_io_cancel (__NR_Linux + 204) -#define __NR_exit_group (__NR_Linux + 205) -#define __NR_lookup_dcookie (__NR_Linux + 206) -#define __NR_epoll_create (__NR_Linux + 207) -#define __NR_epoll_ctl (__NR_Linux + 208) -#define __NR_epoll_wait (__NR_Linux + 209) -#define __NR_remap_file_pages (__NR_Linux + 210) -#define __NR_rt_sigreturn (__NR_Linux + 211) -#define __NR_set_tid_address (__NR_Linux + 212) -#define __NR_restart_syscall (__NR_Linux + 213) -#define __NR_semtimedop (__NR_Linux + 214) -#define __NR_fadvise64 (__NR_Linux + 215) -#define __NR_timer_create (__NR_Linux + 216) -#define __NR_timer_settime (__NR_Linux + 217) -#define __NR_timer_gettime (__NR_Linux + 218) -#define __NR_timer_getoverrun (__NR_Linux + 219) -#define __NR_timer_delete (__NR_Linux + 220) -#define __NR_clock_settime (__NR_Linux + 221) -#define __NR_clock_gettime (__NR_Linux + 222) -#define __NR_clock_getres (__NR_Linux + 223) -#define __NR_clock_nanosleep (__NR_Linux + 224) -#define __NR_tgkill (__NR_Linux + 225) -#define __NR_utimes (__NR_Linux + 226) -#define __NR_mbind (__NR_Linux + 227) -#define __NR_get_mempolicy (__NR_Linux + 228) -#define __NR_set_mempolicy (__NR_Linux + 229) -#define __NR_mq_open (__NR_Linux + 230) -#define __NR_mq_unlink (__NR_Linux + 231) -#define __NR_mq_timedsend (__NR_Linux + 232) -#define __NR_mq_timedreceive (__NR_Linux + 233) -#define __NR_mq_notify (__NR_Linux + 234) -#define __NR_mq_getsetattr (__NR_Linux + 235) -#define __NR_vserver (__NR_Linux + 236) -#define __NR_waitid (__NR_Linux + 237) -#define __NR_add_key (__NR_Linux + 239) -#define __NR_request_key (__NR_Linux 
+ 240) -#define __NR_keyctl (__NR_Linux + 241) -#define __NR_set_thread_area (__NR_Linux + 242) -#define __NR_inotify_init (__NR_Linux + 243) -#define __NR_inotify_add_watch (__NR_Linux + 244) -#define __NR_inotify_rm_watch (__NR_Linux + 245) -#define __NR_migrate_pages (__NR_Linux + 246) -#define __NR_openat (__NR_Linux + 247) -#define __NR_mkdirat (__NR_Linux + 248) -#define __NR_mknodat (__NR_Linux + 249) -#define __NR_fchownat (__NR_Linux + 250) -#define __NR_futimesat (__NR_Linux + 251) -#define __NR_newfstatat (__NR_Linux + 252) -#define __NR_unlinkat (__NR_Linux + 253) -#define __NR_renameat (__NR_Linux + 254) -#define __NR_linkat (__NR_Linux + 255) -#define __NR_symlinkat (__NR_Linux + 256) -#define __NR_readlinkat (__NR_Linux + 257) -#define __NR_fchmodat (__NR_Linux + 258) -#define __NR_faccessat (__NR_Linux + 259) -#define __NR_pselect6 (__NR_Linux + 260) -#define __NR_ppoll (__NR_Linux + 261) -#define __NR_unshare (__NR_Linux + 262) -#define __NR_splice (__NR_Linux + 263) -#define __NR_sync_file_range (__NR_Linux + 264) -#define __NR_tee (__NR_Linux + 265) -#define __NR_vmsplice (__NR_Linux + 266) -#define __NR_move_pages (__NR_Linux + 267) -#define __NR_set_robust_list (__NR_Linux + 268) -#define __NR_get_robust_list (__NR_Linux + 269) -#define __NR_kexec_load (__NR_Linux + 270) -#define __NR_getcpu (__NR_Linux + 271) -#define __NR_epoll_pwait (__NR_Linux + 272) -#define __NR_ioprio_set (__NR_Linux + 273) -#define __NR_ioprio_get (__NR_Linux + 274) -#define __NR_utimensat (__NR_Linux + 275) -#define __NR_signalfd (__NR_Linux + 276) -#define __NR_timerfd (__NR_Linux + 277) -#define __NR_eventfd (__NR_Linux + 278) -#define __NR_fallocate (__NR_Linux + 279) -#define __NR_timerfd_create (__NR_Linux + 280) -#define __NR_timerfd_gettime (__NR_Linux + 281) -#define __NR_timerfd_settime (__NR_Linux + 282) -#define __NR_signalfd4 (__NR_Linux + 283) -#define __NR_eventfd2 (__NR_Linux + 284) -#define __NR_epoll_create1 (__NR_Linux + 285) -#define __NR_dup3 (__NR_Linux + 286) -#define __NR_pipe2 (__NR_Linux + 287) -#define __NR_inotify_init1 (__NR_Linux + 288) -#define __NR_preadv (__NR_Linux + 289) -#define __NR_pwritev (__NR_Linux + 290) -#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) -#define __NR_perf_event_open (__NR_Linux + 292) -#define __NR_accept4 (__NR_Linux + 293) -#define __NR_recvmmsg (__NR_Linux + 294) -#define __NR_fanotify_init (__NR_Linux + 295) -#define __NR_fanotify_mark (__NR_Linux + 296) -#define __NR_prlimit64 (__NR_Linux + 297) -#define __NR_name_to_handle_at (__NR_Linux + 298) -#define __NR_open_by_handle_at (__NR_Linux + 299) -#define __NR_clock_adjtime (__NR_Linux + 300) -#define __NR_syncfs (__NR_Linux + 301) -#define __NR_sendmmsg (__NR_Linux + 302) -#define __NR_setns (__NR_Linux + 303) -#define __NR_process_vm_readv (__NR_Linux + 304) -#define __NR_process_vm_writev (__NR_Linux + 305) -#define __NR_kcmp (__NR_Linux + 306) -#define __NR_finit_module (__NR_Linux + 307) -#define __NR_getdents64 (__NR_Linux + 308) -#define __NR_sched_setattr (__NR_Linux + 309) -#define __NR_sched_getattr (__NR_Linux + 310) -#define __NR_renameat2 (__NR_Linux + 311) -#define __NR_seccomp (__NR_Linux + 312) -#define __NR_getrandom (__NR_Linux + 313) -#define __NR_memfd_create (__NR_Linux + 314) -#define __NR_bpf (__NR_Linux + 315) -#define __NR_execveat (__NR_Linux + 316) -#define __NR_userfaultfd (__NR_Linux + 317) -#define __NR_membarrier (__NR_Linux + 318) -#define __NR_mlock2 (__NR_Linux + 319) -#define __NR_copy_file_range (__NR_Linux + 320) -#define __NR_preadv2 
(__NR_Linux + 321) -#define __NR_pwritev2 (__NR_Linux + 322) -#define __NR_pkey_mprotect (__NR_Linux + 323) -#define __NR_pkey_alloc (__NR_Linux + 324) -#define __NR_pkey_free (__NR_Linux + 325) -#define __NR_statx (__NR_Linux + 326) -#define __NR_rseq (__NR_Linux + 327) -#define __NR_io_pgetevents (__NR_Linux + 328) -#define __NR_pidfd_send_signal (__NR_Linux + 424) -#define __NR_io_uring_setup (__NR_Linux + 425) -#define __NR_io_uring_enter (__NR_Linux + 426) -#define __NR_io_uring_register (__NR_Linux + 427) -#define __NR_open_tree (__NR_Linux + 428) -#define __NR_move_mount (__NR_Linux + 429) -#define __NR_fsopen (__NR_Linux + 430) -#define __NR_fsconfig (__NR_Linux + 431) -#define __NR_fsmount (__NR_Linux + 432) -#define __NR_fspick (__NR_Linux + 433) -#define __NR_pidfd_open (__NR_Linux + 434) -#define __NR_clone3 (__NR_Linux + 435) -#define __NR_close_range (__NR_Linux + 436) -#define __NR_openat2 (__NR_Linux + 437) -#define __NR_pidfd_getfd (__NR_Linux + 438) -#define __NR_faccessat2 (__NR_Linux + 439) -#define __NR_process_madvise (__NR_Linux + 440) -#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_read (__NR_Linux + 0) +#define __NR_write (__NR_Linux + 1) +#define __NR_open (__NR_Linux + 2) +#define __NR_close (__NR_Linux + 3) +#define __NR_stat (__NR_Linux + 4) +#define __NR_fstat (__NR_Linux + 5) +#define __NR_lstat (__NR_Linux + 6) +#define __NR_poll (__NR_Linux + 7) +#define __NR_lseek (__NR_Linux + 8) +#define __NR_mmap (__NR_Linux + 9) +#define __NR_mprotect (__NR_Linux + 10) +#define __NR_munmap (__NR_Linux + 11) +#define __NR_brk (__NR_Linux + 12) +#define __NR_rt_sigaction (__NR_Linux + 13) +#define __NR_rt_sigprocmask (__NR_Linux + 14) +#define __NR_ioctl (__NR_Linux + 15) +#define __NR_pread64 (__NR_Linux + 16) +#define __NR_pwrite64 (__NR_Linux + 17) +#define __NR_readv (__NR_Linux + 18) +#define __NR_writev (__NR_Linux + 19) +#define __NR_access (__NR_Linux + 20) +#define __NR_pipe (__NR_Linux + 21) +#define __NR__newselect (__NR_Linux + 22) +#define __NR_sched_yield (__NR_Linux + 23) +#define __NR_mremap (__NR_Linux + 24) +#define __NR_msync (__NR_Linux + 25) +#define __NR_mincore (__NR_Linux + 26) +#define __NR_madvise (__NR_Linux + 27) +#define __NR_shmget (__NR_Linux + 28) +#define __NR_shmat (__NR_Linux + 29) +#define __NR_shmctl (__NR_Linux + 30) +#define __NR_dup (__NR_Linux + 31) +#define __NR_dup2 (__NR_Linux + 32) +#define __NR_pause (__NR_Linux + 33) +#define __NR_nanosleep (__NR_Linux + 34) +#define __NR_getitimer (__NR_Linux + 35) +#define __NR_setitimer (__NR_Linux + 36) +#define __NR_alarm (__NR_Linux + 37) +#define __NR_getpid (__NR_Linux + 38) +#define __NR_sendfile (__NR_Linux + 39) +#define __NR_socket (__NR_Linux + 40) +#define __NR_connect (__NR_Linux + 41) +#define __NR_accept (__NR_Linux + 42) +#define __NR_sendto (__NR_Linux + 43) +#define __NR_recvfrom (__NR_Linux + 44) +#define __NR_sendmsg (__NR_Linux + 45) +#define __NR_recvmsg (__NR_Linux + 46) +#define __NR_shutdown (__NR_Linux + 47) +#define __NR_bind (__NR_Linux + 48) +#define __NR_listen (__NR_Linux + 49) +#define __NR_getsockname (__NR_Linux + 50) +#define __NR_getpeername (__NR_Linux + 51) +#define __NR_socketpair (__NR_Linux + 52) +#define __NR_setsockopt (__NR_Linux + 53) +#define __NR_getsockopt (__NR_Linux + 54) +#define __NR_clone (__NR_Linux + 55) +#define __NR_fork (__NR_Linux + 56) +#define __NR_execve (__NR_Linux + 57) +#define __NR_exit (__NR_Linux + 58) +#define __NR_wait4 (__NR_Linux + 59) +#define __NR_kill (__NR_Linux + 60) +#define __NR_uname (__NR_Linux + 
61) +#define __NR_semget (__NR_Linux + 62) +#define __NR_semop (__NR_Linux + 63) +#define __NR_semctl (__NR_Linux + 64) +#define __NR_shmdt (__NR_Linux + 65) +#define __NR_msgget (__NR_Linux + 66) +#define __NR_msgsnd (__NR_Linux + 67) +#define __NR_msgrcv (__NR_Linux + 68) +#define __NR_msgctl (__NR_Linux + 69) +#define __NR_fcntl (__NR_Linux + 70) +#define __NR_flock (__NR_Linux + 71) +#define __NR_fsync (__NR_Linux + 72) +#define __NR_fdatasync (__NR_Linux + 73) +#define __NR_truncate (__NR_Linux + 74) +#define __NR_ftruncate (__NR_Linux + 75) +#define __NR_getdents (__NR_Linux + 76) +#define __NR_getcwd (__NR_Linux + 77) +#define __NR_chdir (__NR_Linux + 78) +#define __NR_fchdir (__NR_Linux + 79) +#define __NR_rename (__NR_Linux + 80) +#define __NR_mkdir (__NR_Linux + 81) +#define __NR_rmdir (__NR_Linux + 82) +#define __NR_creat (__NR_Linux + 83) +#define __NR_link (__NR_Linux + 84) +#define __NR_unlink (__NR_Linux + 85) +#define __NR_symlink (__NR_Linux + 86) +#define __NR_readlink (__NR_Linux + 87) +#define __NR_chmod (__NR_Linux + 88) +#define __NR_fchmod (__NR_Linux + 89) +#define __NR_chown (__NR_Linux + 90) +#define __NR_fchown (__NR_Linux + 91) +#define __NR_lchown (__NR_Linux + 92) +#define __NR_umask (__NR_Linux + 93) +#define __NR_gettimeofday (__NR_Linux + 94) +#define __NR_getrlimit (__NR_Linux + 95) +#define __NR_getrusage (__NR_Linux + 96) +#define __NR_sysinfo (__NR_Linux + 97) +#define __NR_times (__NR_Linux + 98) +#define __NR_ptrace (__NR_Linux + 99) +#define __NR_getuid (__NR_Linux + 100) +#define __NR_syslog (__NR_Linux + 101) +#define __NR_getgid (__NR_Linux + 102) +#define __NR_setuid (__NR_Linux + 103) +#define __NR_setgid (__NR_Linux + 104) +#define __NR_geteuid (__NR_Linux + 105) +#define __NR_getegid (__NR_Linux + 106) +#define __NR_setpgid (__NR_Linux + 107) +#define __NR_getppid (__NR_Linux + 108) +#define __NR_getpgrp (__NR_Linux + 109) +#define __NR_setsid (__NR_Linux + 110) +#define __NR_setreuid (__NR_Linux + 111) +#define __NR_setregid (__NR_Linux + 112) +#define __NR_getgroups (__NR_Linux + 113) +#define __NR_setgroups (__NR_Linux + 114) +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_getresuid (__NR_Linux + 116) +#define __NR_setresgid (__NR_Linux + 117) +#define __NR_getresgid (__NR_Linux + 118) +#define __NR_getpgid (__NR_Linux + 119) +#define __NR_setfsuid (__NR_Linux + 120) +#define __NR_setfsgid (__NR_Linux + 121) +#define __NR_getsid (__NR_Linux + 122) +#define __NR_capget (__NR_Linux + 123) +#define __NR_capset (__NR_Linux + 124) +#define __NR_rt_sigpending (__NR_Linux + 125) +#define __NR_rt_sigtimedwait (__NR_Linux + 126) +#define __NR_rt_sigqueueinfo (__NR_Linux + 127) +#define __NR_rt_sigsuspend (__NR_Linux + 128) +#define __NR_sigaltstack (__NR_Linux + 129) +#define __NR_utime (__NR_Linux + 130) +#define __NR_mknod (__NR_Linux + 131) +#define __NR_personality (__NR_Linux + 132) +#define __NR_ustat (__NR_Linux + 133) +#define __NR_statfs (__NR_Linux + 134) +#define __NR_fstatfs (__NR_Linux + 135) +#define __NR_sysfs (__NR_Linux + 136) +#define __NR_getpriority (__NR_Linux + 137) +#define __NR_setpriority (__NR_Linux + 138) +#define __NR_sched_setparam (__NR_Linux + 139) +#define __NR_sched_getparam (__NR_Linux + 140) +#define __NR_sched_setscheduler (__NR_Linux + 141) +#define __NR_sched_getscheduler (__NR_Linux + 142) +#define __NR_sched_get_priority_max (__NR_Linux + 143) +#define __NR_sched_get_priority_min (__NR_Linux + 144) +#define __NR_sched_rr_get_interval (__NR_Linux + 145) +#define __NR_mlock (__NR_Linux + 146) +#define 
__NR_munlock (__NR_Linux + 147) +#define __NR_mlockall (__NR_Linux + 148) +#define __NR_munlockall (__NR_Linux + 149) +#define __NR_vhangup (__NR_Linux + 150) +#define __NR_pivot_root (__NR_Linux + 151) +#define __NR__sysctl (__NR_Linux + 152) +#define __NR_prctl (__NR_Linux + 153) +#define __NR_adjtimex (__NR_Linux + 154) +#define __NR_setrlimit (__NR_Linux + 155) +#define __NR_chroot (__NR_Linux + 156) +#define __NR_sync (__NR_Linux + 157) +#define __NR_acct (__NR_Linux + 158) +#define __NR_settimeofday (__NR_Linux + 159) +#define __NR_mount (__NR_Linux + 160) +#define __NR_umount2 (__NR_Linux + 161) +#define __NR_swapon (__NR_Linux + 162) +#define __NR_swapoff (__NR_Linux + 163) +#define __NR_reboot (__NR_Linux + 164) +#define __NR_sethostname (__NR_Linux + 165) +#define __NR_setdomainname (__NR_Linux + 166) +#define __NR_create_module (__NR_Linux + 167) +#define __NR_init_module (__NR_Linux + 168) +#define __NR_delete_module (__NR_Linux + 169) +#define __NR_get_kernel_syms (__NR_Linux + 170) +#define __NR_query_module (__NR_Linux + 171) +#define __NR_quotactl (__NR_Linux + 172) +#define __NR_nfsservctl (__NR_Linux + 173) +#define __NR_getpmsg (__NR_Linux + 174) +#define __NR_putpmsg (__NR_Linux + 175) +#define __NR_afs_syscall (__NR_Linux + 176) +#define __NR_reserved177 (__NR_Linux + 177) +#define __NR_gettid (__NR_Linux + 178) +#define __NR_readahead (__NR_Linux + 179) +#define __NR_setxattr (__NR_Linux + 180) +#define __NR_lsetxattr (__NR_Linux + 181) +#define __NR_fsetxattr (__NR_Linux + 182) +#define __NR_getxattr (__NR_Linux + 183) +#define __NR_lgetxattr (__NR_Linux + 184) +#define __NR_fgetxattr (__NR_Linux + 185) +#define __NR_listxattr (__NR_Linux + 186) +#define __NR_llistxattr (__NR_Linux + 187) +#define __NR_flistxattr (__NR_Linux + 188) +#define __NR_removexattr (__NR_Linux + 189) +#define __NR_lremovexattr (__NR_Linux + 190) +#define __NR_fremovexattr (__NR_Linux + 191) +#define __NR_tkill (__NR_Linux + 192) +#define __NR_reserved193 (__NR_Linux + 193) +#define __NR_futex (__NR_Linux + 194) +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#define __NR_cacheflush (__NR_Linux + 197) +#define __NR_cachectl (__NR_Linux + 198) +#define __NR_sysmips (__NR_Linux + 199) +#define __NR_io_setup (__NR_Linux + 200) +#define __NR_io_destroy (__NR_Linux + 201) +#define __NR_io_getevents (__NR_Linux + 202) +#define __NR_io_submit (__NR_Linux + 203) +#define __NR_io_cancel (__NR_Linux + 204) +#define __NR_exit_group (__NR_Linux + 205) +#define __NR_lookup_dcookie (__NR_Linux + 206) +#define __NR_epoll_create (__NR_Linux + 207) +#define __NR_epoll_ctl (__NR_Linux + 208) +#define __NR_epoll_wait (__NR_Linux + 209) +#define __NR_remap_file_pages (__NR_Linux + 210) +#define __NR_rt_sigreturn (__NR_Linux + 211) +#define __NR_set_tid_address (__NR_Linux + 212) +#define __NR_restart_syscall (__NR_Linux + 213) +#define __NR_semtimedop (__NR_Linux + 214) +#define __NR_fadvise64 (__NR_Linux + 215) +#define __NR_timer_create (__NR_Linux + 216) +#define __NR_timer_settime (__NR_Linux + 217) +#define __NR_timer_gettime (__NR_Linux + 218) +#define __NR_timer_getoverrun (__NR_Linux + 219) +#define __NR_timer_delete (__NR_Linux + 220) +#define __NR_clock_settime (__NR_Linux + 221) +#define __NR_clock_gettime (__NR_Linux + 222) +#define __NR_clock_getres (__NR_Linux + 223) +#define __NR_clock_nanosleep (__NR_Linux + 224) +#define __NR_tgkill (__NR_Linux + 225) +#define __NR_utimes (__NR_Linux + 226) +#define __NR_mbind (__NR_Linux + 227) +#define 
__NR_get_mempolicy (__NR_Linux + 228) +#define __NR_set_mempolicy (__NR_Linux + 229) +#define __NR_mq_open (__NR_Linux + 230) +#define __NR_mq_unlink (__NR_Linux + 231) +#define __NR_mq_timedsend (__NR_Linux + 232) +#define __NR_mq_timedreceive (__NR_Linux + 233) +#define __NR_mq_notify (__NR_Linux + 234) +#define __NR_mq_getsetattr (__NR_Linux + 235) +#define __NR_vserver (__NR_Linux + 236) +#define __NR_waitid (__NR_Linux + 237) +#define __NR_add_key (__NR_Linux + 239) +#define __NR_request_key (__NR_Linux + 240) +#define __NR_keyctl (__NR_Linux + 241) +#define __NR_set_thread_area (__NR_Linux + 242) +#define __NR_inotify_init (__NR_Linux + 243) +#define __NR_inotify_add_watch (__NR_Linux + 244) +#define __NR_inotify_rm_watch (__NR_Linux + 245) +#define __NR_migrate_pages (__NR_Linux + 246) +#define __NR_openat (__NR_Linux + 247) +#define __NR_mkdirat (__NR_Linux + 248) +#define __NR_mknodat (__NR_Linux + 249) +#define __NR_fchownat (__NR_Linux + 250) +#define __NR_futimesat (__NR_Linux + 251) +#define __NR_newfstatat (__NR_Linux + 252) +#define __NR_unlinkat (__NR_Linux + 253) +#define __NR_renameat (__NR_Linux + 254) +#define __NR_linkat (__NR_Linux + 255) +#define __NR_symlinkat (__NR_Linux + 256) +#define __NR_readlinkat (__NR_Linux + 257) +#define __NR_fchmodat (__NR_Linux + 258) +#define __NR_faccessat (__NR_Linux + 259) +#define __NR_pselect6 (__NR_Linux + 260) +#define __NR_ppoll (__NR_Linux + 261) +#define __NR_unshare (__NR_Linux + 262) +#define __NR_splice (__NR_Linux + 263) +#define __NR_sync_file_range (__NR_Linux + 264) +#define __NR_tee (__NR_Linux + 265) +#define __NR_vmsplice (__NR_Linux + 266) +#define __NR_move_pages (__NR_Linux + 267) +#define __NR_set_robust_list (__NR_Linux + 268) +#define __NR_get_robust_list (__NR_Linux + 269) +#define __NR_kexec_load (__NR_Linux + 270) +#define __NR_getcpu (__NR_Linux + 271) +#define __NR_epoll_pwait (__NR_Linux + 272) +#define __NR_ioprio_set (__NR_Linux + 273) +#define __NR_ioprio_get (__NR_Linux + 274) +#define __NR_utimensat (__NR_Linux + 275) +#define __NR_signalfd (__NR_Linux + 276) +#define __NR_timerfd (__NR_Linux + 277) +#define __NR_eventfd (__NR_Linux + 278) +#define __NR_fallocate (__NR_Linux + 279) +#define __NR_timerfd_create (__NR_Linux + 280) +#define __NR_timerfd_gettime (__NR_Linux + 281) +#define __NR_timerfd_settime (__NR_Linux + 282) +#define __NR_signalfd4 (__NR_Linux + 283) +#define __NR_eventfd2 (__NR_Linux + 284) +#define __NR_epoll_create1 (__NR_Linux + 285) +#define __NR_dup3 (__NR_Linux + 286) +#define __NR_pipe2 (__NR_Linux + 287) +#define __NR_inotify_init1 (__NR_Linux + 288) +#define __NR_preadv (__NR_Linux + 289) +#define __NR_pwritev (__NR_Linux + 290) +#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) +#define __NR_perf_event_open (__NR_Linux + 292) +#define __NR_accept4 (__NR_Linux + 293) +#define __NR_recvmmsg (__NR_Linux + 294) +#define __NR_fanotify_init (__NR_Linux + 295) +#define __NR_fanotify_mark (__NR_Linux + 296) +#define __NR_prlimit64 (__NR_Linux + 297) +#define __NR_name_to_handle_at (__NR_Linux + 298) +#define __NR_open_by_handle_at (__NR_Linux + 299) +#define __NR_clock_adjtime (__NR_Linux + 300) +#define __NR_syncfs (__NR_Linux + 301) +#define __NR_sendmmsg (__NR_Linux + 302) +#define __NR_setns (__NR_Linux + 303) +#define __NR_process_vm_readv (__NR_Linux + 304) +#define __NR_process_vm_writev (__NR_Linux + 305) +#define __NR_kcmp (__NR_Linux + 306) +#define __NR_finit_module (__NR_Linux + 307) +#define __NR_getdents64 (__NR_Linux + 308) +#define __NR_sched_setattr (__NR_Linux + 
309) +#define __NR_sched_getattr (__NR_Linux + 310) +#define __NR_renameat2 (__NR_Linux + 311) +#define __NR_seccomp (__NR_Linux + 312) +#define __NR_getrandom (__NR_Linux + 313) +#define __NR_memfd_create (__NR_Linux + 314) +#define __NR_bpf (__NR_Linux + 315) +#define __NR_execveat (__NR_Linux + 316) +#define __NR_userfaultfd (__NR_Linux + 317) +#define __NR_membarrier (__NR_Linux + 318) +#define __NR_mlock2 (__NR_Linux + 319) +#define __NR_copy_file_range (__NR_Linux + 320) +#define __NR_preadv2 (__NR_Linux + 321) +#define __NR_pwritev2 (__NR_Linux + 322) +#define __NR_pkey_mprotect (__NR_Linux + 323) +#define __NR_pkey_alloc (__NR_Linux + 324) +#define __NR_pkey_free (__NR_Linux + 325) +#define __NR_statx (__NR_Linux + 326) +#define __NR_rseq (__NR_Linux + 327) +#define __NR_io_pgetevents (__NR_Linux + 328) +#define __NR_pidfd_send_signal (__NR_Linux + 424) +#define __NR_io_uring_setup (__NR_Linux + 425) +#define __NR_io_uring_enter (__NR_Linux + 426) +#define __NR_io_uring_register (__NR_Linux + 427) +#define __NR_open_tree (__NR_Linux + 428) +#define __NR_move_mount (__NR_Linux + 429) +#define __NR_fsopen (__NR_Linux + 430) +#define __NR_fsconfig (__NR_Linux + 431) +#define __NR_fsmount (__NR_Linux + 432) +#define __NR_fspick (__NR_Linux + 433) +#define __NR_pidfd_open (__NR_Linux + 434) +#define __NR_clone3 (__NR_Linux + 435) +#define __NR_close_range (__NR_Linux + 436) +#define __NR_openat2 (__NR_Linux + 437) +#define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) +#define __NR_process_madvise (__NR_Linux + 440) +#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) +#define __NR_landlock_create_ruleset (__NR_Linux + 444) +#define __NR_landlock_add_rule (__NR_Linux + 445) +#define __NR_landlock_restrict_self (__NR_Linux + 446) - -#endif /* _ASM_MIPS_UNISTD_N64_H */ +#endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index ca6a7e5c0b9..06a2b3b55e6 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -1,422 +1,426 @@ -#ifndef _ASM_MIPS_UNISTD_O32_H -#define _ASM_MIPS_UNISTD_O32_H +#ifndef _ASM_UNISTD_O32_H +#define _ASM_UNISTD_O32_H -#define __NR_syscall (__NR_Linux + 0) -#define __NR_exit (__NR_Linux + 1) -#define __NR_fork (__NR_Linux + 2) -#define __NR_read (__NR_Linux + 3) -#define __NR_write (__NR_Linux + 4) -#define __NR_open (__NR_Linux + 5) -#define __NR_close (__NR_Linux + 6) -#define __NR_waitpid (__NR_Linux + 7) -#define __NR_creat (__NR_Linux + 8) -#define __NR_link (__NR_Linux + 9) -#define __NR_unlink (__NR_Linux + 10) -#define __NR_execve (__NR_Linux + 11) -#define __NR_chdir (__NR_Linux + 12) -#define __NR_time (__NR_Linux + 13) -#define __NR_mknod (__NR_Linux + 14) -#define __NR_chmod (__NR_Linux + 15) -#define __NR_lchown (__NR_Linux + 16) -#define __NR_break (__NR_Linux + 17) -#define __NR_unused18 (__NR_Linux + 18) -#define __NR_lseek (__NR_Linux + 19) -#define __NR_getpid (__NR_Linux + 20) -#define __NR_mount (__NR_Linux + 21) -#define __NR_umount (__NR_Linux + 22) -#define __NR_setuid (__NR_Linux + 23) -#define __NR_getuid (__NR_Linux + 24) -#define __NR_stime (__NR_Linux + 25) -#define __NR_ptrace (__NR_Linux + 26) -#define __NR_alarm (__NR_Linux + 27) -#define __NR_unused28 (__NR_Linux + 28) -#define __NR_pause (__NR_Linux + 29) -#define __NR_utime (__NR_Linux + 30) -#define __NR_stty (__NR_Linux + 31) -#define __NR_gtty (__NR_Linux + 32) 
-#define __NR_access (__NR_Linux + 33) -#define __NR_nice (__NR_Linux + 34) -#define __NR_ftime (__NR_Linux + 35) -#define __NR_sync (__NR_Linux + 36) -#define __NR_kill (__NR_Linux + 37) -#define __NR_rename (__NR_Linux + 38) -#define __NR_mkdir (__NR_Linux + 39) -#define __NR_rmdir (__NR_Linux + 40) -#define __NR_dup (__NR_Linux + 41) -#define __NR_pipe (__NR_Linux + 42) -#define __NR_times (__NR_Linux + 43) -#define __NR_prof (__NR_Linux + 44) -#define __NR_brk (__NR_Linux + 45) -#define __NR_setgid (__NR_Linux + 46) -#define __NR_getgid (__NR_Linux + 47) -#define __NR_signal (__NR_Linux + 48) -#define __NR_geteuid (__NR_Linux + 49) -#define __NR_getegid (__NR_Linux + 50) -#define __NR_acct (__NR_Linux + 51) -#define __NR_umount2 (__NR_Linux + 52) -#define __NR_lock (__NR_Linux + 53) -#define __NR_ioctl (__NR_Linux + 54) -#define __NR_fcntl (__NR_Linux + 55) -#define __NR_mpx (__NR_Linux + 56) -#define __NR_setpgid (__NR_Linux + 57) -#define __NR_ulimit (__NR_Linux + 58) -#define __NR_unused59 (__NR_Linux + 59) -#define __NR_umask (__NR_Linux + 60) -#define __NR_chroot (__NR_Linux + 61) -#define __NR_ustat (__NR_Linux + 62) -#define __NR_dup2 (__NR_Linux + 63) -#define __NR_getppid (__NR_Linux + 64) -#define __NR_getpgrp (__NR_Linux + 65) -#define __NR_setsid (__NR_Linux + 66) -#define __NR_sigaction (__NR_Linux + 67) -#define __NR_sgetmask (__NR_Linux + 68) -#define __NR_ssetmask (__NR_Linux + 69) -#define __NR_setreuid (__NR_Linux + 70) -#define __NR_setregid (__NR_Linux + 71) -#define __NR_sigsuspend (__NR_Linux + 72) -#define __NR_sigpending (__NR_Linux + 73) -#define __NR_sethostname (__NR_Linux + 74) -#define __NR_setrlimit (__NR_Linux + 75) -#define __NR_getrlimit (__NR_Linux + 76) -#define __NR_getrusage (__NR_Linux + 77) -#define __NR_gettimeofday (__NR_Linux + 78) -#define __NR_settimeofday (__NR_Linux + 79) -#define __NR_getgroups (__NR_Linux + 80) -#define __NR_setgroups (__NR_Linux + 81) -#define __NR_reserved82 (__NR_Linux + 82) -#define __NR_symlink (__NR_Linux + 83) -#define __NR_unused84 (__NR_Linux + 84) -#define __NR_readlink (__NR_Linux + 85) -#define __NR_uselib (__NR_Linux + 86) -#define __NR_swapon (__NR_Linux + 87) -#define __NR_reboot (__NR_Linux + 88) -#define __NR_readdir (__NR_Linux + 89) -#define __NR_mmap (__NR_Linux + 90) -#define __NR_munmap (__NR_Linux + 91) -#define __NR_truncate (__NR_Linux + 92) -#define __NR_ftruncate (__NR_Linux + 93) -#define __NR_fchmod (__NR_Linux + 94) -#define __NR_fchown (__NR_Linux + 95) -#define __NR_getpriority (__NR_Linux + 96) -#define __NR_setpriority (__NR_Linux + 97) -#define __NR_profil (__NR_Linux + 98) -#define __NR_statfs (__NR_Linux + 99) -#define __NR_fstatfs (__NR_Linux + 100) -#define __NR_ioperm (__NR_Linux + 101) -#define __NR_socketcall (__NR_Linux + 102) -#define __NR_syslog (__NR_Linux + 103) -#define __NR_setitimer (__NR_Linux + 104) -#define __NR_getitimer (__NR_Linux + 105) -#define __NR_stat (__NR_Linux + 106) -#define __NR_lstat (__NR_Linux + 107) -#define __NR_fstat (__NR_Linux + 108) -#define __NR_unused109 (__NR_Linux + 109) -#define __NR_iopl (__NR_Linux + 110) -#define __NR_vhangup (__NR_Linux + 111) -#define __NR_idle (__NR_Linux + 112) -#define __NR_vm86 (__NR_Linux + 113) -#define __NR_wait4 (__NR_Linux + 114) -#define __NR_swapoff (__NR_Linux + 115) -#define __NR_sysinfo (__NR_Linux + 116) -#define __NR_ipc (__NR_Linux + 117) -#define __NR_fsync (__NR_Linux + 118) -#define __NR_sigreturn (__NR_Linux + 119) -#define __NR_clone (__NR_Linux + 120) -#define __NR_setdomainname (__NR_Linux + 121) 
-#define __NR_uname (__NR_Linux + 122) -#define __NR_modify_ldt (__NR_Linux + 123) -#define __NR_adjtimex (__NR_Linux + 124) -#define __NR_mprotect (__NR_Linux + 125) -#define __NR_sigprocmask (__NR_Linux + 126) -#define __NR_create_module (__NR_Linux + 127) -#define __NR_init_module (__NR_Linux + 128) -#define __NR_delete_module (__NR_Linux + 129) -#define __NR_get_kernel_syms (__NR_Linux + 130) -#define __NR_quotactl (__NR_Linux + 131) -#define __NR_getpgid (__NR_Linux + 132) -#define __NR_fchdir (__NR_Linux + 133) -#define __NR_bdflush (__NR_Linux + 134) -#define __NR_sysfs (__NR_Linux + 135) -#define __NR_personality (__NR_Linux + 136) -#define __NR_afs_syscall (__NR_Linux + 137) -#define __NR_setfsuid (__NR_Linux + 138) -#define __NR_setfsgid (__NR_Linux + 139) -#define __NR__llseek (__NR_Linux + 140) -#define __NR_getdents (__NR_Linux + 141) -#define __NR__newselect (__NR_Linux + 142) -#define __NR_flock (__NR_Linux + 143) -#define __NR_msync (__NR_Linux + 144) -#define __NR_readv (__NR_Linux + 145) -#define __NR_writev (__NR_Linux + 146) -#define __NR_cacheflush (__NR_Linux + 147) -#define __NR_cachectl (__NR_Linux + 148) -#define __NR_sysmips (__NR_Linux + 149) -#define __NR_unused150 (__NR_Linux + 150) -#define __NR_getsid (__NR_Linux + 151) -#define __NR_fdatasync (__NR_Linux + 152) -#define __NR__sysctl (__NR_Linux + 153) -#define __NR_mlock (__NR_Linux + 154) -#define __NR_munlock (__NR_Linux + 155) -#define __NR_mlockall (__NR_Linux + 156) -#define __NR_munlockall (__NR_Linux + 157) -#define __NR_sched_setparam (__NR_Linux + 158) -#define __NR_sched_getparam (__NR_Linux + 159) -#define __NR_sched_setscheduler (__NR_Linux + 160) -#define __NR_sched_getscheduler (__NR_Linux + 161) -#define __NR_sched_yield (__NR_Linux + 162) -#define __NR_sched_get_priority_max (__NR_Linux + 163) -#define __NR_sched_get_priority_min (__NR_Linux + 164) -#define __NR_sched_rr_get_interval (__NR_Linux + 165) -#define __NR_nanosleep (__NR_Linux + 166) -#define __NR_mremap (__NR_Linux + 167) -#define __NR_accept (__NR_Linux + 168) -#define __NR_bind (__NR_Linux + 169) -#define __NR_connect (__NR_Linux + 170) -#define __NR_getpeername (__NR_Linux + 171) -#define __NR_getsockname (__NR_Linux + 172) -#define __NR_getsockopt (__NR_Linux + 173) -#define __NR_listen (__NR_Linux + 174) -#define __NR_recv (__NR_Linux + 175) -#define __NR_recvfrom (__NR_Linux + 176) -#define __NR_recvmsg (__NR_Linux + 177) -#define __NR_send (__NR_Linux + 178) -#define __NR_sendmsg (__NR_Linux + 179) -#define __NR_sendto (__NR_Linux + 180) -#define __NR_setsockopt (__NR_Linux + 181) -#define __NR_shutdown (__NR_Linux + 182) -#define __NR_socket (__NR_Linux + 183) -#define __NR_socketpair (__NR_Linux + 184) -#define __NR_setresuid (__NR_Linux + 185) -#define __NR_getresuid (__NR_Linux + 186) -#define __NR_query_module (__NR_Linux + 187) -#define __NR_poll (__NR_Linux + 188) -#define __NR_nfsservctl (__NR_Linux + 189) -#define __NR_setresgid (__NR_Linux + 190) -#define __NR_getresgid (__NR_Linux + 191) -#define __NR_prctl (__NR_Linux + 192) -#define __NR_rt_sigreturn (__NR_Linux + 193) -#define __NR_rt_sigaction (__NR_Linux + 194) -#define __NR_rt_sigprocmask (__NR_Linux + 195) -#define __NR_rt_sigpending (__NR_Linux + 196) -#define __NR_rt_sigtimedwait (__NR_Linux + 197) -#define __NR_rt_sigqueueinfo (__NR_Linux + 198) -#define __NR_rt_sigsuspend (__NR_Linux + 199) -#define __NR_pread64 (__NR_Linux + 200) -#define __NR_pwrite64 (__NR_Linux + 201) -#define __NR_chown (__NR_Linux + 202) -#define __NR_getcwd (__NR_Linux + 203) 
-#define __NR_capget (__NR_Linux + 204) -#define __NR_capset (__NR_Linux + 205) -#define __NR_sigaltstack (__NR_Linux + 206) -#define __NR_sendfile (__NR_Linux + 207) -#define __NR_getpmsg (__NR_Linux + 208) -#define __NR_putpmsg (__NR_Linux + 209) -#define __NR_mmap2 (__NR_Linux + 210) -#define __NR_truncate64 (__NR_Linux + 211) -#define __NR_ftruncate64 (__NR_Linux + 212) -#define __NR_stat64 (__NR_Linux + 213) -#define __NR_lstat64 (__NR_Linux + 214) -#define __NR_fstat64 (__NR_Linux + 215) -#define __NR_pivot_root (__NR_Linux + 216) -#define __NR_mincore (__NR_Linux + 217) -#define __NR_madvise (__NR_Linux + 218) -#define __NR_getdents64 (__NR_Linux + 219) -#define __NR_fcntl64 (__NR_Linux + 220) -#define __NR_reserved221 (__NR_Linux + 221) -#define __NR_gettid (__NR_Linux + 222) -#define __NR_readahead (__NR_Linux + 223) -#define __NR_setxattr (__NR_Linux + 224) -#define __NR_lsetxattr (__NR_Linux + 225) -#define __NR_fsetxattr (__NR_Linux + 226) -#define __NR_getxattr (__NR_Linux + 227) -#define __NR_lgetxattr (__NR_Linux + 228) -#define __NR_fgetxattr (__NR_Linux + 229) -#define __NR_listxattr (__NR_Linux + 230) -#define __NR_llistxattr (__NR_Linux + 231) -#define __NR_flistxattr (__NR_Linux + 232) -#define __NR_removexattr (__NR_Linux + 233) -#define __NR_lremovexattr (__NR_Linux + 234) -#define __NR_fremovexattr (__NR_Linux + 235) -#define __NR_tkill (__NR_Linux + 236) -#define __NR_sendfile64 (__NR_Linux + 237) -#define __NR_futex (__NR_Linux + 238) -#define __NR_sched_setaffinity (__NR_Linux + 239) -#define __NR_sched_getaffinity (__NR_Linux + 240) -#define __NR_io_setup (__NR_Linux + 241) -#define __NR_io_destroy (__NR_Linux + 242) -#define __NR_io_getevents (__NR_Linux + 243) -#define __NR_io_submit (__NR_Linux + 244) -#define __NR_io_cancel (__NR_Linux + 245) -#define __NR_exit_group (__NR_Linux + 246) -#define __NR_lookup_dcookie (__NR_Linux + 247) -#define __NR_epoll_create (__NR_Linux + 248) -#define __NR_epoll_ctl (__NR_Linux + 249) -#define __NR_epoll_wait (__NR_Linux + 250) -#define __NR_remap_file_pages (__NR_Linux + 251) -#define __NR_set_tid_address (__NR_Linux + 252) -#define __NR_restart_syscall (__NR_Linux + 253) -#define __NR_fadvise64 (__NR_Linux + 254) -#define __NR_statfs64 (__NR_Linux + 255) -#define __NR_fstatfs64 (__NR_Linux + 256) -#define __NR_timer_create (__NR_Linux + 257) -#define __NR_timer_settime (__NR_Linux + 258) -#define __NR_timer_gettime (__NR_Linux + 259) -#define __NR_timer_getoverrun (__NR_Linux + 260) -#define __NR_timer_delete (__NR_Linux + 261) -#define __NR_clock_settime (__NR_Linux + 262) -#define __NR_clock_gettime (__NR_Linux + 263) -#define __NR_clock_getres (__NR_Linux + 264) -#define __NR_clock_nanosleep (__NR_Linux + 265) -#define __NR_tgkill (__NR_Linux + 266) -#define __NR_utimes (__NR_Linux + 267) -#define __NR_mbind (__NR_Linux + 268) -#define __NR_get_mempolicy (__NR_Linux + 269) -#define __NR_set_mempolicy (__NR_Linux + 270) -#define __NR_mq_open (__NR_Linux + 271) -#define __NR_mq_unlink (__NR_Linux + 272) -#define __NR_mq_timedsend (__NR_Linux + 273) -#define __NR_mq_timedreceive (__NR_Linux + 274) -#define __NR_mq_notify (__NR_Linux + 275) -#define __NR_mq_getsetattr (__NR_Linux + 276) -#define __NR_vserver (__NR_Linux + 277) -#define __NR_waitid (__NR_Linux + 278) -#define __NR_add_key (__NR_Linux + 280) -#define __NR_request_key (__NR_Linux + 281) -#define __NR_keyctl (__NR_Linux + 282) -#define __NR_set_thread_area (__NR_Linux + 283) -#define __NR_inotify_init (__NR_Linux + 284) -#define __NR_inotify_add_watch 
(__NR_Linux + 285) -#define __NR_inotify_rm_watch (__NR_Linux + 286) -#define __NR_migrate_pages (__NR_Linux + 287) -#define __NR_openat (__NR_Linux + 288) -#define __NR_mkdirat (__NR_Linux + 289) -#define __NR_mknodat (__NR_Linux + 290) -#define __NR_fchownat (__NR_Linux + 291) -#define __NR_futimesat (__NR_Linux + 292) -#define __NR_fstatat64 (__NR_Linux + 293) -#define __NR_unlinkat (__NR_Linux + 294) -#define __NR_renameat (__NR_Linux + 295) -#define __NR_linkat (__NR_Linux + 296) -#define __NR_symlinkat (__NR_Linux + 297) -#define __NR_readlinkat (__NR_Linux + 298) -#define __NR_fchmodat (__NR_Linux + 299) -#define __NR_faccessat (__NR_Linux + 300) -#define __NR_pselect6 (__NR_Linux + 301) -#define __NR_ppoll (__NR_Linux + 302) -#define __NR_unshare (__NR_Linux + 303) -#define __NR_splice (__NR_Linux + 304) -#define __NR_sync_file_range (__NR_Linux + 305) -#define __NR_tee (__NR_Linux + 306) -#define __NR_vmsplice (__NR_Linux + 307) -#define __NR_move_pages (__NR_Linux + 308) -#define __NR_set_robust_list (__NR_Linux + 309) -#define __NR_get_robust_list (__NR_Linux + 310) -#define __NR_kexec_load (__NR_Linux + 311) -#define __NR_getcpu (__NR_Linux + 312) -#define __NR_epoll_pwait (__NR_Linux + 313) -#define __NR_ioprio_set (__NR_Linux + 314) -#define __NR_ioprio_get (__NR_Linux + 315) -#define __NR_utimensat (__NR_Linux + 316) -#define __NR_signalfd (__NR_Linux + 317) -#define __NR_timerfd (__NR_Linux + 318) -#define __NR_eventfd (__NR_Linux + 319) -#define __NR_fallocate (__NR_Linux + 320) -#define __NR_timerfd_create (__NR_Linux + 321) -#define __NR_timerfd_gettime (__NR_Linux + 322) -#define __NR_timerfd_settime (__NR_Linux + 323) -#define __NR_signalfd4 (__NR_Linux + 324) -#define __NR_eventfd2 (__NR_Linux + 325) -#define __NR_epoll_create1 (__NR_Linux + 326) -#define __NR_dup3 (__NR_Linux + 327) -#define __NR_pipe2 (__NR_Linux + 328) -#define __NR_inotify_init1 (__NR_Linux + 329) -#define __NR_preadv (__NR_Linux + 330) -#define __NR_pwritev (__NR_Linux + 331) -#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) -#define __NR_perf_event_open (__NR_Linux + 333) -#define __NR_accept4 (__NR_Linux + 334) -#define __NR_recvmmsg (__NR_Linux + 335) -#define __NR_fanotify_init (__NR_Linux + 336) -#define __NR_fanotify_mark (__NR_Linux + 337) -#define __NR_prlimit64 (__NR_Linux + 338) -#define __NR_name_to_handle_at (__NR_Linux + 339) -#define __NR_open_by_handle_at (__NR_Linux + 340) -#define __NR_clock_adjtime (__NR_Linux + 341) -#define __NR_syncfs (__NR_Linux + 342) -#define __NR_sendmmsg (__NR_Linux + 343) -#define __NR_setns (__NR_Linux + 344) -#define __NR_process_vm_readv (__NR_Linux + 345) -#define __NR_process_vm_writev (__NR_Linux + 346) -#define __NR_kcmp (__NR_Linux + 347) -#define __NR_finit_module (__NR_Linux + 348) -#define __NR_sched_setattr (__NR_Linux + 349) -#define __NR_sched_getattr (__NR_Linux + 350) -#define __NR_renameat2 (__NR_Linux + 351) -#define __NR_seccomp (__NR_Linux + 352) -#define __NR_getrandom (__NR_Linux + 353) -#define __NR_memfd_create (__NR_Linux + 354) -#define __NR_bpf (__NR_Linux + 355) -#define __NR_execveat (__NR_Linux + 356) -#define __NR_userfaultfd (__NR_Linux + 357) -#define __NR_membarrier (__NR_Linux + 358) -#define __NR_mlock2 (__NR_Linux + 359) -#define __NR_copy_file_range (__NR_Linux + 360) -#define __NR_preadv2 (__NR_Linux + 361) -#define __NR_pwritev2 (__NR_Linux + 362) -#define __NR_pkey_mprotect (__NR_Linux + 363) -#define __NR_pkey_alloc (__NR_Linux + 364) -#define __NR_pkey_free (__NR_Linux + 365) -#define __NR_statx (__NR_Linux + 
366) -#define __NR_rseq (__NR_Linux + 367) -#define __NR_io_pgetevents (__NR_Linux + 368) -#define __NR_semget (__NR_Linux + 393) -#define __NR_semctl (__NR_Linux + 394) -#define __NR_shmget (__NR_Linux + 395) -#define __NR_shmctl (__NR_Linux + 396) -#define __NR_shmat (__NR_Linux + 397) -#define __NR_shmdt (__NR_Linux + 398) -#define __NR_msgget (__NR_Linux + 399) -#define __NR_msgsnd (__NR_Linux + 400) -#define __NR_msgrcv (__NR_Linux + 401) -#define __NR_msgctl (__NR_Linux + 402) -#define __NR_clock_gettime64 (__NR_Linux + 403) -#define __NR_clock_settime64 (__NR_Linux + 404) -#define __NR_clock_adjtime64 (__NR_Linux + 405) -#define __NR_clock_getres_time64 (__NR_Linux + 406) -#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) -#define __NR_timer_gettime64 (__NR_Linux + 408) -#define __NR_timer_settime64 (__NR_Linux + 409) -#define __NR_timerfd_gettime64 (__NR_Linux + 410) -#define __NR_timerfd_settime64 (__NR_Linux + 411) -#define __NR_utimensat_time64 (__NR_Linux + 412) -#define __NR_pselect6_time64 (__NR_Linux + 413) -#define __NR_ppoll_time64 (__NR_Linux + 414) -#define __NR_io_pgetevents_time64 (__NR_Linux + 416) -#define __NR_recvmmsg_time64 (__NR_Linux + 417) -#define __NR_mq_timedsend_time64 (__NR_Linux + 418) -#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) -#define __NR_semtimedop_time64 (__NR_Linux + 420) -#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) -#define __NR_futex_time64 (__NR_Linux + 422) -#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) -#define __NR_pidfd_send_signal (__NR_Linux + 424) -#define __NR_io_uring_setup (__NR_Linux + 425) -#define __NR_io_uring_enter (__NR_Linux + 426) -#define __NR_io_uring_register (__NR_Linux + 427) -#define __NR_open_tree (__NR_Linux + 428) -#define __NR_move_mount (__NR_Linux + 429) -#define __NR_fsopen (__NR_Linux + 430) -#define __NR_fsconfig (__NR_Linux + 431) -#define __NR_fsmount (__NR_Linux + 432) -#define __NR_fspick (__NR_Linux + 433) -#define __NR_pidfd_open (__NR_Linux + 434) -#define __NR_clone3 (__NR_Linux + 435) -#define __NR_close_range (__NR_Linux + 436) -#define __NR_openat2 (__NR_Linux + 437) -#define __NR_pidfd_getfd (__NR_Linux + 438) -#define __NR_faccessat2 (__NR_Linux + 439) -#define __NR_process_madvise (__NR_Linux + 440) -#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_syscall (__NR_Linux + 0) +#define __NR_exit (__NR_Linux + 1) +#define __NR_fork (__NR_Linux + 2) +#define __NR_read (__NR_Linux + 3) +#define __NR_write (__NR_Linux + 4) +#define __NR_open (__NR_Linux + 5) +#define __NR_close (__NR_Linux + 6) +#define __NR_waitpid (__NR_Linux + 7) +#define __NR_creat (__NR_Linux + 8) +#define __NR_link (__NR_Linux + 9) +#define __NR_unlink (__NR_Linux + 10) +#define __NR_execve (__NR_Linux + 11) +#define __NR_chdir (__NR_Linux + 12) +#define __NR_time (__NR_Linux + 13) +#define __NR_mknod (__NR_Linux + 14) +#define __NR_chmod (__NR_Linux + 15) +#define __NR_lchown (__NR_Linux + 16) +#define __NR_break (__NR_Linux + 17) +#define __NR_unused18 (__NR_Linux + 18) +#define __NR_lseek (__NR_Linux + 19) +#define __NR_getpid (__NR_Linux + 20) +#define __NR_mount (__NR_Linux + 21) +#define __NR_umount (__NR_Linux + 22) +#define __NR_setuid (__NR_Linux + 23) +#define __NR_getuid (__NR_Linux + 24) +#define __NR_stime (__NR_Linux + 25) +#define __NR_ptrace (__NR_Linux + 26) +#define __NR_alarm (__NR_Linux + 27) +#define __NR_unused28 (__NR_Linux + 28) +#define __NR_pause (__NR_Linux + 29) +#define __NR_utime (__NR_Linux + 30) +#define __NR_stty (__NR_Linux + 31) +#define __NR_gtty 
(__NR_Linux + 32) +#define __NR_access (__NR_Linux + 33) +#define __NR_nice (__NR_Linux + 34) +#define __NR_ftime (__NR_Linux + 35) +#define __NR_sync (__NR_Linux + 36) +#define __NR_kill (__NR_Linux + 37) +#define __NR_rename (__NR_Linux + 38) +#define __NR_mkdir (__NR_Linux + 39) +#define __NR_rmdir (__NR_Linux + 40) +#define __NR_dup (__NR_Linux + 41) +#define __NR_pipe (__NR_Linux + 42) +#define __NR_times (__NR_Linux + 43) +#define __NR_prof (__NR_Linux + 44) +#define __NR_brk (__NR_Linux + 45) +#define __NR_setgid (__NR_Linux + 46) +#define __NR_getgid (__NR_Linux + 47) +#define __NR_signal (__NR_Linux + 48) +#define __NR_geteuid (__NR_Linux + 49) +#define __NR_getegid (__NR_Linux + 50) +#define __NR_acct (__NR_Linux + 51) +#define __NR_umount2 (__NR_Linux + 52) +#define __NR_lock (__NR_Linux + 53) +#define __NR_ioctl (__NR_Linux + 54) +#define __NR_fcntl (__NR_Linux + 55) +#define __NR_mpx (__NR_Linux + 56) +#define __NR_setpgid (__NR_Linux + 57) +#define __NR_ulimit (__NR_Linux + 58) +#define __NR_unused59 (__NR_Linux + 59) +#define __NR_umask (__NR_Linux + 60) +#define __NR_chroot (__NR_Linux + 61) +#define __NR_ustat (__NR_Linux + 62) +#define __NR_dup2 (__NR_Linux + 63) +#define __NR_getppid (__NR_Linux + 64) +#define __NR_getpgrp (__NR_Linux + 65) +#define __NR_setsid (__NR_Linux + 66) +#define __NR_sigaction (__NR_Linux + 67) +#define __NR_sgetmask (__NR_Linux + 68) +#define __NR_ssetmask (__NR_Linux + 69) +#define __NR_setreuid (__NR_Linux + 70) +#define __NR_setregid (__NR_Linux + 71) +#define __NR_sigsuspend (__NR_Linux + 72) +#define __NR_sigpending (__NR_Linux + 73) +#define __NR_sethostname (__NR_Linux + 74) +#define __NR_setrlimit (__NR_Linux + 75) +#define __NR_getrlimit (__NR_Linux + 76) +#define __NR_getrusage (__NR_Linux + 77) +#define __NR_gettimeofday (__NR_Linux + 78) +#define __NR_settimeofday (__NR_Linux + 79) +#define __NR_getgroups (__NR_Linux + 80) +#define __NR_setgroups (__NR_Linux + 81) +#define __NR_reserved82 (__NR_Linux + 82) +#define __NR_symlink (__NR_Linux + 83) +#define __NR_unused84 (__NR_Linux + 84) +#define __NR_readlink (__NR_Linux + 85) +#define __NR_uselib (__NR_Linux + 86) +#define __NR_swapon (__NR_Linux + 87) +#define __NR_reboot (__NR_Linux + 88) +#define __NR_readdir (__NR_Linux + 89) +#define __NR_mmap (__NR_Linux + 90) +#define __NR_munmap (__NR_Linux + 91) +#define __NR_truncate (__NR_Linux + 92) +#define __NR_ftruncate (__NR_Linux + 93) +#define __NR_fchmod (__NR_Linux + 94) +#define __NR_fchown (__NR_Linux + 95) +#define __NR_getpriority (__NR_Linux + 96) +#define __NR_setpriority (__NR_Linux + 97) +#define __NR_profil (__NR_Linux + 98) +#define __NR_statfs (__NR_Linux + 99) +#define __NR_fstatfs (__NR_Linux + 100) +#define __NR_ioperm (__NR_Linux + 101) +#define __NR_socketcall (__NR_Linux + 102) +#define __NR_syslog (__NR_Linux + 103) +#define __NR_setitimer (__NR_Linux + 104) +#define __NR_getitimer (__NR_Linux + 105) +#define __NR_stat (__NR_Linux + 106) +#define __NR_lstat (__NR_Linux + 107) +#define __NR_fstat (__NR_Linux + 108) +#define __NR_unused109 (__NR_Linux + 109) +#define __NR_iopl (__NR_Linux + 110) +#define __NR_vhangup (__NR_Linux + 111) +#define __NR_idle (__NR_Linux + 112) +#define __NR_vm86 (__NR_Linux + 113) +#define __NR_wait4 (__NR_Linux + 114) +#define __NR_swapoff (__NR_Linux + 115) +#define __NR_sysinfo (__NR_Linux + 116) +#define __NR_ipc (__NR_Linux + 117) +#define __NR_fsync (__NR_Linux + 118) +#define __NR_sigreturn (__NR_Linux + 119) +#define __NR_clone (__NR_Linux + 120) +#define __NR_setdomainname 
(__NR_Linux + 121) +#define __NR_uname (__NR_Linux + 122) +#define __NR_modify_ldt (__NR_Linux + 123) +#define __NR_adjtimex (__NR_Linux + 124) +#define __NR_mprotect (__NR_Linux + 125) +#define __NR_sigprocmask (__NR_Linux + 126) +#define __NR_create_module (__NR_Linux + 127) +#define __NR_init_module (__NR_Linux + 128) +#define __NR_delete_module (__NR_Linux + 129) +#define __NR_get_kernel_syms (__NR_Linux + 130) +#define __NR_quotactl (__NR_Linux + 131) +#define __NR_getpgid (__NR_Linux + 132) +#define __NR_fchdir (__NR_Linux + 133) +#define __NR_bdflush (__NR_Linux + 134) +#define __NR_sysfs (__NR_Linux + 135) +#define __NR_personality (__NR_Linux + 136) +#define __NR_afs_syscall (__NR_Linux + 137) +#define __NR_setfsuid (__NR_Linux + 138) +#define __NR_setfsgid (__NR_Linux + 139) +#define __NR__llseek (__NR_Linux + 140) +#define __NR_getdents (__NR_Linux + 141) +#define __NR__newselect (__NR_Linux + 142) +#define __NR_flock (__NR_Linux + 143) +#define __NR_msync (__NR_Linux + 144) +#define __NR_readv (__NR_Linux + 145) +#define __NR_writev (__NR_Linux + 146) +#define __NR_cacheflush (__NR_Linux + 147) +#define __NR_cachectl (__NR_Linux + 148) +#define __NR_sysmips (__NR_Linux + 149) +#define __NR_unused150 (__NR_Linux + 150) +#define __NR_getsid (__NR_Linux + 151) +#define __NR_fdatasync (__NR_Linux + 152) +#define __NR__sysctl (__NR_Linux + 153) +#define __NR_mlock (__NR_Linux + 154) +#define __NR_munlock (__NR_Linux + 155) +#define __NR_mlockall (__NR_Linux + 156) +#define __NR_munlockall (__NR_Linux + 157) +#define __NR_sched_setparam (__NR_Linux + 158) +#define __NR_sched_getparam (__NR_Linux + 159) +#define __NR_sched_setscheduler (__NR_Linux + 160) +#define __NR_sched_getscheduler (__NR_Linux + 161) +#define __NR_sched_yield (__NR_Linux + 162) +#define __NR_sched_get_priority_max (__NR_Linux + 163) +#define __NR_sched_get_priority_min (__NR_Linux + 164) +#define __NR_sched_rr_get_interval (__NR_Linux + 165) +#define __NR_nanosleep (__NR_Linux + 166) +#define __NR_mremap (__NR_Linux + 167) +#define __NR_accept (__NR_Linux + 168) +#define __NR_bind (__NR_Linux + 169) +#define __NR_connect (__NR_Linux + 170) +#define __NR_getpeername (__NR_Linux + 171) +#define __NR_getsockname (__NR_Linux + 172) +#define __NR_getsockopt (__NR_Linux + 173) +#define __NR_listen (__NR_Linux + 174) +#define __NR_recv (__NR_Linux + 175) +#define __NR_recvfrom (__NR_Linux + 176) +#define __NR_recvmsg (__NR_Linux + 177) +#define __NR_send (__NR_Linux + 178) +#define __NR_sendmsg (__NR_Linux + 179) +#define __NR_sendto (__NR_Linux + 180) +#define __NR_setsockopt (__NR_Linux + 181) +#define __NR_shutdown (__NR_Linux + 182) +#define __NR_socket (__NR_Linux + 183) +#define __NR_socketpair (__NR_Linux + 184) +#define __NR_setresuid (__NR_Linux + 185) +#define __NR_getresuid (__NR_Linux + 186) +#define __NR_query_module (__NR_Linux + 187) +#define __NR_poll (__NR_Linux + 188) +#define __NR_nfsservctl (__NR_Linux + 189) +#define __NR_setresgid (__NR_Linux + 190) +#define __NR_getresgid (__NR_Linux + 191) +#define __NR_prctl (__NR_Linux + 192) +#define __NR_rt_sigreturn (__NR_Linux + 193) +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#define __NR_rt_sigpending (__NR_Linux + 196) +#define __NR_rt_sigtimedwait (__NR_Linux + 197) +#define __NR_rt_sigqueueinfo (__NR_Linux + 198) +#define __NR_rt_sigsuspend (__NR_Linux + 199) +#define __NR_pread64 (__NR_Linux + 200) +#define __NR_pwrite64 (__NR_Linux + 201) +#define __NR_chown (__NR_Linux + 202) +#define __NR_getcwd 
(__NR_Linux + 203) +#define __NR_capget (__NR_Linux + 204) +#define __NR_capset (__NR_Linux + 205) +#define __NR_sigaltstack (__NR_Linux + 206) +#define __NR_sendfile (__NR_Linux + 207) +#define __NR_getpmsg (__NR_Linux + 208) +#define __NR_putpmsg (__NR_Linux + 209) +#define __NR_mmap2 (__NR_Linux + 210) +#define __NR_truncate64 (__NR_Linux + 211) +#define __NR_ftruncate64 (__NR_Linux + 212) +#define __NR_stat64 (__NR_Linux + 213) +#define __NR_lstat64 (__NR_Linux + 214) +#define __NR_fstat64 (__NR_Linux + 215) +#define __NR_pivot_root (__NR_Linux + 216) +#define __NR_mincore (__NR_Linux + 217) +#define __NR_madvise (__NR_Linux + 218) +#define __NR_getdents64 (__NR_Linux + 219) +#define __NR_fcntl64 (__NR_Linux + 220) +#define __NR_reserved221 (__NR_Linux + 221) +#define __NR_gettid (__NR_Linux + 222) +#define __NR_readahead (__NR_Linux + 223) +#define __NR_setxattr (__NR_Linux + 224) +#define __NR_lsetxattr (__NR_Linux + 225) +#define __NR_fsetxattr (__NR_Linux + 226) +#define __NR_getxattr (__NR_Linux + 227) +#define __NR_lgetxattr (__NR_Linux + 228) +#define __NR_fgetxattr (__NR_Linux + 229) +#define __NR_listxattr (__NR_Linux + 230) +#define __NR_llistxattr (__NR_Linux + 231) +#define __NR_flistxattr (__NR_Linux + 232) +#define __NR_removexattr (__NR_Linux + 233) +#define __NR_lremovexattr (__NR_Linux + 234) +#define __NR_fremovexattr (__NR_Linux + 235) +#define __NR_tkill (__NR_Linux + 236) +#define __NR_sendfile64 (__NR_Linux + 237) +#define __NR_futex (__NR_Linux + 238) +#define __NR_sched_setaffinity (__NR_Linux + 239) +#define __NR_sched_getaffinity (__NR_Linux + 240) +#define __NR_io_setup (__NR_Linux + 241) +#define __NR_io_destroy (__NR_Linux + 242) +#define __NR_io_getevents (__NR_Linux + 243) +#define __NR_io_submit (__NR_Linux + 244) +#define __NR_io_cancel (__NR_Linux + 245) +#define __NR_exit_group (__NR_Linux + 246) +#define __NR_lookup_dcookie (__NR_Linux + 247) +#define __NR_epoll_create (__NR_Linux + 248) +#define __NR_epoll_ctl (__NR_Linux + 249) +#define __NR_epoll_wait (__NR_Linux + 250) +#define __NR_remap_file_pages (__NR_Linux + 251) +#define __NR_set_tid_address (__NR_Linux + 252) +#define __NR_restart_syscall (__NR_Linux + 253) +#define __NR_fadvise64 (__NR_Linux + 254) +#define __NR_statfs64 (__NR_Linux + 255) +#define __NR_fstatfs64 (__NR_Linux + 256) +#define __NR_timer_create (__NR_Linux + 257) +#define __NR_timer_settime (__NR_Linux + 258) +#define __NR_timer_gettime (__NR_Linux + 259) +#define __NR_timer_getoverrun (__NR_Linux + 260) +#define __NR_timer_delete (__NR_Linux + 261) +#define __NR_clock_settime (__NR_Linux + 262) +#define __NR_clock_gettime (__NR_Linux + 263) +#define __NR_clock_getres (__NR_Linux + 264) +#define __NR_clock_nanosleep (__NR_Linux + 265) +#define __NR_tgkill (__NR_Linux + 266) +#define __NR_utimes (__NR_Linux + 267) +#define __NR_mbind (__NR_Linux + 268) +#define __NR_get_mempolicy (__NR_Linux + 269) +#define __NR_set_mempolicy (__NR_Linux + 270) +#define __NR_mq_open (__NR_Linux + 271) +#define __NR_mq_unlink (__NR_Linux + 272) +#define __NR_mq_timedsend (__NR_Linux + 273) +#define __NR_mq_timedreceive (__NR_Linux + 274) +#define __NR_mq_notify (__NR_Linux + 275) +#define __NR_mq_getsetattr (__NR_Linux + 276) +#define __NR_vserver (__NR_Linux + 277) +#define __NR_waitid (__NR_Linux + 278) +#define __NR_add_key (__NR_Linux + 280) +#define __NR_request_key (__NR_Linux + 281) +#define __NR_keyctl (__NR_Linux + 282) +#define __NR_set_thread_area (__NR_Linux + 283) +#define __NR_inotify_init (__NR_Linux + 284) +#define 
__NR_inotify_add_watch (__NR_Linux + 285) +#define __NR_inotify_rm_watch (__NR_Linux + 286) +#define __NR_migrate_pages (__NR_Linux + 287) +#define __NR_openat (__NR_Linux + 288) +#define __NR_mkdirat (__NR_Linux + 289) +#define __NR_mknodat (__NR_Linux + 290) +#define __NR_fchownat (__NR_Linux + 291) +#define __NR_futimesat (__NR_Linux + 292) +#define __NR_fstatat64 (__NR_Linux + 293) +#define __NR_unlinkat (__NR_Linux + 294) +#define __NR_renameat (__NR_Linux + 295) +#define __NR_linkat (__NR_Linux + 296) +#define __NR_symlinkat (__NR_Linux + 297) +#define __NR_readlinkat (__NR_Linux + 298) +#define __NR_fchmodat (__NR_Linux + 299) +#define __NR_faccessat (__NR_Linux + 300) +#define __NR_pselect6 (__NR_Linux + 301) +#define __NR_ppoll (__NR_Linux + 302) +#define __NR_unshare (__NR_Linux + 303) +#define __NR_splice (__NR_Linux + 304) +#define __NR_sync_file_range (__NR_Linux + 305) +#define __NR_tee (__NR_Linux + 306) +#define __NR_vmsplice (__NR_Linux + 307) +#define __NR_move_pages (__NR_Linux + 308) +#define __NR_set_robust_list (__NR_Linux + 309) +#define __NR_get_robust_list (__NR_Linux + 310) +#define __NR_kexec_load (__NR_Linux + 311) +#define __NR_getcpu (__NR_Linux + 312) +#define __NR_epoll_pwait (__NR_Linux + 313) +#define __NR_ioprio_set (__NR_Linux + 314) +#define __NR_ioprio_get (__NR_Linux + 315) +#define __NR_utimensat (__NR_Linux + 316) +#define __NR_signalfd (__NR_Linux + 317) +#define __NR_timerfd (__NR_Linux + 318) +#define __NR_eventfd (__NR_Linux + 319) +#define __NR_fallocate (__NR_Linux + 320) +#define __NR_timerfd_create (__NR_Linux + 321) +#define __NR_timerfd_gettime (__NR_Linux + 322) +#define __NR_timerfd_settime (__NR_Linux + 323) +#define __NR_signalfd4 (__NR_Linux + 324) +#define __NR_eventfd2 (__NR_Linux + 325) +#define __NR_epoll_create1 (__NR_Linux + 326) +#define __NR_dup3 (__NR_Linux + 327) +#define __NR_pipe2 (__NR_Linux + 328) +#define __NR_inotify_init1 (__NR_Linux + 329) +#define __NR_preadv (__NR_Linux + 330) +#define __NR_pwritev (__NR_Linux + 331) +#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) +#define __NR_perf_event_open (__NR_Linux + 333) +#define __NR_accept4 (__NR_Linux + 334) +#define __NR_recvmmsg (__NR_Linux + 335) +#define __NR_fanotify_init (__NR_Linux + 336) +#define __NR_fanotify_mark (__NR_Linux + 337) +#define __NR_prlimit64 (__NR_Linux + 338) +#define __NR_name_to_handle_at (__NR_Linux + 339) +#define __NR_open_by_handle_at (__NR_Linux + 340) +#define __NR_clock_adjtime (__NR_Linux + 341) +#define __NR_syncfs (__NR_Linux + 342) +#define __NR_sendmmsg (__NR_Linux + 343) +#define __NR_setns (__NR_Linux + 344) +#define __NR_process_vm_readv (__NR_Linux + 345) +#define __NR_process_vm_writev (__NR_Linux + 346) +#define __NR_kcmp (__NR_Linux + 347) +#define __NR_finit_module (__NR_Linux + 348) +#define __NR_sched_setattr (__NR_Linux + 349) +#define __NR_sched_getattr (__NR_Linux + 350) +#define __NR_renameat2 (__NR_Linux + 351) +#define __NR_seccomp (__NR_Linux + 352) +#define __NR_getrandom (__NR_Linux + 353) +#define __NR_memfd_create (__NR_Linux + 354) +#define __NR_bpf (__NR_Linux + 355) +#define __NR_execveat (__NR_Linux + 356) +#define __NR_userfaultfd (__NR_Linux + 357) +#define __NR_membarrier (__NR_Linux + 358) +#define __NR_mlock2 (__NR_Linux + 359) +#define __NR_copy_file_range (__NR_Linux + 360) +#define __NR_preadv2 (__NR_Linux + 361) +#define __NR_pwritev2 (__NR_Linux + 362) +#define __NR_pkey_mprotect (__NR_Linux + 363) +#define __NR_pkey_alloc (__NR_Linux + 364) +#define __NR_pkey_free (__NR_Linux + 365) +#define 
__NR_statx (__NR_Linux + 366) +#define __NR_rseq (__NR_Linux + 367) +#define __NR_io_pgetevents (__NR_Linux + 368) +#define __NR_semget (__NR_Linux + 393) +#define __NR_semctl (__NR_Linux + 394) +#define __NR_shmget (__NR_Linux + 395) +#define __NR_shmctl (__NR_Linux + 396) +#define __NR_shmat (__NR_Linux + 397) +#define __NR_shmdt (__NR_Linux + 398) +#define __NR_msgget (__NR_Linux + 399) +#define __NR_msgsnd (__NR_Linux + 400) +#define __NR_msgrcv (__NR_Linux + 401) +#define __NR_msgctl (__NR_Linux + 402) +#define __NR_clock_gettime64 (__NR_Linux + 403) +#define __NR_clock_settime64 (__NR_Linux + 404) +#define __NR_clock_adjtime64 (__NR_Linux + 405) +#define __NR_clock_getres_time64 (__NR_Linux + 406) +#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) +#define __NR_timer_gettime64 (__NR_Linux + 408) +#define __NR_timer_settime64 (__NR_Linux + 409) +#define __NR_timerfd_gettime64 (__NR_Linux + 410) +#define __NR_timerfd_settime64 (__NR_Linux + 411) +#define __NR_utimensat_time64 (__NR_Linux + 412) +#define __NR_pselect6_time64 (__NR_Linux + 413) +#define __NR_ppoll_time64 (__NR_Linux + 414) +#define __NR_io_pgetevents_time64 (__NR_Linux + 416) +#define __NR_recvmmsg_time64 (__NR_Linux + 417) +#define __NR_mq_timedsend_time64 (__NR_Linux + 418) +#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) +#define __NR_semtimedop_time64 (__NR_Linux + 420) +#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) +#define __NR_futex_time64 (__NR_Linux + 422) +#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) +#define __NR_pidfd_send_signal (__NR_Linux + 424) +#define __NR_io_uring_setup (__NR_Linux + 425) +#define __NR_io_uring_enter (__NR_Linux + 426) +#define __NR_io_uring_register (__NR_Linux + 427) +#define __NR_open_tree (__NR_Linux + 428) +#define __NR_move_mount (__NR_Linux + 429) +#define __NR_fsopen (__NR_Linux + 430) +#define __NR_fsconfig (__NR_Linux + 431) +#define __NR_fsmount (__NR_Linux + 432) +#define __NR_fspick (__NR_Linux + 433) +#define __NR_pidfd_open (__NR_Linux + 434) +#define __NR_clone3 (__NR_Linux + 435) +#define __NR_close_range (__NR_Linux + 436) +#define __NR_openat2 (__NR_Linux + 437) +#define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) +#define __NR_process_madvise (__NR_Linux + 440) +#define __NR_epoll_pwait2 (__NR_Linux + 441) +#define __NR_mount_setattr (__NR_Linux + 442) +#define __NR_quotactl_fd (__NR_Linux + 443) +#define __NR_landlock_create_ruleset (__NR_Linux + 444) +#define __NR_landlock_add_rule (__NR_Linux + 445) +#define __NR_landlock_restrict_self (__NR_Linux + 446) - -#endif /* _ASM_MIPS_UNISTD_O32_H */ +#endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index c3af3f324c5..9f18fa090f1 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 4624c900436..cd5a8a41b26 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ 
b/linux-headers/asm-powerpc/unistd_32.h @@ -1,429 +1,434 @@ -#ifndef _ASM_POWERPC_UNISTD_32_H -#define _ASM_POWERPC_UNISTD_32_H +#ifndef _ASM_UNISTD_32_H +#define _ASM_UNISTD_32_H -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 
-#define __NR_personality 136 -#define __NR_afs_syscall 137 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_query_module 166 -#define __NR_poll 167 -#define __NR_nfsservctl 168 -#define __NR_setresgid 169 -#define __NR_getresgid 170 -#define __NR_prctl 171 -#define __NR_rt_sigreturn 172 -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#define __NR_rt_sigpending 175 -#define __NR_rt_sigtimedwait 176 -#define __NR_rt_sigqueueinfo 177 -#define __NR_rt_sigsuspend 178 -#define __NR_pread64 179 -#define __NR_pwrite64 180 -#define __NR_chown 181 -#define __NR_getcwd 182 -#define __NR_capget 183 -#define __NR_capset 184 -#define __NR_sigaltstack 185 -#define __NR_sendfile 186 -#define __NR_getpmsg 187 -#define __NR_putpmsg 188 -#define __NR_vfork 189 -#define __NR_ugetrlimit 190 -#define __NR_readahead 191 -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_pciconfig_read 198 -#define __NR_pciconfig_write 199 -#define __NR_pciconfig_iobase 200 -#define __NR_multiplexer 201 -#define __NR_getdents64 202 -#define __NR_pivot_root 203 -#define __NR_fcntl64 204 -#define __NR_madvise 205 -#define __NR_mincore 206 -#define __NR_gettid 207 -#define __NR_tkill 208 -#define __NR_setxattr 209 -#define __NR_lsetxattr 210 -#define __NR_fsetxattr 211 -#define __NR_getxattr 212 -#define __NR_lgetxattr 213 -#define __NR_fgetxattr 214 -#define __NR_listxattr 215 -#define __NR_llistxattr 216 -#define __NR_flistxattr 217 -#define __NR_removexattr 218 -#define __NR_lremovexattr 219 -#define __NR_fremovexattr 220 -#define __NR_futex 221 -#define __NR_sched_setaffinity 222 -#define __NR_sched_getaffinity 223 -#define __NR_tuxcall 225 -#define __NR_sendfile64 226 -#define __NR_io_setup 227 -#define __NR_io_destroy 228 -#define __NR_io_getevents 229 -#define __NR_io_submit 230 -#define __NR_io_cancel 231 -#define __NR_set_tid_address 232 -#define __NR_fadvise64 233 -#define __NR_exit_group 234 -#define __NR_lookup_dcookie 235 -#define __NR_epoll_create 236 -#define __NR_epoll_ctl 237 -#define __NR_epoll_wait 238 -#define __NR_remap_file_pages 239 -#define __NR_timer_create 240 -#define __NR_timer_settime 241 -#define __NR_timer_gettime 242 -#define __NR_timer_getoverrun 243 -#define __NR_timer_delete 244 -#define __NR_clock_settime 245 -#define __NR_clock_gettime 246 -#define __NR_clock_getres 247 -#define __NR_clock_nanosleep 248 -#define __NR_swapcontext 249 -#define __NR_tgkill 250 -#define __NR_utimes 251 -#define __NR_statfs64 252 -#define __NR_fstatfs64 253 -#define __NR_fadvise64_64 254 -#define __NR_rtas 255 -#define __NR_sys_debug_setcontext 256 -#define __NR_migrate_pages 258 -#define __NR_mbind 259 
-#define __NR_get_mempolicy 260 -#define __NR_set_mempolicy 261 -#define __NR_mq_open 262 -#define __NR_mq_unlink 263 -#define __NR_mq_timedsend 264 -#define __NR_mq_timedreceive 265 -#define __NR_mq_notify 266 -#define __NR_mq_getsetattr 267 -#define __NR_kexec_load 268 -#define __NR_add_key 269 -#define __NR_request_key 270 -#define __NR_keyctl 271 -#define __NR_waitid 272 -#define __NR_ioprio_set 273 -#define __NR_ioprio_get 274 -#define __NR_inotify_init 275 -#define __NR_inotify_add_watch 276 -#define __NR_inotify_rm_watch 277 -#define __NR_spu_run 278 -#define __NR_spu_create 279 -#define __NR_pselect6 280 -#define __NR_ppoll 281 -#define __NR_unshare 282 -#define __NR_splice 283 -#define __NR_tee 284 -#define __NR_vmsplice 285 -#define __NR_openat 286 -#define __NR_mkdirat 287 -#define __NR_mknodat 288 -#define __NR_fchownat 289 -#define __NR_futimesat 290 -#define __NR_fstatat64 291 -#define __NR_unlinkat 292 -#define __NR_renameat 293 -#define __NR_linkat 294 -#define __NR_symlinkat 295 -#define __NR_readlinkat 296 -#define __NR_fchmodat 297 -#define __NR_faccessat 298 -#define __NR_get_robust_list 299 -#define __NR_set_robust_list 300 -#define __NR_move_pages 301 -#define __NR_getcpu 302 -#define __NR_epoll_pwait 303 -#define __NR_utimensat 304 -#define __NR_signalfd 305 -#define __NR_timerfd_create 306 -#define __NR_eventfd 307 -#define __NR_sync_file_range2 308 -#define __NR_fallocate 309 -#define __NR_subpage_prot 310 -#define __NR_timerfd_settime 311 -#define __NR_timerfd_gettime 312 -#define __NR_signalfd4 313 -#define __NR_eventfd2 314 -#define __NR_epoll_create1 315 -#define __NR_dup3 316 -#define __NR_pipe2 317 -#define __NR_inotify_init1 318 -#define __NR_perf_event_open 319 -#define __NR_preadv 320 -#define __NR_pwritev 321 -#define __NR_rt_tgsigqueueinfo 322 -#define __NR_fanotify_init 323 -#define __NR_fanotify_mark 324 -#define __NR_prlimit64 325 -#define __NR_socket 326 -#define __NR_bind 327 -#define __NR_connect 328 -#define __NR_listen 329 -#define __NR_accept 330 -#define __NR_getsockname 331 -#define __NR_getpeername 332 -#define __NR_socketpair 333 -#define __NR_send 334 -#define __NR_sendto 335 -#define __NR_recv 336 -#define __NR_recvfrom 337 -#define __NR_shutdown 338 -#define __NR_setsockopt 339 -#define __NR_getsockopt 340 -#define __NR_sendmsg 341 -#define __NR_recvmsg 342 -#define __NR_recvmmsg 343 -#define __NR_accept4 344 -#define __NR_name_to_handle_at 345 -#define __NR_open_by_handle_at 346 -#define __NR_clock_adjtime 347 -#define __NR_syncfs 348 -#define __NR_sendmmsg 349 -#define __NR_setns 350 -#define __NR_process_vm_readv 351 -#define __NR_process_vm_writev 352 -#define __NR_finit_module 353 -#define __NR_kcmp 354 -#define __NR_sched_setattr 355 -#define __NR_sched_getattr 356 -#define __NR_renameat2 357 -#define __NR_seccomp 358 -#define __NR_getrandom 359 -#define __NR_memfd_create 360 -#define __NR_bpf 361 -#define __NR_execveat 362 -#define __NR_switch_endian 363 -#define __NR_userfaultfd 364 -#define __NR_membarrier 365 -#define __NR_mlock2 378 -#define __NR_copy_file_range 379 -#define __NR_preadv2 380 -#define __NR_pwritev2 381 -#define __NR_kexec_file_load 382 -#define __NR_statx 383 -#define __NR_pkey_alloc 384 -#define __NR_pkey_free 385 -#define __NR_pkey_mprotect 386 -#define __NR_rseq 387 -#define __NR_io_pgetevents 388 -#define __NR_semget 393 -#define __NR_semctl 394 -#define __NR_shmget 395 -#define __NR_shmctl 396 -#define __NR_shmat 397 -#define __NR_shmdt 398 -#define __NR_msgget 399 -#define __NR_msgsnd 400 -#define 
__NR_msgrcv 401 -#define __NR_msgctl 402 -#define __NR_clock_gettime64 403 -#define __NR_clock_settime64 404 -#define __NR_clock_adjtime64 405 -#define __NR_clock_getres_time64 406 -#define __NR_clock_nanosleep_time64 407 -#define __NR_timer_gettime64 408 -#define __NR_timer_settime64 409 -#define __NR_timerfd_gettime64 410 -#define __NR_timerfd_settime64 411 -#define __NR_utimensat_time64 412 -#define __NR_pselect6_time64 413 -#define __NR_ppoll_time64 414 -#define __NR_io_pgetevents_time64 416 -#define __NR_recvmmsg_time64 417 -#define __NR_mq_timedsend_time64 418 -#define __NR_mq_timedreceive_time64 419 -#define __NR_semtimedop_time64 420 -#define __NR_rt_sigtimedwait_time64 421 -#define __NR_futex_time64 422 -#define __NR_sched_rr_get_interval_time64 423 -#define __NR_pidfd_send_signal 424 -#define __NR_io_uring_setup 425 -#define __NR_io_uring_enter 426 -#define __NR_io_uring_register 427 -#define __NR_open_tree 428 -#define __NR_move_mount 429 -#define __NR_fsopen 430 -#define __NR_fsconfig 431 -#define __NR_fsmount 432 -#define __NR_fspick 433 -#define __NR_pidfd_open 434 -#define __NR_clone3 435 -#define __NR_close_range 436 -#define __NR_openat2 437 -#define __NR_pidfd_getfd 438 -#define __NR_faccessat2 439 -#define __NR_process_madvise 440 -#define __NR_epoll_pwait2 441 +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define 
__NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread64 179 +#define __NR_pwrite64 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 +#define __NR_putpmsg 188 +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 +#define __NR_readahead 191 +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 +#define __NR_pivot_root 203 +#define __NR_fcntl64 204 +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 
220 +#define __NR_futex 221 +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#define __NR_tuxcall 225 +#define __NR_sendfile64 226 +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 +#define __NR_set_tid_address 232 +#define __NR_fadvise64 233 +#define __NR_exit_group 234 +#define __NR_lookup_dcookie 235 +#define __NR_epoll_create 236 +#define __NR_epoll_ctl 237 +#define __NR_epoll_wait 238 +#define __NR_remap_file_pages 239 +#define __NR_timer_create 240 +#define __NR_timer_settime 241 +#define __NR_timer_gettime 242 +#define __NR_timer_getoverrun 243 +#define __NR_timer_delete 244 +#define __NR_clock_settime 245 +#define __NR_clock_gettime 246 +#define __NR_clock_getres 247 +#define __NR_clock_nanosleep 248 +#define __NR_swapcontext 249 +#define __NR_tgkill 250 +#define __NR_utimes 251 +#define __NR_statfs64 252 +#define __NR_fstatfs64 253 +#define __NR_fadvise64_64 254 +#define __NR_rtas 255 +#define __NR_sys_debug_setcontext 256 +#define __NR_migrate_pages 258 +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_mq_open 262 +#define __NR_mq_unlink 263 +#define __NR_mq_timedsend 264 +#define __NR_mq_timedreceive 265 +#define __NR_mq_notify 266 +#define __NR_mq_getsetattr 267 +#define __NR_kexec_load 268 +#define __NR_add_key 269 +#define __NR_request_key 270 +#define __NR_keyctl 271 +#define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 +#define __NR_inotify_init 275 +#define __NR_inotify_add_watch 276 +#define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_pselect6 280 +#define __NR_ppoll 281 +#define __NR_unshare 282 +#define __NR_splice 283 +#define __NR_tee 284 +#define __NR_vmsplice 285 +#define __NR_openat 286 +#define __NR_mkdirat 287 +#define __NR_mknodat 288 +#define __NR_fchownat 289 +#define __NR_futimesat 290 +#define __NR_fstatat64 291 +#define __NR_unlinkat 292 +#define __NR_renameat 293 +#define __NR_linkat 294 +#define __NR_symlinkat 295 +#define __NR_readlinkat 296 +#define __NR_fchmodat 297 +#define __NR_faccessat 298 +#define __NR_get_robust_list 299 +#define __NR_set_robust_list 300 +#define __NR_move_pages 301 +#define __NR_getcpu 302 +#define __NR_epoll_pwait 303 +#define __NR_utimensat 304 +#define __NR_signalfd 305 +#define __NR_timerfd_create 306 +#define __NR_eventfd 307 +#define __NR_sync_file_range2 308 +#define __NR_fallocate 309 +#define __NR_subpage_prot 310 +#define __NR_timerfd_settime 311 +#define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 +#define __NR_perf_event_open 319 +#define __NR_preadv 320 +#define __NR_pwritev 321 +#define __NR_rt_tgsigqueueinfo 322 +#define __NR_fanotify_init 323 +#define __NR_fanotify_mark 324 +#define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 +#define __NR_name_to_handle_at 
345 +#define __NR_open_by_handle_at 346 +#define __NR_clock_adjtime 347 +#define __NR_syncfs 348 +#define __NR_sendmmsg 349 +#define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#define __NR_finit_module 353 +#define __NR_kcmp 354 +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 +#define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 +#define __NR_bpf 361 +#define __NR_execveat 362 +#define __NR_switch_endian 363 +#define __NR_userfaultfd 364 +#define __NR_membarrier 365 +#define __NR_mlock2 378 +#define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 +#define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 +#define __NR_rseq 387 +#define __NR_io_pgetevents 388 +#define __NR_semget 393 +#define __NR_semctl 394 +#define __NR_shmget 395 +#define __NR_shmctl 396 +#define __NR_shmat 397 +#define __NR_shmdt 398 +#define __NR_msgget 399 +#define __NR_msgsnd 400 +#define __NR_msgrcv 401 +#define __NR_msgctl 402 +#define __NR_clock_gettime64 403 +#define __NR_clock_settime64 404 +#define __NR_clock_adjtime64 405 +#define __NR_clock_getres_time64 406 +#define __NR_clock_nanosleep_time64 407 +#define __NR_timer_gettime64 408 +#define __NR_timer_settime64 409 +#define __NR_timerfd_gettime64 410 +#define __NR_timerfd_settime64 411 +#define __NR_utimensat_time64 412 +#define __NR_pselect6_time64 413 +#define __NR_ppoll_time64 414 +#define __NR_io_pgetevents_time64 416 +#define __NR_recvmmsg_time64 417 +#define __NR_mq_timedsend_time64 418 +#define __NR_mq_timedreceive_time64 419 +#define __NR_semtimedop_time64 420 +#define __NR_rt_sigtimedwait_time64 421 +#define __NR_futex_time64 422 +#define __NR_sched_rr_get_interval_time64 423 +#define __NR_pidfd_send_signal 424 +#define __NR_io_uring_setup 425 +#define __NR_io_uring_enter 426 +#define __NR_io_uring_register 427 +#define __NR_open_tree 428 +#define __NR_move_mount 429 +#define __NR_fsopen 430 +#define __NR_fsconfig 431 +#define __NR_fsmount 432 +#define __NR_fspick 433 +#define __NR_pidfd_open 434 +#define __NR_clone3 435 +#define __NR_close_range 436 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 +#define __NR_process_madvise 440 +#define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 -#endif /* _ASM_POWERPC_UNISTD_32_H */ +#endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index 7e851b30bb1..8458effa8d8 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -1,401 +1,406 @@ -#ifndef _ASM_POWERPC_UNISTD_64_H -#define _ASM_POWERPC_UNISTD_64_H +#ifndef _ASM_UNISTD_64_H +#define _ASM_UNISTD_64_H -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 
-#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define __NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define 
__NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_query_module 166 -#define __NR_poll 167 -#define __NR_nfsservctl 168 -#define __NR_setresgid 169 -#define __NR_getresgid 170 -#define __NR_prctl 171 -#define __NR_rt_sigreturn 172 -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#define __NR_rt_sigpending 175 -#define __NR_rt_sigtimedwait 176 -#define __NR_rt_sigqueueinfo 177 -#define __NR_rt_sigsuspend 178 -#define __NR_pread64 179 -#define __NR_pwrite64 180 -#define __NR_chown 181 -#define __NR_getcwd 182 -#define __NR_capget 183 -#define __NR_capset 184 -#define __NR_sigaltstack 185 -#define __NR_sendfile 186 -#define __NR_getpmsg 187 -#define __NR_putpmsg 188 -#define __NR_vfork 189 -#define __NR_ugetrlimit 190 -#define __NR_readahead 191 -#define __NR_pciconfig_read 198 -#define __NR_pciconfig_write 199 -#define __NR_pciconfig_iobase 200 -#define __NR_multiplexer 201 -#define __NR_getdents64 202 -#define __NR_pivot_root 203 -#define __NR_madvise 205 -#define __NR_mincore 206 -#define __NR_gettid 207 -#define __NR_tkill 208 -#define __NR_setxattr 209 -#define __NR_lsetxattr 210 -#define __NR_fsetxattr 211 -#define __NR_getxattr 212 -#define __NR_lgetxattr 213 -#define __NR_fgetxattr 214 -#define __NR_listxattr 215 -#define __NR_llistxattr 216 -#define __NR_flistxattr 217 -#define __NR_removexattr 218 -#define __NR_lremovexattr 219 -#define __NR_fremovexattr 220 -#define __NR_futex 221 -#define __NR_sched_setaffinity 222 -#define __NR_sched_getaffinity 223 -#define __NR_tuxcall 225 -#define __NR_io_setup 227 -#define __NR_io_destroy 228 -#define __NR_io_getevents 229 -#define __NR_io_submit 230 -#define __NR_io_cancel 231 -#define __NR_set_tid_address 232 -#define __NR_fadvise64 233 -#define __NR_exit_group 234 -#define __NR_lookup_dcookie 235 -#define __NR_epoll_create 236 -#define __NR_epoll_ctl 237 -#define __NR_epoll_wait 238 -#define __NR_remap_file_pages 239 -#define __NR_timer_create 240 -#define __NR_timer_settime 241 -#define __NR_timer_gettime 242 -#define __NR_timer_getoverrun 243 -#define __NR_timer_delete 244 -#define __NR_clock_settime 245 -#define __NR_clock_gettime 246 -#define __NR_clock_getres 247 -#define __NR_clock_nanosleep 248 -#define __NR_swapcontext 249 -#define __NR_tgkill 250 -#define __NR_utimes 251 -#define __NR_statfs64 252 -#define __NR_fstatfs64 253 -#define __NR_rtas 255 -#define __NR_sys_debug_setcontext 256 -#define __NR_migrate_pages 258 -#define __NR_mbind 259 -#define __NR_get_mempolicy 260 -#define __NR_set_mempolicy 261 -#define __NR_mq_open 262 -#define __NR_mq_unlink 263 -#define __NR_mq_timedsend 264 -#define __NR_mq_timedreceive 265 -#define __NR_mq_notify 266 -#define __NR_mq_getsetattr 267 -#define __NR_kexec_load 268 -#define __NR_add_key 269 -#define __NR_request_key 270 -#define __NR_keyctl 271 -#define __NR_waitid 272 -#define __NR_ioprio_set 273 -#define __NR_ioprio_get 274 -#define __NR_inotify_init 275 -#define __NR_inotify_add_watch 276 -#define __NR_inotify_rm_watch 277 -#define __NR_spu_run 278 -#define __NR_spu_create 279 -#define __NR_pselect6 280 -#define __NR_ppoll 281 -#define __NR_unshare 282 -#define __NR_splice 283 -#define __NR_tee 284 -#define __NR_vmsplice 285 -#define __NR_openat 286 -#define __NR_mkdirat 287 -#define __NR_mknodat 288 -#define __NR_fchownat 289 -#define __NR_futimesat 290 -#define __NR_newfstatat 291 -#define __NR_unlinkat 292 -#define 
__NR_renameat 293 -#define __NR_linkat 294 -#define __NR_symlinkat 295 -#define __NR_readlinkat 296 -#define __NR_fchmodat 297 -#define __NR_faccessat 298 -#define __NR_get_robust_list 299 -#define __NR_set_robust_list 300 -#define __NR_move_pages 301 -#define __NR_getcpu 302 -#define __NR_epoll_pwait 303 -#define __NR_utimensat 304 -#define __NR_signalfd 305 -#define __NR_timerfd_create 306 -#define __NR_eventfd 307 -#define __NR_sync_file_range2 308 -#define __NR_fallocate 309 -#define __NR_subpage_prot 310 -#define __NR_timerfd_settime 311 -#define __NR_timerfd_gettime 312 -#define __NR_signalfd4 313 -#define __NR_eventfd2 314 -#define __NR_epoll_create1 315 -#define __NR_dup3 316 -#define __NR_pipe2 317 -#define __NR_inotify_init1 318 -#define __NR_perf_event_open 319 -#define __NR_preadv 320 -#define __NR_pwritev 321 -#define __NR_rt_tgsigqueueinfo 322 -#define __NR_fanotify_init 323 -#define __NR_fanotify_mark 324 -#define __NR_prlimit64 325 -#define __NR_socket 326 -#define __NR_bind 327 -#define __NR_connect 328 -#define __NR_listen 329 -#define __NR_accept 330 -#define __NR_getsockname 331 -#define __NR_getpeername 332 -#define __NR_socketpair 333 -#define __NR_send 334 -#define __NR_sendto 335 -#define __NR_recv 336 -#define __NR_recvfrom 337 -#define __NR_shutdown 338 -#define __NR_setsockopt 339 -#define __NR_getsockopt 340 -#define __NR_sendmsg 341 -#define __NR_recvmsg 342 -#define __NR_recvmmsg 343 -#define __NR_accept4 344 -#define __NR_name_to_handle_at 345 -#define __NR_open_by_handle_at 346 -#define __NR_clock_adjtime 347 -#define __NR_syncfs 348 -#define __NR_sendmmsg 349 -#define __NR_setns 350 -#define __NR_process_vm_readv 351 -#define __NR_process_vm_writev 352 -#define __NR_finit_module 353 -#define __NR_kcmp 354 -#define __NR_sched_setattr 355 -#define __NR_sched_getattr 356 -#define __NR_renameat2 357 -#define __NR_seccomp 358 -#define __NR_getrandom 359 -#define __NR_memfd_create 360 -#define __NR_bpf 361 -#define __NR_execveat 362 -#define __NR_switch_endian 363 -#define __NR_userfaultfd 364 -#define __NR_membarrier 365 -#define __NR_mlock2 378 -#define __NR_copy_file_range 379 -#define __NR_preadv2 380 -#define __NR_pwritev2 381 -#define __NR_kexec_file_load 382 -#define __NR_statx 383 -#define __NR_pkey_alloc 384 -#define __NR_pkey_free 385 -#define __NR_pkey_mprotect 386 -#define __NR_rseq 387 -#define __NR_io_pgetevents 388 -#define __NR_semtimedop 392 -#define __NR_semget 393 -#define __NR_semctl 394 -#define __NR_shmget 395 -#define __NR_shmctl 396 -#define __NR_shmat 397 -#define __NR_shmdt 398 -#define __NR_msgget 399 -#define __NR_msgsnd 400 -#define __NR_msgrcv 401 -#define __NR_msgctl 402 -#define __NR_pidfd_send_signal 424 -#define __NR_io_uring_setup 425 -#define __NR_io_uring_enter 426 -#define __NR_io_uring_register 427 -#define __NR_open_tree 428 -#define __NR_move_mount 429 -#define __NR_fsopen 430 -#define __NR_fsconfig 431 -#define __NR_fsmount 432 -#define __NR_fspick 433 -#define __NR_pidfd_open 434 -#define __NR_clone3 435 -#define __NR_close_range 436 -#define __NR_openat2 437 -#define __NR_pidfd_getfd 438 -#define __NR_faccessat2 439 -#define __NR_process_madvise 440 -#define __NR_epoll_pwait2 441 +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 
+#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define 
__NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_query_module 166 +#define __NR_poll 167 +#define __NR_nfsservctl 168 +#define __NR_setresgid 169 +#define __NR_getresgid 170 +#define __NR_prctl 171 +#define __NR_rt_sigreturn 172 +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigtimedwait 176 +#define __NR_rt_sigqueueinfo 177 +#define __NR_rt_sigsuspend 178 +#define __NR_pread64 179 +#define __NR_pwrite64 180 +#define __NR_chown 181 +#define __NR_getcwd 182 +#define __NR_capget 183 +#define __NR_capset 184 +#define __NR_sigaltstack 185 +#define __NR_sendfile 186 +#define __NR_getpmsg 187 +#define __NR_putpmsg 188 +#define __NR_vfork 189 +#define __NR_ugetrlimit 190 +#define __NR_readahead 191 +#define __NR_pciconfig_read 198 +#define __NR_pciconfig_write 199 +#define __NR_pciconfig_iobase 200 +#define __NR_multiplexer 201 +#define __NR_getdents64 202 +#define __NR_pivot_root 203 +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 220 +#define __NR_futex 221 +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#define __NR_tuxcall 225 +#define __NR_io_setup 227 +#define __NR_io_destroy 228 +#define __NR_io_getevents 229 +#define __NR_io_submit 230 +#define __NR_io_cancel 231 +#define __NR_set_tid_address 232 +#define __NR_fadvise64 233 +#define __NR_exit_group 234 +#define __NR_lookup_dcookie 235 +#define __NR_epoll_create 236 +#define __NR_epoll_ctl 237 +#define __NR_epoll_wait 238 +#define __NR_remap_file_pages 239 +#define __NR_timer_create 240 +#define __NR_timer_settime 241 +#define __NR_timer_gettime 242 +#define __NR_timer_getoverrun 243 +#define __NR_timer_delete 244 +#define __NR_clock_settime 245 +#define __NR_clock_gettime 246 +#define __NR_clock_getres 247 +#define __NR_clock_nanosleep 248 +#define __NR_swapcontext 249 +#define __NR_tgkill 250 +#define __NR_utimes 251 +#define __NR_statfs64 252 +#define __NR_fstatfs64 253 +#define __NR_rtas 255 +#define __NR_sys_debug_setcontext 256 +#define __NR_migrate_pages 258 +#define __NR_mbind 259 +#define __NR_get_mempolicy 260 +#define __NR_set_mempolicy 261 +#define __NR_mq_open 262 +#define __NR_mq_unlink 263 +#define __NR_mq_timedsend 264 +#define __NR_mq_timedreceive 265 +#define __NR_mq_notify 266 +#define __NR_mq_getsetattr 267 +#define __NR_kexec_load 268 +#define __NR_add_key 269 +#define __NR_request_key 270 +#define __NR_keyctl 271 +#define __NR_waitid 272 +#define __NR_ioprio_set 273 +#define __NR_ioprio_get 274 +#define __NR_inotify_init 275 +#define __NR_inotify_add_watch 276 +#define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 +#define __NR_pselect6 280 +#define __NR_ppoll 281 +#define __NR_unshare 282 +#define __NR_splice 283 +#define __NR_tee 284 +#define __NR_vmsplice 285 +#define __NR_openat 286 
+#define __NR_mkdirat 287 +#define __NR_mknodat 288 +#define __NR_fchownat 289 +#define __NR_futimesat 290 +#define __NR_newfstatat 291 +#define __NR_unlinkat 292 +#define __NR_renameat 293 +#define __NR_linkat 294 +#define __NR_symlinkat 295 +#define __NR_readlinkat 296 +#define __NR_fchmodat 297 +#define __NR_faccessat 298 +#define __NR_get_robust_list 299 +#define __NR_set_robust_list 300 +#define __NR_move_pages 301 +#define __NR_getcpu 302 +#define __NR_epoll_pwait 303 +#define __NR_utimensat 304 +#define __NR_signalfd 305 +#define __NR_timerfd_create 306 +#define __NR_eventfd 307 +#define __NR_sync_file_range2 308 +#define __NR_fallocate 309 +#define __NR_subpage_prot 310 +#define __NR_timerfd_settime 311 +#define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 +#define __NR_perf_event_open 319 +#define __NR_preadv 320 +#define __NR_pwritev 321 +#define __NR_rt_tgsigqueueinfo 322 +#define __NR_fanotify_init 323 +#define __NR_fanotify_mark 324 +#define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 +#define __NR_name_to_handle_at 345 +#define __NR_open_by_handle_at 346 +#define __NR_clock_adjtime 347 +#define __NR_syncfs 348 +#define __NR_sendmmsg 349 +#define __NR_setns 350 +#define __NR_process_vm_readv 351 +#define __NR_process_vm_writev 352 +#define __NR_finit_module 353 +#define __NR_kcmp 354 +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 +#define __NR_renameat2 357 +#define __NR_seccomp 358 +#define __NR_getrandom 359 +#define __NR_memfd_create 360 +#define __NR_bpf 361 +#define __NR_execveat 362 +#define __NR_switch_endian 363 +#define __NR_userfaultfd 364 +#define __NR_membarrier 365 +#define __NR_mlock2 378 +#define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 +#define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 +#define __NR_rseq 387 +#define __NR_io_pgetevents 388 +#define __NR_semtimedop 392 +#define __NR_semget 393 +#define __NR_semctl 394 +#define __NR_shmget 395 +#define __NR_shmctl 396 +#define __NR_shmat 397 +#define __NR_shmdt 398 +#define __NR_msgget 399 +#define __NR_msgsnd 400 +#define __NR_msgrcv 401 +#define __NR_msgctl 402 +#define __NR_pidfd_send_signal 424 +#define __NR_io_uring_setup 425 +#define __NR_io_uring_enter 426 +#define __NR_io_uring_register 427 +#define __NR_open_tree 428 +#define __NR_move_mount 429 +#define __NR_fsopen 430 +#define __NR_fsconfig 431 +#define __NR_fsmount 432 +#define __NR_fspick 433 +#define __NR_pidfd_open 434 +#define __NR_clone3 435 +#define __NR_close_range 436 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 +#define __NR_process_madvise 440 +#define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define 
__NR_landlock_restrict_self 446 -#endif /* _ASM_POWERPC_UNISTD_64_H */ +#endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index c94d2c3a22d..0c3cd299e42 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -414,5 +414,10 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 984a06b7ebe..8dfc08b5e62 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -362,5 +362,10 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 8e76d3701db..a6c327f8ad9 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -112,6 +112,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -158,6 +159,19 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +struct kvm_sregs2 { + /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */ + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 flags; + __u64 pdptrs[4]; +}; +#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1 + /* for KVM_GET_FPU and KVM_SET_FPU */ struct kvm_fpu { __u8 fpr[8][16]; @@ -436,6 +450,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 18fb99dfa28..66e96c0c685 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_32_H -#define _ASM_X86_UNISTD_32_H 1 +#ifndef _ASM_UNISTD_32_H +#define _ASM_UNISTD_32_H #define __NR_restart_syscall 0 #define __NR_exit 1 @@ -432,6 +432,11 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define __NR_landlock_restrict_self 446 -#endif /* _ASM_X86_UNISTD_32_H */ +#endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index bde959328d6..b8ff6f14ee8 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_64_H -#define _ASM_X86_UNISTD_64_H 1 +#ifndef _ASM_UNISTD_64_H +#define _ASM_UNISTD_64_H #define __NR_read 0 #define __NR_write 1 @@ -354,6 +354,11 @@ #define __NR_faccessat2 439 #define __NR_process_madvise 440 #define __NR_epoll_pwait2 441 +#define __NR_mount_setattr 442 +#define __NR_quotactl_fd 443 +#define __NR_landlock_create_ruleset 444 +#define __NR_landlock_add_rule 445 +#define 
__NR_landlock_restrict_self 446 -#endif /* _ASM_X86_UNISTD_64_H */ +#endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 4ff6b17d3bb..06a1097c15e 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNISTD_X32_H -#define _ASM_X86_UNISTD_X32_H 1 +#ifndef _ASM_UNISTD_X32_H +#define _ASM_UNISTD_X32_H #define __NR_read (__X32_SYSCALL_BIT + 0) #define __NR_write (__X32_SYSCALL_BIT + 1) @@ -307,6 +307,11 @@ #define __NR_faccessat2 (__X32_SYSCALL_BIT + 439) #define __NR_process_madvise (__X32_SYSCALL_BIT + 440) #define __NR_epoll_pwait2 (__X32_SYSCALL_BIT + 441) +#define __NR_mount_setattr (__X32_SYSCALL_BIT + 442) +#define __NR_quotactl_fd (__X32_SYSCALL_BIT + 443) +#define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) +#define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) +#define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) @@ -345,4 +350,4 @@ #define __NR_pwritev2 (__X32_SYSCALL_BIT + 547) -#endif /* _ASM_X86_UNISTD_X32_H */ +#endif /* _ASM_UNISTD_X32_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 020b62a619a..bcaf66cc4d2 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -8,6 +8,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ +#include #include #include @@ -216,6 +217,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -251,6 +266,9 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -262,6 +280,9 @@ struct kvm_hyperv_exit { /* Encounter unexpected vm-exit reason */ #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 +/* Flags that describe what fields in emulation_failure hold valid data. */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ @@ -365,6 +386,25 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; /* KVM_EXIT_OSI */ struct { __u64 gprs[32]; @@ -427,6 +467,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -573,6 +615,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; @@ -1056,6 +1099,19 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 +#define KVM_CAP_ARM_MTE 205 #ifdef KVM_CAP_IRQ_ROUTING @@ -1129,6 +1185,11 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1396,6 +1457,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PMU_EVENT_FILTER */ #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) @@ -1563,6 +1625,60 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1591,6 +1707,10 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest 
Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1643,6 +1763,51 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1748,8 +1913,8 @@ struct kvm_hyperv_eventfd { * conversion after harvesting an entry. Also, it must not skip any * dirty bits, so that dirty bits are always harvested in sequence. */ -#define KVM_DIRTY_GFN_F_DIRTY BIT(0) -#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) #define KVM_DIRTY_GFN_F_MASK 0x3 /* @@ -1764,4 +1929,79 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. The memory is allocated + * at the send of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointd by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. 
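
The dirty-ring flags above (KVM_DIRTY_GFN_F_DIRTY / KVM_DIRTY_GFN_F_RESET, now spelled with _BITUL) describe a simple harvest protocol: the kernel publishes an entry with the DIRTY flag set, userspace collects entries strictly in sequence, marks each with the RESET flag, and finally issues KVM_RESET_DIRTY_RINGS. A minimal, hypothetical harvesting loop is sketched below; it assumes struct kvm_dirty_gfn also carries "flags" and "slot" members (only "offset" is visible in this hunk).

    /*
     * Hypothetical harvesting loop. Assumes struct kvm_dirty_gfn also carries
     * "flags" and "slot" members (only "offset" is visible in the hunk above),
     * and omits error handling and the memory barriers a real VMM would need.
     */
    #include <stdint.h>
    #include <linux/kvm.h>

    static uint32_t harvest_dirty_ring(struct kvm_dirty_gfn *ring, uint32_t nentries,
                                       uint32_t fetch,
                                       void (*mark_dirty)(uint32_t slot, uint64_t offset))
    {
        for (;;) {
            struct kvm_dirty_gfn *e = &ring[fetch % nentries];

            if (!(e->flags & KVM_DIRTY_GFN_F_DIRTY)) {
                break;                              /* no more published entries */
            }
            mark_dirty(e->slot, e->offset);         /* record the dirty page */
            e->flags |= KVM_DIRTY_GFN_F_RESET;      /* hand the slot back to KVM */
            fetch++;
        }
        /* The caller later issues KVM_RESET_DIRTY_RINGS on the VM fd. */
        return fetch;
    }
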
+ */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc. + * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * struture kvm or kvm_vcpu. + * @unused: Unused field for future usage. Always 0 for now. + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 1ba9a9feeb8..8479af5f4c7 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -19,15 +19,20 @@ * means the userland is reading). 
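
KVM_GET_STATS_FD returns a file descriptor laid out exactly as the two structures above describe: a kvm_stats_header at offset 0, an id string at id_offset, a table of kvm_stats_desc entries at desc_offset (each occupying sizeof(struct kvm_stats_desc) + name_size bytes), and __u64 data items at data_offset. A rough, hypothetical reader follows; it assumes each descriptor's offset is relative to data_offset, skips all error handling, and only prints the first of d->size items.

    /*
     * Hypothetical reader for the binary stats fd. Assumes each descriptor's
     * "offset" is relative to header.data_offset and skips all error handling;
     * only the first of d->size data items is printed.
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void dump_vm_stats(int vm_fd)
    {
        int fd = ioctl(vm_fd, KVM_GET_STATS_FD, NULL);
        struct kvm_stats_header h;

        pread(fd, &h, sizeof(h), 0);

        size_t desc_sz = sizeof(struct kvm_stats_desc) + h.name_size;
        char *descs = malloc(desc_sz * h.num_desc);

        pread(fd, descs, desc_sz * h.num_desc, h.desc_offset);

        for (uint32_t i = 0; i < h.num_desc; i++) {
            struct kvm_stats_desc *d = (struct kvm_stats_desc *)(descs + i * desc_sz);
            uint64_t val;

            pread(fd, &val, sizeof(val), h.data_offset + d->offset);
            printf("%s = %llu (type %#x, unit %#x)\n", d->name,
                   (unsigned long long)val,
                   d->flags & KVM_STATS_TYPE_MASK,
                   d->flags & KVM_STATS_UNIT_MASK);
        }
        free(descs);
        close(fd);
    }
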
*/ #define UFFD_API ((__u64)0xAA) +#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ + UFFDIO_REGISTER_MODE_WP | \ + UFFDIO_REGISTER_MODE_MINOR) #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ - UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ - UFFD_FEATURE_THREAD_ID) + UFFD_FEATURE_THREAD_ID | \ + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -36,10 +41,12 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ - (__u64)1 << _UFFDIO_WRITEPROTECT) + (__u64)1 << _UFFDIO_WRITEPROTECT | \ + (__u64)1 << _UFFDIO_CONTINUE) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ - (__u64)1 << _UFFDIO_COPY) + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_CONTINUE) /* * Valid ioctl command number range with this API is from 0x00 to @@ -55,6 +62,7 @@ #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) #define _UFFDIO_WRITEPROTECT (0x06) +#define _UFFDIO_CONTINUE (0x07) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -73,6 +81,8 @@ struct uffdio_zeropage) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) /* read() structure */ struct uffd_msg { @@ -127,6 +137,7 @@ struct uffd_msg { /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ +#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ struct uffdio_api { /* userland asks for an API number and the features to enable */ @@ -171,6 +182,13 @@ struct uffdio_api { * * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will * be returned, if feature is not requested 0 will be returned. + * + * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults + * can be intercepted (via REGISTER_MODE_MINOR) for + * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -181,6 +199,8 @@ struct uffdio_api { #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) +#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) __u64 features; __u64 ioctls; @@ -195,6 +215,7 @@ struct uffdio_register { struct uffdio_range range; #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) +#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) __u64 mode; /* @@ -257,6 +278,18 @@ struct uffdio_writeprotect { __u64 mode; }; +struct uffdio_continue { + struct uffdio_range range; +#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * Fields below here are written by the ioctl and must be at the end: + * the copy_from_user will not read past here. + */ + __s64 mapped; +}; + /* * Flags for the userfaultfd(2) system call itself. 
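
With UFFDIO_REGISTER_MODE_MINOR, a fault on a page whose contents already exist in the backing file is reported with UFFD_PAGEFAULT_FLAG_MINOR; userspace fixes up the contents through another mapping of the file and then installs the existing page with UFFDIO_CONTINUE instead of UFFDIO_COPY. A minimal, hypothetical handler fragment is sketched below (populate_page() and page_size are placeholders for illustration).

    /*
     * Hypothetical minor-fault handler fragment, e.g. for postcopy with a
     * shmem/hugetlbfs backing file. "uffd" is assumed to have been registered
     * with UFFDIO_REGISTER_MODE_MINOR; populate_page() is a placeholder.
     */
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/userfaultfd.h>

    static void handle_minor_fault(int uffd, long page_size,
                                   void (*populate_page)(uint64_t addr))
    {
        struct uffd_msg msg;

        if (read(uffd, &msg, sizeof(msg)) != sizeof(msg) ||
            msg.event != UFFD_EVENT_PAGEFAULT ||
            !(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR)) {
            return;
        }

        uint64_t addr = msg.arg.pagefault.address & ~((uint64_t)page_size - 1);

        /* Bring the backing page contents up to date first ... */
        populate_page(addr);

        /* ... then install the existing page and wake the faulting thread. */
        struct uffdio_continue cont = {
            .range = { .start = addr, .len = page_size },
            .mode = 0,
        };
        ioctl(uffd, UFFDIO_CONTINUE, &cont);
    }
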
*/ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 609099e455c..e680594f27b 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -46,6 +46,12 @@ */ #define VFIO_NOIOMMU_IOMMU 8 +/* Supports VFIO_DMA_UNMAP_FLAG_ALL */ +#define VFIO_UNMAP_ALL 9 + +/* Supports the vaddr flag for DMA map and unmap */ +#define VFIO_UPDATE_VADDR 10 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -329,6 +335,8 @@ struct vfio_region_info_cap_type { /* 10de vendor PCI sub-types */ /* * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. + * + * Deprecated, region no longer provided */ #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) @@ -336,6 +344,8 @@ struct vfio_region_info_cap_type { /* * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU * to do TLB invalidation on a GPU. + * + * Deprecated, region no longer provided */ #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) @@ -635,6 +645,8 @@ struct vfio_device_migration_info { * Capability with compressed real address (aka SSA - small system address) * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing * and by the userspace to associate a NVLink bridge with a GPU. + * + * Deprecated, capability no longer provided */ #define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 @@ -649,6 +661,8 @@ struct vfio_region_info_cap_nvlink2_ssatgt { * property in the device tree. The value is fixed in the hardware * and failing to provide the correct value results in the link * not working with no indication from the driver why. + * + * Deprecated, capability no longer provided */ #define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 @@ -1074,12 +1088,22 @@ struct vfio_iommu_type1_info_dma_avail { * * Map process virtual addresses to IO virtual addresses using the * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. + * + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and + * unblock translation of host virtual addresses in the iova range. The vaddr + * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To + * maintain memory consistency within the user application, the updated vaddr + * must address the same memory object as originally mapped. Failure to do so + * will result in user memory corruption and/or device misbehavior. iova and + * size must match those in the original MAP_DMA call. Protection is not + * changed, and the READ & WRITE flags must be 0. */ struct vfio_iommu_type1_dma_map { __u32 argsz; __u32 flags; #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ +#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2) __u64 vaddr; /* Process virtual address */ __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ @@ -1102,6 +1126,7 @@ struct vfio_bitmap { * field. No guarantee is made to the user that arbitrary unmaps of iova * or size different from those used in the original mapping call will * succeed. + * * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap * before unmapping IO virtual addresses. When this flag is set, the user must * provide a struct vfio_bitmap in data[]. User must provide zero-allocated @@ -1111,11 +1136,21 @@ struct vfio_bitmap { * indicates that the page at that offset from iova is dirty. 
A Bitmap of the * pages in the range of unmapped size is returned in the user-provided * vfio_bitmap.data. + * + * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size + * must be 0. This cannot be combined with the get-dirty-bitmap flag. + * + * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host + * virtual addresses in the iova range. Tasks that attempt to translate an + * iova's vaddr will block. DMA to already-mapped pages continues. This + * cannot be combined with the get-dirty-bitmap flag. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; __u32 flags; #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) +#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1) +#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2) __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ __u8 data[]; diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c index ee72a1c20f0..97e0728b679 100644 --- a/linux-user/aarch64/cpu_loop.c +++ b/linux-user/aarch64/cpu_loop.c @@ -20,7 +20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "cpu_loop-common.h" +#include "signal-common.h" #include "qemu/guest-random.h" #include "semihosting/common-semi.h" #include "target/arm/syndrome.h" @@ -77,9 +79,8 @@ void cpu_loop(CPUARMState *env) { CPUState *cs = env_cpu(env); - int trapnr, ec, fsc; + int trapnr, ec, fsc, si_code, si_signo; abi_long ret; - target_siginfo_t info; for (;;) { cpu_exec_start(cs); @@ -108,18 +109,10 @@ void cpu_loop(CPUARMState *env) /* just indicate that signals should be handled asap */ break; case EXCP_UDEF: - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->pc); break; case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info._sifields._sigfault._addr = env->exception.vaddress; - /* We should only arrive here with EC in {DATAABORT, INSNABORT}. */ ec = syn_get_ec(env->exception.syndrome); assert(ec == EC_DATAABORT || ec == EC_INSNABORT); @@ -128,27 +121,30 @@ void cpu_loop(CPUARMState *env) fsc = extract32(env->exception.syndrome, 0, 6); switch (fsc) { case 0x04 ... 0x07: /* Translation fault, level {0-3} */ - info.si_code = TARGET_SEGV_MAPERR; + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_MAPERR; break; case 0x09 ... 0x0b: /* Access flag fault, level {1-3} */ case 0x0d ... 
0x0f: /* Permission fault, level {1-3} */ - info.si_code = TARGET_SEGV_ACCERR; + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_ACCERR; break; case 0x11: /* Synchronous Tag Check Fault */ - info.si_code = TARGET_SEGV_MTESERR; + si_signo = TARGET_SIGSEGV; + si_code = TARGET_SEGV_MTESERR; + break; + case 0x21: /* Alignment fault */ + si_signo = TARGET_SIGBUS; + si_code = TARGET_BUS_ADRALN; break; default: g_assert_not_reached(); } - - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(si_signo, si_code, env->exception.vaddress); break; case EXCP_DEBUG: case EXCP_BKPT: - info.si_signo = TARGET_SIGTRAP; - info.si_errno = 0; - info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->pc); break; case EXCP_SEMIHOST: env->xregs[0] = do_common_semihosting(cs); @@ -168,11 +164,7 @@ void cpu_loop(CPUARMState *env) /* Check for MTE asynchronous faults */ if (unlikely(env->cp15.tfsr_el[0])) { env->cp15.tfsr_el[0] = 0; - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info._sifields._sigfault._addr = 0; - info.si_code = TARGET_SEGV_MTEAERR; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MTEAERR, 0); } process_pending_signals(env); diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c index b591790c22e..29c52db3f13 100644 --- a/linux-user/aarch64/signal.c +++ b/linux-user/aarch64/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -108,7 +109,6 @@ struct target_rt_sigframe { struct target_rt_frame_record { uint64_t fp; uint64_t lr; - uint32_t tramp[2]; }; static void target_setup_general_frame(struct target_rt_sigframe *sf, @@ -460,9 +460,9 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, layout.total_size = MAX(layout.total_size, sizeof(struct target_rt_sigframe)); - /* Reserve space for the return code. On a real system this would - * be within the VDSO. So, despite the name this is not a "real" - * record within the frame. + /* + * Reserve space for the standard frame unwind pair: fp, lr. + * Despite the name this is not a "real" record within the frame. */ fr_ofs = layout.total_size; layout.total_size += sizeof(struct target_rt_frame_record); @@ -495,15 +495,7 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, if (ka->sa_flags & TARGET_SA_RESTORER) { return_addr = ka->sa_restorer; } else { - /* - * mov x8,#__NR_rt_sigreturn; svc #0 - * Since these are instructions they need to be put as little-endian - * regardless of target default or current CPU endianness. 
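
The force_sig_fault() calls introduced in these hunks wrap the target_siginfo_t/queue_signal() sequence they replace. A sketch of what such a helper looks like, reconstructed from the removed code, is shown below; the real helper presumably takes no env argument and derives the current CPU from thread-local state, so the explicit parameter here is only to keep the sketch readable on its own.

    /*
     * Sketch reconstructed from the removed code above; the real
     * force_sig_fault() presumably obtains the CPU from thread-local state
     * rather than taking an env parameter.
     */
    static void force_sig_fault_sketch(CPUArchState *env, int sig, int code,
                                       abi_ulong addr)
    {
        target_siginfo_t info;

        info.si_signo = sig;
        info.si_errno = 0;
        info.si_code = code;
        info._sifields._sigfault._addr = addr;
        queue_signal(env, sig, QEMU_SI_FAULT, &info);
    }
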
- */ - __put_user_e(0xd2801168, &fr->tramp[0], le); - __put_user_e(0xd4000001, &fr->tramp[1], le); - return_addr = frame_addr + fr_ofs - + offsetof(struct target_rt_frame_record, tramp); + return_addr = default_rt_sigreturn; } env->xregs[0] = usig; env->xregs[29] = frame_addr + fr_ofs; @@ -561,11 +553,7 @@ long do_rt_sigreturn(CPUARMState *env) goto badframe; } - if (do_sigaltstack(frame_addr + - offsetof(struct target_rt_sigframe, uc.tuc_stack), - 0, get_sp_from_cpustate(env)) == -EFAULT) { - goto badframe; - } + target_restore_altstack(&frame->uc.tuc_stack, env); unlock_user_struct(frame, frame_addr, 0); return -TARGET_QEMU_ESIGRETURN; @@ -580,3 +568,20 @@ long do_sigreturn(CPUARMState *env) { return do_rt_sigreturn(env); } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 8, 0); + assert(tramp != NULL); + + /* + * mov x8,#__NR_rt_sigreturn; svc #0 + * Since these are instructions they need to be put as little-endian + * regardless of target default or current CPU endianness. + */ + __put_user_e(0xd2801168, &tramp[0], le); + __put_user_e(0xd4000001, &tramp[1], le); + + default_rt_sigreturn = sigtramp_page; + unlock_user(tramp, sigtramp_page, 8); +} diff --git a/linux-user/aarch64/syscall_nr.h b/linux-user/aarch64/syscall_nr.h index 6fd5b331e78..12ef002d60f 100644 --- a/linux-user/aarch64/syscall_nr.h +++ b/linux-user/aarch64/syscall_nr.h @@ -302,6 +302,12 @@ #define TARGET_NR_openat2 437 #define TARGET_NR_pidfd_getfd 438 #define TARGET_NR_faccessat2 439 -#define TARGET_NR_syscalls 440 +#define TARGET_NR_process_madvise 440 +#define TARGET_NR_epoll_pwait2 441 +#define TARGET_NR_mount_setattr 442 +#define TARGET_NR_landlock_create_ruleset 444 +#define TARGET_NR_landlock_add_rule 445 +#define TARGET_NR_landlock_restrict_self 446 +#define TARGET_NR_syscalls 447 #endif /* LINUX_USER_AARCH64_SYSCALL_NR_H */ diff --git a/linux-user/aarch64/target_errno_defs.h b/linux-user/aarch64/target_errno_defs.h new file mode 100644 index 00000000000..461b5477284 --- /dev/null +++ b/linux-user/aarch64/target_errno_defs.h @@ -0,0 +1,7 @@ +#ifndef AARCH64_TARGET_ERRNO_DEFS_H +#define AARCH64_TARGET_ERRNO_DEFS_H + +/* Target uses generic errno */ +#include "../generic/target_errno_defs.h" + +#endif diff --git a/linux-user/aarch64/target_signal.h b/linux-user/aarch64/target_signal.h index 18013e1b235..7580d99403c 100644 --- a/linux-user/aarch64/target_signal.h +++ b/linux-user/aarch64/target_signal.h @@ -25,4 +25,6 @@ typedef struct target_sigaltstack { #define TARGET_SEGV_MTESERR 9 /* Synchronous ARM MTE exception */ #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* AARCH64_TARGET_SIGNAL_H */ diff --git a/linux-user/alpha/cpu_loop.c b/linux-user/alpha/cpu_loop.c index 7ce2461a028..4029849d5cd 100644 --- a/linux-user/alpha/cpu_loop.c +++ b/linux-user/alpha/cpu_loop.c @@ -20,7 +20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "cpu_loop-common.h" +#include "signal-common.h" void cpu_loop(CPUAlphaState *env) { @@ -52,21 +54,6 @@ void cpu_loop(CPUAlphaState *env) fprintf(stderr, "External interrupt. Exit\n"); exit(EXIT_FAILURE); break; - case EXCP_MMFAULT: - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = (page_get_flags(env->trap_arg0) & PAGE_VALID - ? 
TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR); - info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; - case EXCP_UNALIGN: - info.si_signo = TARGET_SIGBUS; - info.si_errno = 0; - info.si_code = TARGET_BUS_ADRALN; - info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - break; case EXCP_OPCDEC: do_sigill: info.si_signo = TARGET_SIGILL; diff --git a/linux-user/alpha/signal.c b/linux-user/alpha/signal.c index c5c27ce0841..bbe3dd175a7 100644 --- a/linux-user/alpha/signal.c +++ b/linux-user/alpha/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -54,13 +55,11 @@ struct target_ucontext { struct target_sigframe { struct target_sigcontext sc; - unsigned int retcode[3]; }; struct target_rt_sigframe { target_siginfo_t info; struct target_ucontext uc; - unsigned int retcode[3]; }; #define INSN_MOV_R30_R16 0x47fe0410 @@ -138,15 +137,10 @@ void setup_frame(int sig, struct target_sigaction *ka, setup_sigcontext(&frame->sc, env, frame_addr, set); - if (ka->sa_restorer) { - r26 = ka->sa_restorer; + if (ka->ka_restorer) { + r26 = ka->ka_restorer; } else { - __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); - __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, - &frame->retcode[1]); - __put_user(INSN_CALLSYS, &frame->retcode[2]); - /* imb() */ - r26 = frame_addr + offsetof(struct target_sigframe, retcode); + r26 = default_sigreturn; } unlock_user_struct(frame, frame_addr, 1); @@ -192,15 +186,10 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } - if (ka->sa_restorer) { - r26 = ka->sa_restorer; + if (ka->ka_restorer) { + r26 = ka->ka_restorer; } else { - __put_user(INSN_MOV_R30_R16, &frame->retcode[0]); - __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, - &frame->retcode[1]); - __put_user(INSN_CALLSYS, &frame->retcode[2]); - /* imb(); */ - r26 = frame_addr + offsetof(struct target_sigframe, retcode); + r26 = default_rt_sigreturn; } if (err) { @@ -257,11 +246,7 @@ long do_rt_sigreturn(CPUAlphaState *env) set_sigmask(&set); restore_sigcontext(env, &frame->uc.tuc_mcontext); - if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe, - uc.tuc_stack), - 0, env->ir[IR_SP]) == -EFAULT) { - goto badframe; - } + target_restore_altstack(&frame->uc.tuc_stack, env); unlock_user_struct(frame, frame_addr, 0); return -TARGET_QEMU_ESIGRETURN; @@ -272,3 +257,21 @@ long do_rt_sigreturn(CPUAlphaState *env) force_sig(TARGET_SIGSEGV); return -TARGET_QEMU_ESIGRETURN; } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6 * 4, 0); + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + __put_user(INSN_MOV_R30_R16, &tramp[0]); + __put_user(INSN_LDI_R0 + TARGET_NR_sigreturn, &tramp[1]); + __put_user(INSN_CALLSYS, &tramp[2]); + + default_rt_sigreturn = sigtramp_page + 3 * 4; + __put_user(INSN_MOV_R30_R16, &tramp[3]); + __put_user(INSN_LDI_R0 + TARGET_NR_rt_sigreturn, &tramp[4]); + __put_user(INSN_CALLSYS, &tramp[5]); + + unlock_user(tramp, sigtramp_page, 6 * 4); +} diff --git a/linux-user/alpha/syscall.tbl b/linux-user/alpha/syscall.tbl index ec8bed9e7b7..3000a2e8ee2 100644 --- a/linux-user/alpha/syscall.tbl +++ b/linux-user/alpha/syscall.tbl @@ -479,3 +479,10 @@ 547 common openat2 sys_openat2 548 common pidfd_getfd sys_pidfd_getfd 549 common faccessat2 sys_faccessat2 +550 
common process_madvise sys_process_madvise +551 common epoll_pwait2 sys_epoll_pwait2 +552 common mount_setattr sys_mount_setattr +# 553 reserved for quotactl_path +554 common landlock_create_ruleset sys_landlock_create_ruleset +555 common landlock_add_rule sys_landlock_add_rule +556 common landlock_restrict_self sys_landlock_restrict_self diff --git a/linux-user/alpha/target_errno_defs.h b/linux-user/alpha/target_errno_defs.h new file mode 100644 index 00000000000..07924b13aaf --- /dev/null +++ b/linux-user/alpha/target_errno_defs.h @@ -0,0 +1,204 @@ +#ifndef ALPHA_TARGET_ERRNO_DEFS_H +#define ALPHA_TARGET_ERRNO_DEFS_H + +#include "../generic/target_errno_defs.h" + +/* + * Generic target errno overridden with definitions taken + * from asm-alpha/errno.h + */ +#undef TARGET_EWOULDBLOCK +#define TARGET_EWOULDBLOCK TARGET_EAGAIN +#undef TARGET_EDEADLK +#define TARGET_EDEADLK 11 +#undef TARGET_EAGAIN +#define TARGET_EAGAIN 35 +#undef TARGET_EINPROGRESS +#define TARGET_EINPROGRESS 36 +#undef TARGET_EALREADY +#define TARGET_EALREADY 37 +#undef TARGET_ENOTSOCK +#define TARGET_ENOTSOCK 38 +#undef TARGET_EDESTADDRREQ +#define TARGET_EDESTADDRREQ 39 +#undef TARGET_EMSGSIZE +#define TARGET_EMSGSIZE 40 +#undef TARGET_EPROTOTYPE +#define TARGET_EPROTOTYPE 41 +#undef TARGET_ENOPROTOOPT +#define TARGET_ENOPROTOOPT 42 +#undef TARGET_EPROTONOSUPPORT +#define TARGET_EPROTONOSUPPORT 43 +#undef TARGET_ESOCKTNOSUPPORT +#define TARGET_ESOCKTNOSUPPORT 44 +#undef TARGET_EOPNOTSUPP +#define TARGET_EOPNOTSUPP 45 +#undef TARGET_EPFNOSUPPORT +#define TARGET_EPFNOSUPPORT 46 +#undef TARGET_EAFNOSUPPORT +#define TARGET_EAFNOSUPPORT 47 +#undef TARGET_EADDRINUSE +#define TARGET_EADDRINUSE 48 +#undef TARGET_EADDRNOTAVAIL +#define TARGET_EADDRNOTAVAIL 49 +#undef TARGET_ENETDOWN +#define TARGET_ENETDOWN 50 +#undef TARGET_ENETUNREACH +#define TARGET_ENETUNREACH 51 +#undef TARGET_ENETRESET +#define TARGET_ENETRESET 52 +#undef TARGET_ECONNABORTED +#define TARGET_ECONNABORTED 53 +#undef TARGET_ECONNRESET +#define TARGET_ECONNRESET 54 +#undef TARGET_ENOBUFS +#define TARGET_ENOBUFS 55 +#undef TARGET_EISCONN +#define TARGET_EISCONN 56 +#undef TARGET_ENOTCONN +#define TARGET_ENOTCONN 57 +#undef TARGET_ESHUTDOWN +#define TARGET_ESHUTDOWN 58 +#undef TARGET_ETOOMANYREFS +#define TARGET_ETOOMANYREFS 59 +#undef TARGET_ETIMEDOUT +#define TARGET_ETIMEDOUT 60 +#undef TARGET_ECONNREFUSED +#define TARGET_ECONNREFUSED 61 +#undef TARGET_ELOOP +#define TARGET_ELOOP 62 +#undef TARGET_ENAMETOOLONG +#define TARGET_ENAMETOOLONG 63 +#undef TARGET_EHOSTDOWN +#define TARGET_EHOSTDOWN 64 +#undef TARGET_EHOSTUNREACH +#define TARGET_EHOSTUNREACH 65 +#undef TARGET_ENOTEMPTY +#define TARGET_ENOTEMPTY 66 +/* Unused 67 */ +#undef TARGET_EUSERS +#define TARGET_EUSERS 68 +#undef TARGET_EDQUOT +#define TARGET_EDQUOT 69 +#undef TARGET_ESTALE +#define TARGET_ESTALE 70 +#undef TARGET_EREMOTE +#define TARGET_EREMOTE 71 +/* Unused 72-76 */ +#undef TARGET_ENOLCK +#define TARGET_ENOLCK 77 +#undef TARGET_ENOSYS +#define TARGET_ENOSYS 78 +/* Unused 79 */ +#undef TARGET_ENOMSG +#define TARGET_ENOMSG 80 +#undef TARGET_EIDRM +#define TARGET_EIDRM 81 +#undef TARGET_ENOSR +#define TARGET_ENOSR 82 +#undef TARGET_ETIME +#define TARGET_ETIME 83 +#undef TARGET_EBADMSG +#define TARGET_EBADMSG 84 +#undef TARGET_EPROTO +#define TARGET_EPROTO 85 +#undef TARGET_ENODATA +#define TARGET_ENODATA 86 +#undef TARGET_ENOSTR +#define TARGET_ENOSTR 87 +#undef TARGET_ECHRNG +#define TARGET_ECHRNG 88 +#undef TARGET_EL2NSYNC +#define TARGET_EL2NSYNC 89 +#undef TARGET_EL3HLT +#define TARGET_EL3HLT 
90 +#undef TARGET_EL3RST +#define TARGET_EL3RST 91 +#undef TARGET_ENOPKG +#define TARGET_ENOPKG 92 +#undef TARGET_ELNRNG +#define TARGET_ELNRNG 93 +#undef TARGET_EUNATCH +#define TARGET_EUNATCH 94 +#undef TARGET_ENOCSI +#define TARGET_ENOCSI 95 +#undef TARGET_EL2HLT +#define TARGET_EL2HLT 96 +#undef TARGET_EBADE +#define TARGET_EBADE 97 +#undef TARGET_EBADR +#define TARGET_EBADR 98 +#undef TARGET_EXFULL +#define TARGET_EXFULL 99 +#undef TARGET_ENOANO +#define TARGET_ENOANO 100 +#undef TARGET_EBADRQC +#define TARGET_EBADRQC 101 +#undef TARGET_EBADSLT +#define TARGET_EBADSLT 102 +/* Unused 103 */ +#undef TARGET_EBFONT +#define TARGET_EBFONT 104 +#undef TARGET_ENONET +#define TARGET_ENONET 105 +#undef TARGET_ENOLINK +#define TARGET_ENOLINK 106 +#undef TARGET_EADV +#define TARGET_EADV 107 +#undef TARGET_ESRMNT +#define TARGET_ESRMNT 108 +#undef TARGET_ECOMM +#define TARGET_ECOMM 109 +#undef TARGET_EMULTIHOP +#define TARGET_EMULTIHOP 110 +#undef TARGET_EDOTDOT +#define TARGET_EDOTDOT 111 +#undef TARGET_EOVERFLOW +#define TARGET_EOVERFLOW 112 +#undef TARGET_ENOTUNIQ +#define TARGET_ENOTUNIQ 113 +#undef TARGET_EBADFD +#define TARGET_EBADFD 114 +#undef TARGET_EREMCHG +#define TARGET_EREMCHG 115 +#undef TARGET_EILSEQ +#define TARGET_EILSEQ 116 +/* Same as default 117-121 */ +#undef TARGET_ELIBACC +#define TARGET_ELIBACC 122 +#undef TARGET_ELIBBAD +#define TARGET_ELIBBAD 123 +#undef TARGET_ELIBSCN +#define TARGET_ELIBSCN 124 +#undef TARGET_ELIBMAX +#define TARGET_ELIBMAX 125 +#undef TARGET_ELIBEXEC +#define TARGET_ELIBEXEC 126 +#undef TARGET_ERESTART +#define TARGET_ERESTART 127 +#undef TARGET_ESTRPIPE +#define TARGET_ESTRPIPE 128 +#undef TARGET_ENOMEDIUM +#define TARGET_ENOMEDIUM 129 +#undef TARGET_EMEDIUMTYPE +#define TARGET_EMEDIUMTYPE 130 +#undef TARGET_ECANCELED +#define TARGET_ECANCELED 131 +#undef TARGET_ENOKEY +#define TARGET_ENOKEY 132 +#undef TARGET_EKEYEXPIRED +#define TARGET_EKEYEXPIRED 133 +#undef TARGET_EKEYREVOKED +#define TARGET_EKEYREVOKED 134 +#undef TARGET_EKEYREJECTED +#define TARGET_EKEYREJECTED 135 +#undef TARGET_EOWNERDEAD +#define TARGET_EOWNERDEAD 136 +#undef TARGET_ENOTRECOVERABLE +#define TARGET_ENOTRECOVERABLE 137 +#undef TARGET_ERFKILL +#define TARGET_ERFKILL 138 +#undef TARGET_EHWPOISON +#define TARGET_EHWPOISON 139 + +#endif diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h index 0b90d3a8970..0b6a39de657 100644 --- a/linux-user/alpha/target_signal.h +++ b/linux-user/alpha/target_signal.h @@ -92,6 +92,8 @@ typedef struct target_sigaltstack { #define TARGET_GEN_SUBRNG7 -25 #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_KA_RESTORER +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 /* bit-flags */ #define TARGET_SS_AUTODISARM (1U << 31) /* disable sas during sighandling */ diff --git a/linux-user/alpha/target_syscall.h b/linux-user/alpha/target_syscall.h index fd389422e31..03091bf0a82 100644 --- a/linux-user/alpha/target_syscall.h +++ b/linux-user/alpha/target_syscall.h @@ -44,200 +44,6 @@ struct target_pt_regs { #define UNAME_MACHINE "alpha" #define UNAME_MINIMUM_RELEASE "2.6.32" -#undef TARGET_EDEADLK -#define TARGET_EDEADLK 11 -#undef TARGET_EAGAIN -#define TARGET_EAGAIN 35 -#undef TARGET_EINPROGRESS -#define TARGET_EINPROGRESS 36 -#undef TARGET_EALREADY -#define TARGET_EALREADY 37 -#undef TARGET_ENOTSOCK -#define TARGET_ENOTSOCK 38 -#undef TARGET_EDESTADDRREQ -#define TARGET_EDESTADDRREQ 39 -#undef TARGET_EMSGSIZE -#define TARGET_EMSGSIZE 40 -#undef TARGET_EPROTOTYPE -#define TARGET_EPROTOTYPE 41 -#undef TARGET_ENOPROTOOPT -#define 
TARGET_ENOPROTOOPT 42 -#undef TARGET_EPROTONOSUPPORT -#define TARGET_EPROTONOSUPPORT 43 -#undef TARGET_ESOCKTNOSUPPORT -#define TARGET_ESOCKTNOSUPPORT 44 -#undef TARGET_EOPNOTSUPP -#define TARGET_EOPNOTSUPP 45 -#undef TARGET_EPFNOSUPPORT -#define TARGET_EPFNOSUPPORT 46 -#undef TARGET_EAFNOSUPPORT -#define TARGET_EAFNOSUPPORT 47 -#undef TARGET_EADDRINUSE -#define TARGET_EADDRINUSE 48 -#undef TARGET_EADDRNOTAVAIL -#define TARGET_EADDRNOTAVAIL 49 -#undef TARGET_ENETDOWN -#define TARGET_ENETDOWN 50 -#undef TARGET_ENETUNREACH -#define TARGET_ENETUNREACH 51 -#undef TARGET_ENETRESET -#define TARGET_ENETRESET 52 -#undef TARGET_ECONNABORTED -#define TARGET_ECONNABORTED 53 -#undef TARGET_ECONNRESET -#define TARGET_ECONNRESET 54 -#undef TARGET_ENOBUFS -#define TARGET_ENOBUFS 55 -#undef TARGET_EISCONN -#define TARGET_EISCONN 56 -#undef TARGET_ENOTCONN -#define TARGET_ENOTCONN 57 -#undef TARGET_ESHUTDOWN -#define TARGET_ESHUTDOWN 58 -#undef TARGET_ETOOMANYREFS -#define TARGET_ETOOMANYREFS 59 -#undef TARGET_ETIMEDOUT -#define TARGET_ETIMEDOUT 60 -#undef TARGET_ECONNREFUSED -#define TARGET_ECONNREFUSED 61 -#undef TARGET_ELOOP -#define TARGET_ELOOP 62 -#undef TARGET_ENAMETOOLONG -#define TARGET_ENAMETOOLONG 63 -#undef TARGET_EHOSTDOWN -#define TARGET_EHOSTDOWN 64 -#undef TARGET_EHOSTUNREACH -#define TARGET_EHOSTUNREACH 65 -#undef TARGET_ENOTEMPTY -#define TARGET_ENOTEMPTY 66 -// Unused 67 -#undef TARGET_EUSERS -#define TARGET_EUSERS 68 -#undef TARGET_EDQUOT -#define TARGET_EDQUOT 69 -#undef TARGET_ESTALE -#define TARGET_ESTALE 70 -#undef TARGET_EREMOTE -#define TARGET_EREMOTE 71 -// Unused 72-76 -#undef TARGET_ENOLCK -#define TARGET_ENOLCK 77 -#undef TARGET_ENOSYS -#define TARGET_ENOSYS 78 -// Unused 79 -#undef TARGET_ENOMSG -#define TARGET_ENOMSG 80 -#undef TARGET_EIDRM -#define TARGET_EIDRM 81 -#undef TARGET_ENOSR -#define TARGET_ENOSR 82 -#undef TARGET_ETIME -#define TARGET_ETIME 83 -#undef TARGET_EBADMSG -#define TARGET_EBADMSG 84 -#undef TARGET_EPROTO -#define TARGET_EPROTO 85 -#undef TARGET_ENODATA -#define TARGET_ENODATA 86 -#undef TARGET_ENOSTR -#define TARGET_ENOSTR 87 -#undef TARGET_ECHRNG -#define TARGET_ECHRNG 88 -#undef TARGET_EL2NSYNC -#define TARGET_EL2NSYNC 89 -#undef TARGET_EL3HLT -#define TARGET_EL3HLT 90 -#undef TARGET_EL3RST -#define TARGET_EL3RST 91 -#undef TARGET_ENOPKG -#define TARGET_ENOPKG 92 -#undef TARGET_ELNRNG -#define TARGET_ELNRNG 93 -#undef TARGET_EUNATCH -#define TARGET_EUNATCH 94 -#undef TARGET_ENOCSI -#define TARGET_ENOCSI 95 -#undef TARGET_EL2HLT -#define TARGET_EL2HLT 96 -#undef TARGET_EBADE -#define TARGET_EBADE 97 -#undef TARGET_EBADR -#define TARGET_EBADR 98 -#undef TARGET_EXFULL -#define TARGET_EXFULL 99 -#undef TARGET_ENOANO -#define TARGET_ENOANO 100 -#undef TARGET_EBADRQC -#define TARGET_EBADRQC 101 -#undef TARGET_EBADSLT -#define TARGET_EBADSLT 102 -// Unused 103 -#undef TARGET_EBFONT -#define TARGET_EBFONT 104 -#undef TARGET_ENONET -#define TARGET_ENONET 105 -#undef TARGET_ENOLINK -#define TARGET_ENOLINK 106 -#undef TARGET_EADV -#define TARGET_EADV 107 -#undef TARGET_ESRMNT -#define TARGET_ESRMNT 108 -#undef TARGET_ECOMM -#define TARGET_ECOMM 109 -#undef TARGET_EMULTIHOP -#define TARGET_EMULTIHOP 110 -#undef TARGET_EDOTDOT -#define TARGET_EDOTDOT 111 -#undef TARGET_EOVERFLOW -#define TARGET_EOVERFLOW 112 -#undef TARGET_ENOTUNIQ -#define TARGET_ENOTUNIQ 113 -#undef TARGET_EBADFD -#define TARGET_EBADFD 114 -#undef TARGET_EREMCHG -#define TARGET_EREMCHG 115 -#undef TARGET_EILSEQ -#define TARGET_EILSEQ 116 - -// Same as default 117-121 - -#undef TARGET_ELIBACC 
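
These per-target errno definitions exist because linux-user must translate the host's errno numbering into the guest's before returning from an emulated syscall; on Alpha most networking and SysV error numbers differ from the generic table. A minimal illustration of the idea (not QEMU's actual conversion code), using values from the new header above, is sketched here.

    /*
     * Minimal illustration only: map a few host errno values to Alpha's
     * numbering. A syscall wrapper would then return -host_to_alpha_errno(errno)
     * to the guest. Values are taken from the new target_errno_defs.h above.
     */
    #include <errno.h>

    static int host_to_alpha_errno(int host_errno)
    {
        switch (host_errno) {
        case EAGAIN:      return 35;   /* TARGET_EAGAIN on alpha */
        case EINPROGRESS: return 36;   /* TARGET_EINPROGRESS */
        case EDQUOT:      return 69;   /* TARGET_EDQUOT */
        case ENOSYS:      return 78;   /* TARGET_ENOSYS */
        default:          return host_errno;  /* many values happen to coincide */
        }
    }
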
-#define TARGET_ELIBACC 122 -#undef TARGET_ELIBBAD -#define TARGET_ELIBBAD 123 -#undef TARGET_ELIBSCN -#define TARGET_ELIBSCN 124 -#undef TARGET_ELIBMAX -#define TARGET_ELIBMAX 125 -#undef TARGET_ELIBEXEC -#define TARGET_ELIBEXEC 126 -#undef TARGET_ERESTART -#define TARGET_ERESTART 127 -#undef TARGET_ESTRPIPE -#define TARGET_ESTRPIPE 128 -#undef TARGET_ENOMEDIUM -#define TARGET_ENOMEDIUM 129 -#undef TARGET_EMEDIUMTYPE -#define TARGET_EMEDIUMTYPE 130 -#undef TARGET_ECANCELED -#define TARGET_ECANCELED 131 -#undef TARGET_ENOKEY -#define TARGET_ENOKEY 132 -#undef TARGET_EKEYEXPIRED -#define TARGET_EKEYEXPIRED 133 -#undef TARGET_EKEYREVOKED -#define TARGET_EKEYREVOKED 134 -#undef TARGET_EKEYREJECTED -#define TARGET_EKEYREJECTED 135 -#undef TARGET_EOWNERDEAD -#define TARGET_EOWNERDEAD 136 -#undef TARGET_ENOTRECOVERABLE -#define TARGET_ENOTRECOVERABLE 137 -#undef TARGET_ERFKILL -#define TARGET_ERFKILL 138 -#undef TARGET_EHWPOISON -#define TARGET_EHWPOISON 139 - // For sys_osf_getsysinfo #define TARGET_GSI_UACPROC 8 #define TARGET_GSI_IEEE_FP_CONTROL 45 diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c index 989d03cd89d..01cb6eb534e 100644 --- a/linux-user/arm/cpu_loop.c +++ b/linux-user/arm/cpu_loop.c @@ -20,9 +20,12 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "elf.h" #include "cpu_loop-common.h" +#include "signal-common.h" #include "semihosting/common-semi.h" +#include "target/arm/syndrome.h" #define get_user_code_u32(x, gaddr, env) \ ({ abi_long __r = get_user_u32((x), (gaddr)); \ @@ -92,7 +95,6 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env) { uint64_t oldval, newval, val; uint32_t addr, cpsr; - target_siginfo_t info; /* Based on the 32 bit code in do_kernel_trap */ @@ -141,12 +143,9 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env) end_exclusive(); /* We get the PC of the entry address - which is as good as anything, on a real kernel what you get depends on which mode it uses. */ - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->exception.vaddress; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, + env->exception.vaddress); } /* Handle a jump to the kernel code page. */ @@ -224,13 +223,66 @@ static bool insn_is_linux_bkpt(uint32_t opcode, bool is_thumb) } } +static bool emulate_arm_fpa11(CPUARMState *env, uint32_t opcode) +{ + TaskState *ts = env_cpu(env)->opaque; + int rc = EmulateAll(opcode, &ts->fpa, env); + int raise, enabled; + + if (rc == 0) { + /* Illegal instruction */ + return false; + } + if (rc > 0) { + /* Everything ok. */ + env->regs[15] += 4; + return true; + } + + /* FP exception */ + rc = -rc; + raise = 0; + + /* Translate softfloat flags to FPSR flags */ + if (rc & float_flag_invalid) { + raise |= BIT_IOC; + } + if (rc & float_flag_divbyzero) { + raise |= BIT_DZC; + } + if (rc & float_flag_overflow) { + raise |= BIT_OFC; + } + if (rc & float_flag_underflow) { + raise |= BIT_UFC; + } + if (rc & float_flag_inexact) { + raise |= BIT_IXC; + } + + /* Accumulate unenabled exceptions */ + enabled = ts->fpa.fpsr >> 16; + ts->fpa.fpsr |= raise & ~enabled; + + if (raise & enabled) { + /* + * The kernel's nwfpe emulator does not pass a real si_code. 
+ * It merely uses send_sig(SIGFPE, current, 1), which results in + * __send_signal() filling out SI_KERNEL with pid and uid 0 (under + * the "SEND_SIG_PRIV" case). That's what our force_sig() does. + */ + force_sig(TARGET_SIGFPE); + } else { + env->regs[15] += 4; + } + return true; +} + void cpu_loop(CPUARMState *env) { CPUState *cs = env_cpu(env); - int trapnr; + int trapnr, si_signo, si_code; unsigned int n, insn; - target_siginfo_t info; - uint32_t addr; abi_ulong ret; for(;;) { @@ -244,9 +296,7 @@ void cpu_loop(CPUARMState *env) case EXCP_NOCP: case EXCP_INVSTATE: { - TaskState *ts = cs->opaque; uint32_t opcode; - int rc; /* we handle the FPU emulation here, as Linux */ /* we get the opcode */ @@ -263,64 +313,12 @@ void cpu_loop(CPUARMState *env) goto excp_debug; } - rc = EmulateAll(opcode, &ts->fpa, env); - if (rc == 0) { /* illegal instruction */ - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLOPN; - info._sifields._sigfault._addr = env->regs[15]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } else if (rc < 0) { /* FP exception */ - int arm_fpe=0; - - /* translate softfloat flags to FPSR flags */ - if (-rc & float_flag_invalid) - arm_fpe |= BIT_IOC; - if (-rc & float_flag_divbyzero) - arm_fpe |= BIT_DZC; - if (-rc & float_flag_overflow) - arm_fpe |= BIT_OFC; - if (-rc & float_flag_underflow) - arm_fpe |= BIT_UFC; - if (-rc & float_flag_inexact) - arm_fpe |= BIT_IXC; - - FPSR fpsr = ts->fpa.fpsr; - //printf("fpsr 0x%x, arm_fpe 0x%x\n",fpsr,arm_fpe); - - if (fpsr & (arm_fpe << 16)) { /* exception enabled? */ - info.si_signo = TARGET_SIGFPE; - info.si_errno = 0; - - /* ordered by priority, least first */ - if (arm_fpe & BIT_IXC) info.si_code = TARGET_FPE_FLTRES; - if (arm_fpe & BIT_UFC) info.si_code = TARGET_FPE_FLTUND; - if (arm_fpe & BIT_OFC) info.si_code = TARGET_FPE_FLTOVF; - if (arm_fpe & BIT_DZC) info.si_code = TARGET_FPE_FLTDIV; - if (arm_fpe & BIT_IOC) info.si_code = TARGET_FPE_FLTINV; - - info._sifields._sigfault._addr = env->regs[15]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } else { - env->regs[15] += 4; - } - - /* accumulate unenabled exceptions */ - if ((!(fpsr & BIT_IXE)) && (arm_fpe & BIT_IXC)) - fpsr |= BIT_IXC; - if ((!(fpsr & BIT_UFE)) && (arm_fpe & BIT_UFC)) - fpsr |= BIT_UFC; - if ((!(fpsr & BIT_OFE)) && (arm_fpe & BIT_OFC)) - fpsr |= BIT_OFC; - if ((!(fpsr & BIT_DZE)) && (arm_fpe & BIT_DZC)) - fpsr |= BIT_DZC; - if ((!(fpsr & BIT_IOE)) && (arm_fpe & BIT_IOC)) - fpsr |= BIT_IOC; - ts->fpa.fpsr=fpsr; - } else { /* everything OK */ - /* increment PC */ - env->regs[15] += 4; + if (!env->thumb && emulate_arm_fpa11(env, opcode)) { + break; } + + force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, + env->regs[15]); } break; case EXCP_SWI: @@ -388,18 +386,14 @@ void cpu_loop(CPUARMState *env) * Otherwise SIGILL. This includes any SWI with * immediate not originally 0x9fxxxx, because * of the earlier XOR. + * Like the real kernel, we report the addr of the + * SWI in the siginfo si_addr but leave the PC + * pointing at the insn after the SWI. */ - info.si_signo = TARGET_SIGILL; - info.si_errno = 0; - info.si_code = TARGET_ILL_ILLTRP; - info._sifields._sigfault._addr = env->regs[15]; - if (env->thumb) { - info._sifields._sigfault._addr -= 2; - } else { - info._sifields._sigfault._addr -= 4; - } - queue_signal(env, info.si_signo, - QEMU_SI_FAULT, &info); + abi_ulong faultaddr = env->regs[15]; + faultaddr -= env->thumb ? 
2 : 4;
+ force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLTRP,
+ faultaddr);
}
break;
}
@@ -430,23 +424,35 @@ void cpu_loop(CPUARMState *env)
break;
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
- addr = env->exception.vaddress;
- {
- info.si_signo = TARGET_SIGSEGV;
- info.si_errno = 0;
- /* XXX: check env->error_code */
- info.si_code = TARGET_SEGV_MAPERR;
- info._sifields._sigfault._addr = addr;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+ /* For user-only we don't set TTBCR_EAE, so look at the FSR. */
+ switch (env->exception.fsr & 0x1f) {
+ case 0x1: /* Alignment */
+ si_signo = TARGET_SIGBUS;
+ si_code = TARGET_BUS_ADRALN;
+ break;
+ case 0x3: /* Access flag fault, level 1 */
+ case 0x6: /* Access flag fault, level 2 */
+ case 0x9: /* Domain fault, level 1 */
+ case 0xb: /* Domain fault, level 2 */
+ case 0xd: /* Permission fault, level 1 */
+ case 0xf: /* Permission fault, level 2 */
+ si_signo = TARGET_SIGSEGV;
+ si_code = TARGET_SEGV_ACCERR;
+ break;
+ case 0x5: /* Translation fault, level 1 */
+ case 0x7: /* Translation fault, level 2 */
+ si_signo = TARGET_SIGSEGV;
+ si_code = TARGET_SEGV_MAPERR;
+ break;
+ default:
+ g_assert_not_reached();
}
+ force_sig_fault(si_signo, si_code, env->exception.vaddress);
break;
case EXCP_DEBUG:
case EXCP_BKPT:
excp_debug:
- info.si_signo = TARGET_SIGTRAP;
- info.si_errno = 0;
- info.si_code = TARGET_TRAP_BRKPT;
- queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+ force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->regs[15]);
break;
case EXCP_KERNEL_TRAP:
if (do_kernel_trap(env))
diff --git a/linux-user/arm/nwfpe/fpa11.c b/linux-user/arm/nwfpe/fpa11.c
index f6f8163eab1..9a93610d245 100644
--- a/linux-user/arm/nwfpe/fpa11.c
+++ b/linux-user/arm/nwfpe/fpa11.c
@@ -97,37 +97,38 @@ void SetRoundingMode(const unsigned int opcode)
void SetRoundingPrecision(const unsigned int opcode)
{
- int rounding_precision;
- FPA11 *fpa11 = GET_FPA11();
+ FloatX80RoundPrec rounding_precision;
+ FPA11 *fpa11 = GET_FPA11();
#ifdef MAINTAIN_FPCR
- fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
+ fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
#endif
- switch (opcode & MASK_ROUNDING_PRECISION)
- {
- case ROUND_SINGLE:
- rounding_precision = 32;
+ switch (opcode & MASK_ROUNDING_PRECISION) {
+ case ROUND_SINGLE:
+ rounding_precision = floatx80_precision_s;
#ifdef MAINTAIN_FPCR
- fpa11->fpcr |= ROUND_SINGLE;
+ fpa11->fpcr |= ROUND_SINGLE;
#endif
- break;
+ break;
- case ROUND_DOUBLE:
- rounding_precision = 64;
+ case ROUND_DOUBLE:
+ rounding_precision = floatx80_precision_d;
#ifdef MAINTAIN_FPCR
- fpa11->fpcr |= ROUND_DOUBLE;
+ fpa11->fpcr |= ROUND_DOUBLE;
#endif
- break;
+ break;
- case ROUND_EXTENDED:
- rounding_precision = 80;
+ case ROUND_EXTENDED:
+ rounding_precision = floatx80_precision_x;
#ifdef MAINTAIN_FPCR
- fpa11->fpcr |= ROUND_EXTENDED;
+ fpa11->fpcr |= ROUND_EXTENDED;
#endif
- break;
+ break;
- default: rounding_precision = 80;
- }
- set_floatx80_rounding_precision(rounding_precision, &fpa11->fp_status);
+ default:
+ rounding_precision = floatx80_precision_x;
+ break;
+ }
+ set_floatx80_rounding_precision(rounding_precision, &fpa11->fp_status);
}
/* Emulate the instruction in the opcode.
*/ diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c index f21d1535e4d..df9f8e8eb20 100644 --- a/linux-user/arm/signal.c +++ b/linux-user/arm/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -45,15 +46,7 @@ struct target_sigcontext { abi_ulong fault_address; }; -struct target_ucontext_v1 { - abi_ulong tuc_flags; - abi_ulong tuc_link; - target_stack_t tuc_stack; - struct target_sigcontext tuc_mcontext; - target_sigset_t tuc_sigmask; /* mask last for extensibility */ -}; - -struct target_ucontext_v2 { +struct target_ucontext { abi_ulong tuc_flags; abi_ulong tuc_link; target_stack_t tuc_stack; @@ -97,68 +90,30 @@ struct target_iwmmxt_sigframe { #define TARGET_VFP_MAGIC 0x56465001 #define TARGET_IWMMXT_MAGIC 0x12ef842a -struct sigframe_v1 -{ - struct target_sigcontext sc; - abi_ulong extramask[TARGET_NSIG_WORDS-1]; - abi_ulong retcode[4]; -}; - -struct sigframe_v2 -{ - struct target_ucontext_v2 uc; - abi_ulong retcode[4]; -}; - -struct rt_sigframe_v1 +struct sigframe { - abi_ulong pinfo; - abi_ulong puc; - struct target_siginfo info; - struct target_ucontext_v1 uc; + struct target_ucontext uc; abi_ulong retcode[4]; }; -struct rt_sigframe_v2 +struct rt_sigframe { struct target_siginfo info; - struct target_ucontext_v2 uc; - abi_ulong retcode[4]; + struct sigframe sig; }; -/* - * For ARM syscalls, we encode the syscall number into the instruction. - */ -#define SWI_SYS_SIGRETURN (0xef000000|(TARGET_NR_sigreturn + ARM_SYSCALL_BASE)) -#define SWI_SYS_RT_SIGRETURN (0xef000000|(TARGET_NR_rt_sigreturn + ARM_SYSCALL_BASE)) +static abi_ptr sigreturn_fdpic_tramp; /* - * For Thumb syscalls, we pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (TARGET_NR_sigreturn)) -#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (TARGET_NR_rt_sigreturn)) - -static const abi_ulong retcodes[4] = { - SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, - SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN -}; - -/* - * Stub needed to make sure the FD register (r9) contains the right - * value. + * Up to 3 words of 'retcode' in the sigframe are code, + * with retcode[3] being used by fdpic for the function descriptor. + * This code is not actually executed, but is retained for ABI compat. + * + * We will create a table of 8 retcode variants in the sigtramp page. + * Let each table entry use 3 words. 
*/ -static const unsigned long sigreturn_fdpic_codes[3] = { - 0xe59fc004, /* ldr r12, [pc, #4] to read function descriptor */ - 0xe59c9004, /* ldr r9, [r12, #4] to setup GOT */ - 0xe59cf000 /* ldr pc, [r12] to jump into restorer */ -}; - -static const unsigned long sigreturn_fdpic_thumb_codes[3] = { - 0xc008f8df, /* ldr r12, [pc, #8] to read function descriptor */ - 0x9004f8dc, /* ldr r9, [r12, #4] to setup GOT */ - 0xf000f8dc /* ldr pc, [r12] to jump into restorer */ -}; +#define RETCODE_WORDS 3 +#define RETCODE_BYTES (RETCODE_WORDS * 4) static inline int valid_user_regs(CPUARMState *regs) { @@ -206,15 +161,15 @@ get_sigframe(struct target_sigaction *ka, CPUARMState *regs, int framesize) } static int -setup_return(CPUARMState *env, struct target_sigaction *ka, - abi_ulong *rc, abi_ulong frame_addr, int usig, abi_ulong rc_addr) +setup_return(CPUARMState *env, struct target_sigaction *ka, int usig, + struct sigframe *frame, abi_ulong sp_addr) { abi_ulong handler = 0; abi_ulong handler_fdpic_GOT = 0; abi_ulong retcode; - - int thumb; + int thumb, retcode_idx; int is_fdpic = info_is_fdpic(((TaskState *)thread_cpu->opaque)->info); + bool copy_retcode; if (is_fdpic) { /* In FDPIC mode, ka->_sa_handler points to a function @@ -231,6 +186,7 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, } thumb = handler & 1; + retcode_idx = thumb + (ka->sa_flags & TARGET_SA_SIGINFO ? 2 : 0); uint32_t cpsr = cpsr_read(env); @@ -248,53 +204,37 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, if (ka->sa_flags & TARGET_SA_RESTORER) { if (is_fdpic) { - /* For FDPIC we ensure that the restorer is called with a - * correct r9 value. For that we need to write code on - * the stack that sets r9 and jumps back to restorer - * value. - */ - if (thumb) { - __put_user(sigreturn_fdpic_thumb_codes[0], rc); - __put_user(sigreturn_fdpic_thumb_codes[1], rc + 1); - __put_user(sigreturn_fdpic_thumb_codes[2], rc + 2); - __put_user((abi_ulong)ka->sa_restorer, rc + 3); - } else { - __put_user(sigreturn_fdpic_codes[0], rc); - __put_user(sigreturn_fdpic_codes[1], rc + 1); - __put_user(sigreturn_fdpic_codes[2], rc + 2); - __put_user((abi_ulong)ka->sa_restorer, rc + 3); - } - - retcode = rc_addr + thumb; + __put_user((abi_ulong)ka->sa_restorer, &frame->retcode[3]); + retcode = (sigreturn_fdpic_tramp + + retcode_idx * RETCODE_BYTES + thumb); + copy_retcode = true; } else { retcode = ka->sa_restorer; + copy_retcode = false; } } else { - unsigned int idx = thumb; - - if (ka->sa_flags & TARGET_SA_SIGINFO) { - idx += 2; - } - - __put_user(retcodes[idx], rc); + retcode = default_sigreturn + retcode_idx * RETCODE_BYTES + thumb; + copy_retcode = true; + } - retcode = rc_addr + thumb; + /* Copy the code to the stack slot for ABI compatibility. */ + if (copy_retcode) { + memcpy(frame->retcode, g2h_untagged(retcode & ~1), RETCODE_BYTES); } env->regs[0] = usig; if (is_fdpic) { env->regs[9] = handler_fdpic_GOT; } - env->regs[13] = frame_addr; + env->regs[13] = sp_addr; env->regs[14] = retcode; env->regs[15] = handler & (thumb ? 
~1 : ~3); cpsr_write(env, cpsr, CPSR_IT | CPSR_T | CPSR_E, CPSRWriteByInstr); - arm_rebuild_hflags(env); return 0; } -static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env) +static abi_ulong *setup_sigframe_vfp(abi_ulong *regspace, CPUARMState *env) { int i; struct target_vfp_sigframe *vfpframe; @@ -311,8 +251,7 @@ static abi_ulong *setup_sigframe_v2_vfp(abi_ulong *regspace, CPUARMState *env) return (abi_ulong*)(vfpframe+1); } -static abi_ulong *setup_sigframe_v2_iwmmxt(abi_ulong *regspace, - CPUARMState *env) +static abi_ulong *setup_sigframe_iwmmxt(abi_ulong *regspace, CPUARMState *env) { int i; struct target_iwmmxt_sigframe *iwmmxtframe; @@ -331,15 +270,15 @@ static abi_ulong *setup_sigframe_v2_iwmmxt(abi_ulong *regspace, return (abi_ulong*)(iwmmxtframe+1); } -static void setup_sigframe_v2(struct target_ucontext_v2 *uc, - target_sigset_t *set, CPUARMState *env) +static void setup_sigframe(struct target_ucontext *uc, + target_sigset_t *set, CPUARMState *env) { struct target_sigaltstack stack; int i; abi_ulong *regspace; /* Clear all the bits of the ucontext we don't use. */ - memset(uc, 0, offsetof(struct target_ucontext_v2, tuc_mcontext)); + memset(uc, 0, offsetof(struct target_ucontext, tuc_mcontext)); memset(&stack, 0, sizeof(stack)); target_save_altstack(&stack, env); @@ -349,10 +288,10 @@ static void setup_sigframe_v2(struct target_ucontext_v2 *uc, /* Save coprocessor signal frame. */ regspace = uc->tuc_regspace; if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) { - regspace = setup_sigframe_v2_vfp(regspace, env); + regspace = setup_sigframe_vfp(regspace, env); } if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - regspace = setup_sigframe_v2_iwmmxt(regspace, env); + regspace = setup_sigframe_iwmmxt(regspace, env); } /* Write terminating magic word */ @@ -363,114 +302,23 @@ static void setup_sigframe_v2(struct target_ucontext_v2 *uc, } } -/* compare linux/arch/arm/kernel/signal.c:setup_frame() */ -static void setup_frame_v1(int usig, struct target_sigaction *ka, - target_sigset_t *set, CPUARMState *regs) -{ - struct sigframe_v1 *frame; - abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame)); - int i; - - trace_user_setup_frame(regs, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - goto sigsegv; - } - - setup_sigcontext(&frame->sc, regs, set->sig[0]); - - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - __put_user(set->sig[i], &frame->extramask[i - 1]); - } - - if (setup_return(regs, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct sigframe_v1, retcode))) { - goto sigsegv; - } - - unlock_user_struct(frame, frame_addr, 1); - return; -sigsegv: - unlock_user_struct(frame, frame_addr, 1); - force_sigsegv(usig); -} - -static void setup_frame_v2(int usig, struct target_sigaction *ka, - target_sigset_t *set, CPUARMState *regs) -{ - struct sigframe_v2 *frame; - abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame)); - - trace_user_setup_frame(regs, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - goto sigsegv; - } - - setup_sigframe_v2(&frame->uc, set, regs); - - if (setup_return(regs, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct sigframe_v2, retcode))) { - goto sigsegv; - } - - unlock_user_struct(frame, frame_addr, 1); - return; -sigsegv: - unlock_user_struct(frame, frame_addr, 1); - force_sigsegv(usig); -} - void setup_frame(int usig, struct target_sigaction *ka, target_sigset_t *set, CPUARMState *regs) { - if (get_osversion() >= 0x020612) { - 
setup_frame_v2(usig, ka, set, regs); - } else { - setup_frame_v1(usig, ka, set, regs); - } -} - -/* compare linux/arch/arm/kernel/signal.c:setup_rt_frame() */ -static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, - target_siginfo_t *info, - target_sigset_t *set, CPUARMState *env) -{ - struct rt_sigframe_v1 *frame; - abi_ulong frame_addr = get_sigframe(ka, env, sizeof(*frame)); - struct target_sigaltstack stack; - int i; - abi_ulong info_addr, uc_addr; + struct sigframe *frame; + abi_ulong frame_addr = get_sigframe(ka, regs, sizeof(*frame)); - trace_user_setup_rt_frame(env, frame_addr); + trace_user_setup_frame(regs, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { goto sigsegv; } - info_addr = frame_addr + offsetof(struct rt_sigframe_v1, info); - __put_user(info_addr, &frame->pinfo); - uc_addr = frame_addr + offsetof(struct rt_sigframe_v1, uc); - __put_user(uc_addr, &frame->puc); - tswap_siginfo(&frame->info, info); - - /* Clear all the bits of the ucontext we don't use. */ - memset(&frame->uc, 0, offsetof(struct target_ucontext_v1, tuc_mcontext)); - - memset(&stack, 0, sizeof(stack)); - target_save_altstack(&stack, env); - memcpy(&frame->uc.tuc_stack, &stack, sizeof(stack)); - - setup_sigcontext(&frame->uc.tuc_mcontext, env, set->sig[0]); - for(i = 0; i < TARGET_NSIG_WORDS; i++) { - __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); - } + setup_sigframe(&frame->uc, set, regs); - if (setup_return(env, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct rt_sigframe_v1, retcode))) { + if (setup_return(regs, ka, usig, frame, frame_addr)) { goto sigsegv; } - env->regs[1] = info_addr; - env->regs[2] = uc_addr; - unlock_user_struct(frame, frame_addr, 1); return; sigsegv: @@ -478,11 +326,11 @@ static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, force_sigsegv(usig); } -static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, - target_siginfo_t *info, - target_sigset_t *set, CPUARMState *env) +void setup_rt_frame(int usig, struct target_sigaction *ka, + target_siginfo_t *info, + target_sigset_t *set, CPUARMState *env) { - struct rt_sigframe_v2 *frame; + struct rt_sigframe *frame; abi_ulong frame_addr = get_sigframe(ka, env, sizeof(*frame)); abi_ulong info_addr, uc_addr; @@ -491,14 +339,13 @@ static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, goto sigsegv; } - info_addr = frame_addr + offsetof(struct rt_sigframe_v2, info); - uc_addr = frame_addr + offsetof(struct rt_sigframe_v2, uc); + info_addr = frame_addr + offsetof(struct rt_sigframe, info); + uc_addr = frame_addr + offsetof(struct rt_sigframe, sig.uc); tswap_siginfo(&frame->info, info); - setup_sigframe_v2(&frame->uc, set, env); + setup_sigframe(&frame->sig.uc, set, env); - if (setup_return(env, ka, frame->retcode, frame_addr, usig, - frame_addr + offsetof(struct rt_sigframe_v2, retcode))) { + if (setup_return(env, ka, usig, &frame->sig, frame_addr)) { goto sigsegv; } @@ -512,17 +359,6 @@ static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, force_sigsegv(usig); } -void setup_rt_frame(int usig, struct target_sigaction *ka, - target_siginfo_t *info, - target_sigset_t *set, CPUARMState *env) -{ - if (get_osversion() >= 0x020612) { - setup_rt_frame_v2(usig, ka, info, set, env); - } else { - setup_rt_frame_v1(usig, ka, info, set, env); - } -} - static int restore_sigcontext(CPUARMState *env, struct target_sigcontext *sc) { @@ -547,62 +383,13 @@ restore_sigcontext(CPUARMState *env, struct target_sigcontext *sc) 
__get_user(env->regs[15], &sc->arm_pc); __get_user(cpsr, &sc->arm_cpsr); cpsr_write(env, cpsr, CPSR_USER | CPSR_EXEC, CPSRWriteByInstr); - arm_rebuild_hflags(env); err |= !valid_user_regs(env); return err; } -static long do_sigreturn_v1(CPUARMState *env) -{ - abi_ulong frame_addr; - struct sigframe_v1 *frame = NULL; - target_sigset_t set; - sigset_t host_set; - int i; - - /* - * Since we stacked the signal on a 64-bit boundary, - * then 'sp' should be word aligned here. If it's - * not, then the user is trying to mess with us. - */ - frame_addr = env->regs[13]; - trace_user_do_sigreturn(env, frame_addr); - if (frame_addr & 7) { - goto badframe; - } - - if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { - goto badframe; - } - - __get_user(set.sig[0], &frame->sc.oldmask); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - __get_user(set.sig[i], &frame->extramask[i - 1]); - } - - target_to_host_sigset_internal(&host_set, &set); - set_sigmask(&host_set); - - if (restore_sigcontext(env, &frame->sc)) { - goto badframe; - } - -#if 0 - /* Send SIGTRAP if we're single-stepping */ - if (ptrace_cancel_bpt(current)) - send_sig(SIGTRAP, current, 1); -#endif - unlock_user_struct(frame, frame_addr, 0); - return -TARGET_QEMU_ESIGRETURN; - -badframe: - force_sig(TARGET_SIGSEGV); - return -TARGET_QEMU_ESIGRETURN; -} - -static abi_ulong *restore_sigframe_v2_vfp(CPUARMState *env, abi_ulong *regspace) +static abi_ulong *restore_sigframe_vfp(CPUARMState *env, abi_ulong *regspace) { int i; abi_ulong magic, sz; @@ -632,8 +419,8 @@ static abi_ulong *restore_sigframe_v2_vfp(CPUARMState *env, abi_ulong *regspace) return (abi_ulong*)(vfpframe + 1); } -static abi_ulong *restore_sigframe_v2_iwmmxt(CPUARMState *env, - abi_ulong *regspace) +static abi_ulong *restore_sigframe_iwmmxt(CPUARMState *env, + abi_ulong *regspace) { int i; abi_ulong magic, sz; @@ -657,9 +444,9 @@ static abi_ulong *restore_sigframe_v2_iwmmxt(CPUARMState *env, return (abi_ulong*)(iwmmxtframe + 1); } -static int do_sigframe_return_v2(CPUARMState *env, - target_ulong context_addr, - struct target_ucontext_v2 *uc) +static int do_sigframe_return(CPUARMState *env, + target_ulong context_addr, + struct target_ucontext *uc) { sigset_t host_set; abi_ulong *regspace; @@ -667,29 +454,26 @@ static int do_sigframe_return_v2(CPUARMState *env, target_to_host_sigset(&host_set, &uc->tuc_sigmask); set_sigmask(&host_set); - if (restore_sigcontext(env, &uc->tuc_mcontext)) + if (restore_sigcontext(env, &uc->tuc_mcontext)) { return 1; + } /* Restore coprocessor signal frame */ regspace = uc->tuc_regspace; if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) { - regspace = restore_sigframe_v2_vfp(env, regspace); + regspace = restore_sigframe_vfp(env, regspace); if (!regspace) { return 1; } } if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - regspace = restore_sigframe_v2_iwmmxt(env, regspace); + regspace = restore_sigframe_iwmmxt(env, regspace); if (!regspace) { return 1; } } - if (do_sigaltstack(context_addr - + offsetof(struct target_ucontext_v2, tuc_stack), - 0, get_sp_from_cpustate(env)) == -EFAULT) { - return 1; - } + target_restore_altstack(&uc->tuc_stack, env); #if 0 /* Send SIGTRAP if we're single-stepping */ @@ -700,10 +484,10 @@ static int do_sigframe_return_v2(CPUARMState *env, return 0; } -static long do_sigreturn_v2(CPUARMState *env) +long do_sigreturn(CPUARMState *env) { abi_ulong frame_addr; - struct sigframe_v2 *frame = NULL; + struct sigframe *frame = NULL; /* * Since we stacked the signal on a 64-bit boundary, @@ -720,10 +504,9 @@ static long 
do_sigreturn_v2(CPUARMState *env) goto badframe; } - if (do_sigframe_return_v2(env, - frame_addr - + offsetof(struct sigframe_v2, uc), - &frame->uc)) { + if (do_sigframe_return(env, + frame_addr + offsetof(struct sigframe, uc), + &frame->uc)) { goto badframe; } @@ -736,20 +519,10 @@ static long do_sigreturn_v2(CPUARMState *env) return -TARGET_QEMU_ESIGRETURN; } -long do_sigreturn(CPUARMState *env) -{ - if (get_osversion() >= 0x020612) { - return do_sigreturn_v2(env); - } else { - return do_sigreturn_v1(env); - } -} - -static long do_rt_sigreturn_v1(CPUARMState *env) +long do_rt_sigreturn(CPUARMState *env) { abi_ulong frame_addr; - struct rt_sigframe_v1 *frame = NULL; - sigset_t host_set; + struct rt_sigframe *frame = NULL; /* * Since we stacked the signal on a 64-bit boundary, @@ -766,21 +539,12 @@ static long do_rt_sigreturn_v1(CPUARMState *env) goto badframe; } - target_to_host_sigset(&host_set, &frame->uc.tuc_sigmask); - set_sigmask(&host_set); - - if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { + if (do_sigframe_return(env, + frame_addr + offsetof(struct rt_sigframe, sig.uc), + &frame->sig.uc)) { goto badframe; } - if (do_sigaltstack(frame_addr + offsetof(struct rt_sigframe_v1, uc.tuc_stack), 0, get_sp_from_cpustate(env)) == -EFAULT) - goto badframe; - -#if 0 - /* Send SIGTRAP if we're single-stepping */ - if (ptrace_cancel_bpt(current)) - send_sig(SIGTRAP, current, 1); -#endif unlock_user_struct(frame, frame_addr, 0); return -TARGET_QEMU_ESIGRETURN; @@ -790,47 +554,77 @@ static long do_rt_sigreturn_v1(CPUARMState *env) return -TARGET_QEMU_ESIGRETURN; } -static long do_rt_sigreturn_v2(CPUARMState *env) -{ - abi_ulong frame_addr; - struct rt_sigframe_v2 *frame = NULL; +/* + * EABI syscalls pass the number via r7. + * Note that the kernel still adds the OABI syscall number to the trap, + * presumably for backward ABI compatibility with unwinders. + */ +#define ARM_MOV_R7_IMM(X) (0xe3a07000 | (X)) +#define ARM_SWI_SYS(X) (0xef000000 | (X) | ARM_SYSCALL_BASE) - /* - * Since we stacked the signal on a 64-bit boundary, - * then 'sp' should be word aligned here. If it's - * not, then the user is trying to mess with us. - */ - frame_addr = env->regs[13]; - trace_user_do_rt_sigreturn(env, frame_addr); - if (frame_addr & 7) { - goto badframe; - } +#define THUMB_MOVS_R7_IMM(X) (0x2700 | (X)) +#define THUMB_SWI_SYS 0xdf00 - if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { - goto badframe; - } +static void write_arm_sigreturn(uint32_t *rc, int syscall) +{ + __put_user(ARM_MOV_R7_IMM(syscall), rc); + __put_user(ARM_SWI_SYS(syscall), rc + 1); + /* Wrote 8 of 12 bytes */ +} - if (do_sigframe_return_v2(env, - frame_addr - + offsetof(struct rt_sigframe_v2, uc), - &frame->uc)) { - goto badframe; - } +static void write_thm_sigreturn(uint32_t *rc, int syscall) +{ + __put_user(THUMB_SWI_SYS << 16 | THUMB_MOVS_R7_IMM(syscall), rc); + /* Wrote 4 of 12 bytes */ +} - unlock_user_struct(frame, frame_addr, 0); - return -TARGET_QEMU_ESIGRETURN; +/* + * Stub needed to make sure the FD register (r9) contains the right value. + * Use the same instruction sequence as the kernel. 
+ */ +static void write_arm_fdpic_sigreturn(uint32_t *rc, int ofs) +{ + assert(ofs <= 0xfff); + __put_user(0xe59d3000 | ofs, rc + 0); /* ldr r3, [sp, #ofs] */ + __put_user(0xe8930908, rc + 1); /* ldm r3, { r3, r9 } */ + __put_user(0xe12fff13, rc + 2); /* bx r3 */ + /* Wrote 12 of 12 bytes */ +} -badframe: - unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV); - return -TARGET_QEMU_ESIGRETURN; +static void write_thm_fdpic_sigreturn(void *vrc, int ofs) +{ + uint16_t *rc = vrc; + + assert((ofs & ~0x3fc) == 0); + __put_user(0x9b00 | (ofs >> 2), rc + 0); /* ldr r3, [sp, #ofs] */ + __put_user(0xcb0c, rc + 1); /* ldm r3, { r2, r3 } */ + __put_user(0x4699, rc + 2); /* mov r9, r3 */ + __put_user(0x4710, rc + 3); /* bx r2 */ + /* Wrote 8 of 12 bytes */ } -long do_rt_sigreturn(CPUARMState *env) +void setup_sigtramp(abi_ulong sigtramp_page) { - if (get_osversion() >= 0x020612) { - return do_rt_sigreturn_v2(env); - } else { - return do_rt_sigreturn_v1(env); - } + uint32_t total_size = 8 * RETCODE_BYTES; + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, total_size, 0); + + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + write_arm_sigreturn(&tramp[0 * RETCODE_WORDS], TARGET_NR_sigreturn); + write_thm_sigreturn(&tramp[1 * RETCODE_WORDS], TARGET_NR_sigreturn); + write_arm_sigreturn(&tramp[2 * RETCODE_WORDS], TARGET_NR_rt_sigreturn); + write_thm_sigreturn(&tramp[3 * RETCODE_WORDS], TARGET_NR_rt_sigreturn); + + sigreturn_fdpic_tramp = sigtramp_page + 4 * RETCODE_BYTES; + write_arm_fdpic_sigreturn(tramp + 4 * RETCODE_WORDS, + offsetof(struct sigframe, retcode[3])); + write_thm_fdpic_sigreturn(tramp + 5 * RETCODE_WORDS, + offsetof(struct sigframe, retcode[3])); + write_arm_fdpic_sigreturn(tramp + 6 * RETCODE_WORDS, + offsetof(struct rt_sigframe, sig.retcode[3])); + write_thm_fdpic_sigreturn(tramp + 7 * RETCODE_WORDS, + offsetof(struct rt_sigframe, sig.retcode[3])); + + unlock_user(tramp, sigtramp_page, total_size); } diff --git a/linux-user/arm/syscall.tbl b/linux-user/arm/syscall.tbl index 171077cbf41..28e03b5fec0 100644 --- a/linux-user/arm/syscall.tbl +++ b/linux-user/arm/syscall.tbl @@ -453,3 +453,10 @@ 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +# 443 reserved for quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/linux-user/arm/target_errno_defs.h b/linux-user/arm/target_errno_defs.h new file mode 100644 index 00000000000..fd843732384 --- /dev/null +++ b/linux-user/arm/target_errno_defs.h @@ -0,0 +1,7 @@ +#ifndef ARM_TARGET_ERRNO_DEFS_H +#define ARM_TARGET_ERRNO_DEFS_H + +/* Target uses generic errno */ +#include "../generic/target_errno_defs.h" + +#endif diff --git a/linux-user/arm/target_signal.h b/linux-user/arm/target_signal.h index 0998dd6dfa7..1e7fb0cecbd 100644 --- a/linux-user/arm/target_signal.h +++ b/linux-user/arm/target_signal.h @@ -22,4 +22,6 @@ typedef struct target_sigaltstack { #include "../generic/signal.h" #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* ARM_TARGET_SIGNAL_H */ diff --git a/linux-user/cris/cpu_loop.c b/linux-user/cris/cpu_loop.c index 334edddd1e2..0d5d268609a 100644 --- a/linux-user/cris/cpu_loop.c +++ b/linux-user/cris/cpu_loop.c @@ -20,7 
+20,9 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu.h" +#include "user-internals.h" #include "cpu_loop-common.h" +#include "signal-common.h" void cpu_loop(CPUCRISState *env) { @@ -35,16 +37,6 @@ void cpu_loop(CPUCRISState *env) process_queued_cpu_work(cs); switch (trapnr) { - case 0xaa: - { - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - /* XXX: check env->error_code */ - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = env->pregs[PR_EDA]; - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); - } - break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; diff --git a/linux-user/cris/signal.c b/linux-user/cris/signal.c index 1e02194377b..7f6aca934e1 100644 --- a/linux-user/cris/signal.c +++ b/linux-user/cris/signal.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu.h" +#include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" @@ -96,6 +97,14 @@ static abi_ulong get_sigframe(CPUCRISState *env, int framesize) return sp - framesize; } +static void setup_sigreturn(uint16_t *retcode) +{ + /* This is movu.w __NR_sigreturn, r9; break 13; */ + __put_user(0x9c5f, retcode + 0); + __put_user(TARGET_NR_sigreturn, retcode + 1); + __put_user(0xe93d, retcode + 2); +} + void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUCRISState *env) { @@ -111,14 +120,8 @@ void setup_frame(int sig, struct target_sigaction *ka, /* * The CRIS signal return trampoline. A real linux/CRIS kernel doesn't * use this trampoline anymore but it sets it up for GDB. - * In QEMU, using the trampoline simplifies things a bit so we use it. - * - * This is movu.w __NR_sigreturn, r9; break 13; */ - __put_user(0x9c5f, frame->retcode+0); - __put_user(TARGET_NR_sigreturn, - frame->retcode + 1); - __put_user(0xe93d, frame->retcode + 2); + setup_sigreturn(frame->retcode); /* Save the mask. */ __put_user(set->sig[0], &frame->sc.oldmask); @@ -134,7 +137,7 @@ void setup_frame(int sig, struct target_sigaction *ka, env->regs[10] = sig; env->pc = (unsigned long) ka->_sa_handler; /* Link SRP so the guest returns through the trampoline. 
*/ - env->pregs[PR_SRP] = frame_addr + offsetof(typeof(*frame), retcode); + env->pregs[PR_SRP] = default_sigreturn; unlock_user_struct(frame, frame_addr, 1); return; @@ -186,3 +189,14 @@ long do_rt_sigreturn(CPUCRISState *env) qemu_log_mask(LOG_UNIMP, "do_rt_sigreturn: not implemented\n"); return -TARGET_ENOSYS; } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint16_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6, 0); + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + setup_sigreturn(tramp); + + unlock_user(tramp, sigtramp_page, 6); +} diff --git a/linux-user/cris/target_errno_defs.h b/linux-user/cris/target_errno_defs.h new file mode 100644 index 00000000000..1cf43b17a50 --- /dev/null +++ b/linux-user/cris/target_errno_defs.h @@ -0,0 +1,7 @@ +#ifndef CRIS_TARGET_ERRNO_DEFS_H +#define CRIS_TARGET_ERRNO_DEFS_H + +/* Target uses generic errno */ +#include "../generic/target_errno_defs.h" + +#endif diff --git a/linux-user/cris/target_signal.h b/linux-user/cris/target_signal.h index 495a1428968..83a51555074 100644 --- a/linux-user/cris/target_signal.h +++ b/linux-user/cris/target_signal.h @@ -22,4 +22,6 @@ typedef struct target_sigaltstack { #include "../generic/signal.h" #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* CRIS_TARGET_SIGNAL_H */ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c325230ee90..ed9eb54bfd1 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -6,6 +6,10 @@ #include #include "qemu.h" +#include "user-internals.h" +#include "signal-common.h" +#include "loader.h" +#include "user-mmap.h" #include "disas/disas.h" #include "qemu/bitops.h" #include "qemu/path.h" @@ -14,6 +18,7 @@ #include "qemu/units.h" #include "qemu/selfmap.h" #include "qapi/error.h" +#include "target_signal.h" #ifdef _ARCH_PPC64 #undef ARCH_DLINFO @@ -172,33 +177,33 @@ typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; */ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) { - (*regs)[0] = env->regs[15]; - (*regs)[1] = env->regs[14]; - (*regs)[2] = env->regs[13]; - (*regs)[3] = env->regs[12]; - (*regs)[4] = env->regs[R_EBP]; - (*regs)[5] = env->regs[R_EBX]; - (*regs)[6] = env->regs[11]; - (*regs)[7] = env->regs[10]; - (*regs)[8] = env->regs[9]; - (*regs)[9] = env->regs[8]; - (*regs)[10] = env->regs[R_EAX]; - (*regs)[11] = env->regs[R_ECX]; - (*regs)[12] = env->regs[R_EDX]; - (*regs)[13] = env->regs[R_ESI]; - (*regs)[14] = env->regs[R_EDI]; - (*regs)[15] = env->regs[R_EAX]; /* XXX */ - (*regs)[16] = env->eip; - (*regs)[17] = env->segs[R_CS].selector & 0xffff; - (*regs)[18] = env->eflags; - (*regs)[19] = env->regs[R_ESP]; - (*regs)[20] = env->segs[R_SS].selector & 0xffff; - (*regs)[21] = env->segs[R_FS].selector & 0xffff; - (*regs)[22] = env->segs[R_GS].selector & 0xffff; - (*regs)[23] = env->segs[R_DS].selector & 0xffff; - (*regs)[24] = env->segs[R_ES].selector & 0xffff; - (*regs)[25] = env->segs[R_FS].selector & 0xffff; - (*regs)[26] = env->segs[R_GS].selector & 0xffff; + (*regs)[0] = tswapreg(env->regs[15]); + (*regs)[1] = tswapreg(env->regs[14]); + (*regs)[2] = tswapreg(env->regs[13]); + (*regs)[3] = tswapreg(env->regs[12]); + (*regs)[4] = tswapreg(env->regs[R_EBP]); + (*regs)[5] = tswapreg(env->regs[R_EBX]); + (*regs)[6] = tswapreg(env->regs[11]); + (*regs)[7] = tswapreg(env->regs[10]); + (*regs)[8] = tswapreg(env->regs[9]); + (*regs)[9] = tswapreg(env->regs[8]); + (*regs)[10] = tswapreg(env->regs[R_EAX]); + (*regs)[11] = tswapreg(env->regs[R_ECX]); + (*regs)[12] = 
tswapreg(env->regs[R_EDX]); + (*regs)[13] = tswapreg(env->regs[R_ESI]); + (*regs)[14] = tswapreg(env->regs[R_EDI]); + (*regs)[15] = tswapreg(env->regs[R_EAX]); /* XXX */ + (*regs)[16] = tswapreg(env->eip); + (*regs)[17] = tswapreg(env->segs[R_CS].selector & 0xffff); + (*regs)[18] = tswapreg(env->eflags); + (*regs)[19] = tswapreg(env->regs[R_ESP]); + (*regs)[20] = tswapreg(env->segs[R_SS].selector & 0xffff); + (*regs)[21] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[22] = tswapreg(env->segs[R_GS].selector & 0xffff); + (*regs)[23] = tswapreg(env->segs[R_DS].selector & 0xffff); + (*regs)[24] = tswapreg(env->segs[R_ES].selector & 0xffff); + (*regs)[25] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff); } #else @@ -244,23 +249,23 @@ typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; */ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) { - (*regs)[0] = env->regs[R_EBX]; - (*regs)[1] = env->regs[R_ECX]; - (*regs)[2] = env->regs[R_EDX]; - (*regs)[3] = env->regs[R_ESI]; - (*regs)[4] = env->regs[R_EDI]; - (*regs)[5] = env->regs[R_EBP]; - (*regs)[6] = env->regs[R_EAX]; - (*regs)[7] = env->segs[R_DS].selector & 0xffff; - (*regs)[8] = env->segs[R_ES].selector & 0xffff; - (*regs)[9] = env->segs[R_FS].selector & 0xffff; - (*regs)[10] = env->segs[R_GS].selector & 0xffff; - (*regs)[11] = env->regs[R_EAX]; /* XXX */ - (*regs)[12] = env->eip; - (*regs)[13] = env->segs[R_CS].selector & 0xffff; - (*regs)[14] = env->eflags; - (*regs)[15] = env->regs[R_ESP]; - (*regs)[16] = env->segs[R_SS].selector & 0xffff; + (*regs)[0] = tswapreg(env->regs[R_EBX]); + (*regs)[1] = tswapreg(env->regs[R_ECX]); + (*regs)[2] = tswapreg(env->regs[R_EDX]); + (*regs)[3] = tswapreg(env->regs[R_ESI]); + (*regs)[4] = tswapreg(env->regs[R_EDI]); + (*regs)[5] = tswapreg(env->regs[R_EBP]); + (*regs)[6] = tswapreg(env->regs[R_EAX]); + (*regs)[7] = tswapreg(env->segs[R_DS].selector & 0xffff); + (*regs)[8] = tswapreg(env->segs[R_ES].selector & 0xffff); + (*regs)[9] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[10] = tswapreg(env->segs[R_GS].selector & 0xffff); + (*regs)[11] = tswapreg(env->regs[R_EAX]); /* XXX */ + (*regs)[12] = tswapreg(env->eip); + (*regs)[13] = tswapreg(env->segs[R_CS].selector & 0xffff); + (*regs)[14] = tswapreg(env->eflags); + (*regs)[15] = tswapreg(env->regs[R_ESP]); + (*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff); } #endif @@ -586,6 +591,16 @@ enum { ARM_HWCAP2_A64_SVESM4 = 1 << 6, ARM_HWCAP2_A64_FLAGM2 = 1 << 7, ARM_HWCAP2_A64_FRINT = 1 << 8, + ARM_HWCAP2_A64_SVEI8MM = 1 << 9, + ARM_HWCAP2_A64_SVEF32MM = 1 << 10, + ARM_HWCAP2_A64_SVEF64MM = 1 << 11, + ARM_HWCAP2_A64_SVEBF16 = 1 << 12, + ARM_HWCAP2_A64_I8MM = 1 << 13, + ARM_HWCAP2_A64_BF16 = 1 << 14, + ARM_HWCAP2_A64_DGH = 1 << 15, + ARM_HWCAP2_A64_RNG = 1 << 16, + ARM_HWCAP2_A64_BTI = 1 << 17, + ARM_HWCAP2_A64_MTE = 1 << 18, }; #define ELF_HWCAP get_elf_hwcap() @@ -638,8 +653,23 @@ static uint32_t get_elf_hwcap2(void) uint32_t hwcaps = 0; GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP); + GET_FEATURE_ID(aa64_sve2, ARM_HWCAP2_A64_SVE2); + GET_FEATURE_ID(aa64_sve2_aes, ARM_HWCAP2_A64_SVEAES); + GET_FEATURE_ID(aa64_sve2_pmull128, ARM_HWCAP2_A64_SVEPMULL); + GET_FEATURE_ID(aa64_sve2_bitperm, ARM_HWCAP2_A64_SVEBITPERM); + GET_FEATURE_ID(aa64_sve2_sha3, ARM_HWCAP2_A64_SVESHA3); + GET_FEATURE_ID(aa64_sve2_sm4, ARM_HWCAP2_A64_SVESM4); GET_FEATURE_ID(aa64_condm_5, ARM_HWCAP2_A64_FLAGM2); GET_FEATURE_ID(aa64_frint, ARM_HWCAP2_A64_FRINT); + 
GET_FEATURE_ID(aa64_sve_i8mm, ARM_HWCAP2_A64_SVEI8MM); + GET_FEATURE_ID(aa64_sve_f32mm, ARM_HWCAP2_A64_SVEF32MM); + GET_FEATURE_ID(aa64_sve_f64mm, ARM_HWCAP2_A64_SVEF64MM); + GET_FEATURE_ID(aa64_sve_bf16, ARM_HWCAP2_A64_SVEBF16); + GET_FEATURE_ID(aa64_i8mm, ARM_HWCAP2_A64_I8MM); + GET_FEATURE_ID(aa64_bf16, ARM_HWCAP2_A64_BF16); + GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG); + GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI); + GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE); return hwcaps; } @@ -663,48 +693,25 @@ static uint32_t get_elf_hwcap2(void) #define ELF_CLASS ELFCLASS64 #define ELF_ARCH EM_SPARCV9 - -#define STACK_BIAS 2047 - -static inline void init_thread(struct target_pt_regs *regs, - struct image_info *infop) -{ -#ifndef TARGET_ABI32 - regs->tstate = 0; -#endif - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; -#ifdef TARGET_ABI32 - regs->u_regs[14] = infop->start_stack - 16 * 4; -#else - if (personality(infop->personality) == PER_LINUX32) - regs->u_regs[14] = infop->start_stack - 16 * 4; - else - regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS; -#endif -} - #else #define ELF_START_MMAP 0x80000000 #define ELF_HWCAP (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \ | HWCAP_SPARC_MULDIV) - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_SPARC +#endif /* TARGET_SPARC64 */ static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { - regs->psr = 0; + /* Note that target_cpu_copy_regs does not read psr/tstate. */ regs->pc = infop->entry; regs->npc = regs->pc + 4; regs->y = 0; - regs->u_regs[14] = infop->start_stack - 16 * 4; + regs->u_regs[14] = (infop->start_stack - 16 * sizeof(abi_ulong) + - TARGET_STACK_BIAS); } - -#endif -#endif +#endif /* TARGET_SPARC */ #ifdef TARGET_PPC @@ -828,7 +835,7 @@ static uint32_t get_elf_hwcap2(void) PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07 | QEMU_PPC_FEATURE2_VEC_CRYPTO); GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00 | - QEMU_PPC_FEATURE2_DARN); + QEMU_PPC_FEATURE2_DARN | QEMU_PPC_FEATURE2_HAS_IEEE128); #undef GET_FEATURE #undef GET_FEATURE2 @@ -894,7 +901,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en (*regs)[33] = tswapreg(env->msr); (*regs)[35] = tswapreg(env->ctr); (*regs)[36] = tswapreg(env->lr); - (*regs)[37] = tswapreg(env->xer); + (*regs)[37] = tswapreg(cpu_read_xer(env)); for (i = 0; i < ARRAY_SIZE(env->crf); i++) { ccr |= env->crf[i] << (32 - ((i + 1) * 4)); @@ -918,8 +925,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en #endif #define ELF_ARCH EM_MIPS -#define elf_check_arch(x) ((x) == EM_MIPS || (x) == EM_NANOMIPS) - #ifdef TARGET_ABI_MIPSN32 #define elf_check_abi(x) ((x) & EF_MIPS_ABI2) #else @@ -1374,6 +1379,7 @@ static uint32_t get_elf_hwcap(void) hwcap |= HWCAP_S390_ETF3EH; } GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS); + GET_FEATURE(S390_FEAT_VECTOR_ENH, HWCAP_S390_VXRS_EXT); return hwcap; } @@ -1385,6 +1391,39 @@ static inline void init_thread(struct target_pt_regs *regs, struct image_info *i regs->gprs[15] = infop->start_stack; } +/* See linux kernel: arch/s390/include/uapi/asm/ptrace.h (s390_regs). 
*/ +#define ELF_NREG 27 +typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; + +enum { + TARGET_REG_PSWM = 0, + TARGET_REG_PSWA = 1, + TARGET_REG_GPRS = 2, + TARGET_REG_ARS = 18, + TARGET_REG_ORIG_R2 = 26, +}; + +static void elf_core_copy_regs(target_elf_gregset_t *regs, + const CPUS390XState *env) +{ + int i; + uint32_t *aregs; + + (*regs)[TARGET_REG_PSWM] = tswapreg(env->psw.mask); + (*regs)[TARGET_REG_PSWA] = tswapreg(env->psw.addr); + for (i = 0; i < 16; i++) { + (*regs)[TARGET_REG_GPRS + i] = tswapreg(env->regs[i]); + } + aregs = (uint32_t *)&((*regs)[TARGET_REG_ARS]); + for (i = 0; i < 16; i++) { + aregs[i] = tswap32(env->aregs[i]); + } + (*regs)[TARGET_REG_ORIG_R2] = 0; +} + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + #endif /* TARGET_S390X */ #ifdef TARGET_RISCV @@ -1398,6 +1437,19 @@ static inline void init_thread(struct target_pt_regs *regs, struct image_info *i #define ELF_CLASS ELFCLASS64 #endif +#define ELF_HWCAP get_elf_hwcap() + +static uint32_t get_elf_hwcap(void) +{ +#define MISA_BIT(EXT) (1 << (EXT - 'A')) + RISCVCPU *cpu = RISCV_CPU(thread_cpu); + uint32_t mask = MISA_BIT('I') | MISA_BIT('M') | MISA_BIT('A') + | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C'); + + return cpu->env.misa_ext & mask; +#undef MISA_BIT +} + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { @@ -3197,6 +3249,22 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) #endif } + /* + * TODO: load a vdso, which would also contain the signal trampolines. + * Otherwise, allocate a private page to hold them. + */ + if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { + abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tramp_page == -1) { + return -errno; + } + + setup_sigtramp(tramp_page); + target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC); + } + bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex, info, (elf_interpreter ? &interp_info : NULL)); info->start_stack = bprm->p; @@ -3386,7 +3454,6 @@ static size_t note_size(const struct memelfnote *); static void free_note_info(struct elf_note_info *); static int fill_note_info(struct elf_note_info *, long, const CPUArchState *); static void fill_thread_info(struct elf_note_info *, const CPUArchState *); -static int core_dump_filename(const TaskState *, char *, size_t); static int dump_write(int, const void *, size_t); static int write_note(struct memelfnote *, int); @@ -3629,11 +3696,12 @@ static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts) (void) memset(psinfo, 0, sizeof (*psinfo)); - len = ts->info->arg_end - ts->info->arg_start; + len = ts->info->env_strings - ts->info->arg_strings; if (len >= ELF_PRARGSZ) len = ELF_PRARGSZ - 1; - if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_start, len)) + if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_strings, len)) { return -EFAULT; + } for (i = 0; i < len; i++) if (psinfo->pr_psargs[i] == 0) psinfo->pr_psargs[i] = ' '; @@ -3685,32 +3753,16 @@ static void fill_auxv_note(struct memelfnote *note, const TaskState *ts) * for the name: * qemu__-